def metric_fn(label_ids, logits, trans):
                # First run Viterbi (CRF) decoding on the outputs.
                # pred_ids and num_labels are taken from the enclosing model_fn.

                weight = tf.sequence_mask(FLAGS.max_seq_length)
                precision = tf_metrics.precision(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
                recall = tf_metrics.recall(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)
                f = tf_metrics.f1(label_ids, pred_ids, num_labels, [2, 3, 4, 5, 6, 7], weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }
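The snippet above references pred_ids and num_labels from the enclosing model_fn without showing the decoding step its comment mentions. A minimal sketch of how pred_ids is typically produced from logits and the transition matrix trans with TF 1.x CRF decoding, assuming a sequence_lengths tensor is available in that scope:

# Hedged sketch: assumes `logits` of shape [batch, max_seq_len, num_labels], a CRF
# transition matrix `trans`, and a `sequence_lengths` tensor from the model_fn.
import tensorflow as tf

pred_ids, _ = tf.contrib.crf.crf_decode(
    potentials=logits,                 # per-token unary scores
    transition_params=trans,           # learned transition matrix
    sequence_length=sequence_lengths)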
Example #2
 def metric_fn(per_example_loss, label_ids, logits):
     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     precision = tf_metrics.precision(label_ids,
                                      predictions,
                                      11, [1, 2, 4, 5, 6, 7, 8, 9],
                                      average="macro")
     recall = tf_metrics.recall(label_ids,
                                predictions,
                                11, [1, 2, 4, 5, 6, 7, 8, 9],
                                average="macro")
     f = tf_metrics.f1(label_ids,
                       predictions,
                       11, [1, 2, 4, 5, 6, 7, 8, 9],
                       average="macro")
     loss = tf.metrics.mean(per_example_loss)
     return {
         "eval_precision": precision,
         "eval_recall": recall,
         "eval_f": f,
         "eval_loss": loss,
     }
Example #3
 def metric_fn(label_ids, pred_ids, per_example_loss,
               input_mask):
     # ['<pad>'] + ["O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-MISC", "I-MISC", "X"]
     indices = [2, 3, 4, 5, 6, 7, 8, 9]
     precision = tf_metrics.precision(label_ids, pred_ids,
                                      num_labels, indices,
                                      input_mask)
     recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                indices, input_mask)
     f = tf_metrics.f1(label_ids, pred_ids, num_labels, indices,
                       input_mask)
     accuracy = tf.metrics.accuracy(label_ids, pred_ids,
                                    input_mask)
     loss = tf.metrics.mean(per_example_loss)
     return {
         'eval_precision': precision,
         'eval_recall': recall,
         'eval_f': f,
         'eval_accuracy': accuracy,
         'eval_loss': loss,
     }
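In the BERT-style training scripts these metric_fn definitions usually live in, the function is not called directly but wrapped into an eval_metrics tuple for the TPU estimator. A sketch under that assumption (total_loss, scaffold_fn and the metric_fn arguments come from the surrounding model_fn):

# Hedged sketch of how such a metric_fn is typically consumed.
eval_metrics = (metric_fn,
                [label_ids, pred_ids, per_example_loss, input_mask])
output_spec = tf.contrib.tpu.TPUEstimatorSpec(
    mode=mode,
    loss=total_loss,
    eval_metrics=eval_metrics,
    scaffold_fn=scaffold_fn)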
Example #4
 def metric_fn(per_example_loss, label_ids, logits):
     # def metric_fn(label_ids, logits):
     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     precision = tf_metrics.precision(label_ids,
                                      predictions,
                                      FLAGS.num_labels, [1, 2],
                                      average="macro")
     recall = tf_metrics.recall(label_ids,
                                predictions,
                                FLAGS.num_labels, [1, 2],
                                average="macro")
     f = tf_metrics.f1(label_ids,
                       predictions,
                       FLAGS.num_labels, [1, 2],
                       average="macro")
     #
     return {
         "eval_precision": precision,
         "eval_recall": recall,
         "eval_f": f,
         #"eval_loss": loss,
     }
 def metric_fn(label_ids, pred_ids, num_labels):
     # predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     pos_indices = list(range(2, num_labels - 3))
     # pos_indices = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
     #                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
     # pos_indices = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
     precision = tf_metrics.precision(
         label_ids, pred_ids, num_labels, pos_indices, average="micro")
     recall = tf_metrics.recall(
         label_ids, pred_ids, num_labels, pos_indices, average="micro")
     f = tf_metrics.f1(label_ids, pred_ids,
                       num_labels, pos_indices, average="micro")
     # hook_dict['precision'] = precision
     # hook_dict['recall'] = recall
     # hook_dict['f'] = f
     # tf.summary.scalar('precision', precision)
     return {
         "eval_precision": precision,
         "eval_recall": recall,
         "eval_f": f,
         # "eval_loss": loss,
     }
 def metric_fn(per_example_loss, label_ids, logits):
     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     pos_indices = list(range(2, num_labels - 3))
     precision = tf_metrics.precision(label_ids,
                                      predictions,
                                      num_labels,
                                      pos_indices,
                                      average="macro")
     recall = tf_metrics.recall(label_ids,
                                predictions,
                                num_labels,
                                pos_indices,
                                average="macro")
     f = tf_metrics.f1(label_ids,
                       predictions,
                       num_labels,
                       pos_indices,
                       average="macro")
     return {
         "eval_precision": precision,
         "eval_recall": recall,
         "eval_f": f,
     }
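The expression list(range(2, num_labels - 3)) used above encodes an assumption about how the label list is ordered. A hypothetical label layout that would match it (the real list depends on the data processor):

# Hypothetical label list, for illustration only.
labels = ["<pad>", "O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC",
          "X", "[CLS]", "[SEP]"]
num_labels = len(labels)                        # 11
pos_indices = list(range(2, num_labels - 3))    # [2, 3, 4, 5, 6, 7]: entity tags only,
                                                # skipping <pad>/O in front and X/[CLS]/[SEP] at the end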
Example #7
def eval_phase(label_ids, pred_ids, num_labels):
    # First run Viterbi (CRF) decoding on the outputs.
    eval_list = []
    eval_ids_path = os.path.join(FLAGS.output_dir, "eval_ids_list.txt")
    assert os.path.exists(eval_ids_path)
    with open(eval_ids_path, 'r') as list_file:
        for item in list_file:
            # convert each line back to int; the metrics below expect a list of ints
            eval_list.append(int(item.strip()))
    assert len(eval_list) > 0
    print("eval_list:", eval_list)
    weight = tf.sequence_mask(FLAGS.max_seq_length)
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                     eval_list, weight)
    tf.summary.scalar("precision", precision[1])
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels, eval_list,
                               weight)
    tf.summary.scalar("recall", recall[1])
    f = tf_metrics.f1(label_ids, pred_ids, num_labels, eval_list, weight)
    tf.summary.scalar("f1", f[1])
    return (precision, recall, f)
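eval_phase assumes that eval_ids_list.txt already exists in FLAGS.output_dir. A hedged sketch of the writer side, assuming the file simply stores one positive label id per line during preprocessing (write_eval_ids is a hypothetical helper, not part of the original code):

import os

def write_eval_ids(output_dir, positive_label_ids):
    # one positive label id per line, read back as ints by eval_phase
    with open(os.path.join(output_dir, "eval_ids_list.txt"), "w") as f:
        for label_id in positive_label_ids:
            f.write("%d\n" % label_id)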
Example #8
 def metric_fn(per_example_loss, label_ids, logits,
               is_real_example):
     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     recall = tf_metrics.recall(label_ids,
                                predictions,
                                num_classes=num_labels,
                                average='micro')
     precision = tf_metrics.precision(label_ids,
                                      predictions,
                                      num_classes=num_labels,
                                      average='micro')
     f1 = tf_metrics.f1(label_ids,
                        predictions,
                        num_classes=num_labels,
                        average='micro')
     loss = tf.metrics.mean(values=per_example_loss,
                            weights=is_real_example)
     return {
         "eval_recall": recall,
         'eval_precision': precision,
         'eval_f1': f1,
         "eval_loss": loss,
     }
Example #9
            def metric_fn(label_ids, logits, trans):
                # First run Viterbi (CRF) decoding on the outputs.

                weight = tf.sequence_mask(FLAGS.max_seq_length)
                # print(label_ids.get_shape())  #[?,128]
                # print(pred_ids.get_shape())   #[64,128]
                # print(num_labels)  #17
                # print(weight.get_shape())  #[128,]
                precision = tf_metrics.precision(label_ids, pred_ids,
                                                 num_labels,
                                                 [2, 3, 4, 5, 6, 7], weight)
                recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                           [2, 3, 4, 5, 6, 7], weight)
                f = tf_metrics.f1(label_ids, pred_ids, num_labels,
                                  [2, 3, 4, 5, 6, 7], weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }
Example #10
            def metric_fn(per_example_loss, label_ids, logits):
                # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # labels = []
                # for i, x in enumerate()
                predict_labels = []
                # for i in range(1, num_labels - 4):
                #     predict_labels.append(i)
                # precision = tf_metrics.precision(label_ids, predictions, num_labels, predict_labels, average="macro")
                # recall = tf_metrics.recall(label_ids, predictions, num_labels, predict_labels, average="macro")
                # f = tf_metrics.f1(label_ids, predictions, num_labels, predict_labels, average="macro")

                precision = tf_metrics.precision(label_ids, predictions, num_labels, average="macro")
                recall = tf_metrics.recall(label_ids, predictions, num_labels, average="macro")
                f = tf_metrics.f1(label_ids, predictions, num_labels, average="macro")

                #
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }
 def metric_fn(per_example_loss, label_ids, logits):
     # def metric_fn(label_ids, logits):
     predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
     n_class = [i for i in range(len(LABELS) - 2)]
     precision = tf_metrics.precision(label_ids,
                                      predictions,
                                      len(LABELS) + 1, [1, 2, 3],
                                      average="macro")
     recall = tf_metrics.recall(label_ids,
                                predictions,
                                len(LABELS) + 1, [1, 2, 3],
                                average="macro")
     f = tf_metrics.f1(label_ids,
                       predictions,
                       len(LABELS) + 1, [1, 2, 3],
                       average="macro")
     #
     return {
         "eval_precision": precision,
         "eval_recall": recall,
         "eval_f": f,
         # "eval_loss": loss,
     }
Example #12
            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # print("predictions shape: " + str(predictions.get_shape().as_list()))
                # print("label_ids shape: " + str(label_ids.get_shape().as_list()))
                # print("is_real_example shape: " + str(is_real_example.get_shape().as_list()))
                precision = tf_metrics.precision(label_ids,
                                                 predictions,
                                                 num_labels, [2, 3, 4, 5],
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predictions,
                                           num_labels, [2, 3, 4, 5],
                                           average="macro")
                f = tf_metrics.f1(label_ids,
                                  predictions,
                                  num_labels, [2, 3, 4, 5],
                                  average="macro")

                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions,
                                               weights=is_real_example)
                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)

                # precision = tf_metrics.precision(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
                # recall = tf_metrics.recall(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
                # f = tf_metrics.f1(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
                #
                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }
Example #13
            def metric_fn(per_example_loss, label_ids, logits):
                # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                print(logits)
                print(predictions)
                print(label_ids)

                #label_ids_array = label_ids.eval()
                #predictions_array = predictions.eval()
                #predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                #print(predictions_array)
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions)
                precision = tf_metrics.precision(
                    label_ids,
                    predictions,
                    19, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                    average="micro")
                recall = tf_metrics.recall(
                    label_ids,
                    predictions,
                    19, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                    average="micro")
                f = tf_metrics.f1(
                    label_ids,
                    predictions,
                    19, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                    average="micro")
                #class_repo = classification_report(label_ids_array, predictions_array )
                return {
                    "eval_accuracy": accuracy,
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    #    "eval_class_repo":class_repo,
                    #"eval_loss": loss,
                }
Example #14
            def metric_fn(label_ids, predicted_labels, input_mask, num_labels):

                label_ids = tf.boolean_mask(label_ids, input_mask)
                predicted_labels = tf.boolean_mask(predicted_labels,
                                                   input_mask)

                precision = tf_metrics.precision(label_ids,
                                                 predicted_labels,
                                                 num_labels, [1, 2, 3],
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predicted_labels,
                                           num_labels, [1, 2, 3],
                                           average="macro")
                f1 = tf_metrics.f1(label_ids,
                                   predicted_labels,
                                   num_labels, [1, 2, 3],
                                   average="macro")

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f1
                }
Example #15
 def metric_fn(seq_length, max_len, label_ids, pred_ids):
     indices = [1, 2]  # the indices argument tells the metrics which labels to evaluate
     # Metrics
     weights = tf.sequence_mask(seq_length, maxlen=max_len)
     tf.logging.info("****shape in metrics***")
     label_ids_metric = tf.argmax(label_ids, 1)
     tf.logging.info(label_ids_metric.shape)
     tf.logging.info(pred_ids.shape)
     metrics = {
         'acc':
         tf.metrics.accuracy(label_ids_metric, pred_ids),
         'precision':
         precision(label_ids_metric, pred_ids, params['num_labels'],
                   indices),
         'recall':
         recall(label_ids_metric, pred_ids, params['num_labels'],
                indices),
         'f1':
         f1(label_ids_metric, pred_ids, params['num_labels'],
            indices),
     }
     for metric_name, op in metrics.items():
         tf.summary.scalar(metric_name, op[1])
     return metrics
Example #16
            def metric_fn(label_ids, logits, trans):
                # First run Viterbi (CRF) decoding on the outputs.

                weight = tf.sequence_mask(max_seq_length)
                # 0 is padding and 1 is SEP; the named-entity labels are treated as positives
                # (they must be listed explicitly for the multi-class metrics); the indices
                # below refer to label_ids.
                # precision = TP / (TP + FP)   # fraction of predicted positives that are actually positive
                # recall = TP / (TP + FN)      # fraction of actual positives that are predicted positive
                # accuracy = (TP + TN) / (P + N)
                # F1-score = 2 / [(1 / precision) + (1 / recall)]
                precision = tf_metrics.precision(label_ids, pred_ids,
                                                 num_labels,
                                                 [2, 3, 4, 5, 6, 7], weight)
                recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                           [2, 3, 4, 5, 6, 7], weight)
                f1 = tf_metrics.f1(label_ids, pred_ids, num_labels,
                                   [2, 3, 4, 5, 6, 7], weight)

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f1": f1,
                    # "eval_loss": loss,
                }
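The formulas quoted in the comments above can be checked on a toy confusion matrix; a plain-Python sanity check with made-up counts:

# Made-up counts, for illustration only.
tp, fp, fn, tn = 8, 2, 4, 86
precision = tp / (tp + fp)                    # 0.8
recall = tp / (tp + fn)                       # ~0.667
accuracy = (tp + tn) / (tp + fp + fn + tn)    # 0.94
f1 = 2 / ((1 / precision) + (1 / recall))     # ~0.727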
Example #17
            def metric_fn(per_example_loss, label_ids, logits):
                # def metric_fn(label_ids, logits):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # Evaluation: computes precision, recall and F1. If the label set changes,
                # the numbers below must change too: 10 is the total number of classes and
                # 1-6 are the useful (B/I/E) classes. See the functions in tf.metrics for details.
                precision = tf_metrics.precision(label_ids,
                                                 predictions,
                                                 10, [1, 2, 3, 4, 5, 6],
                                                 average="macro")
                recall = tf_metrics.recall(label_ids,
                                           predictions,
                                           10, [1, 2, 3, 4, 5, 6],
                                           average="macro")
                f = tf_metrics.f1(label_ids,
                                  predictions,
                                  10, [1, 2, 3, 4, 5, 6],
                                  average="macro")

                return {
                    "eval_precision": precision,
                    "eval_recall": recall,
                    "eval_f": f,
                    # "eval_loss": loss,
                }
Example #18
def model_fn(features, labels, mode, params):
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    input_ids = features["text"]
    author_id = features["author"]
    category_ids = features["categories"]
    label_id = features["label"]
    cnn = CnnModel(params, input_ids, author_id, category_ids, training)

    squeeze_label_ids = tf.squeeze(label_id, axis=1)
    logits, predict_label_ids, loss = cnn.build_network(squeeze_label_ids,)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        #words = tf.contrib.lookup.index_to_string_table_from_file(params['vocab'])
        #input_words = words.lookup(tf.to_int64(input_ids))
        predictions = {
            'true_label_ids': squeeze_label_ids,
            'predict_label_ids': predict_label_ids,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss

        train_op = optimization.create_optimizer(loss, params['learning_rate'], params['train_steps'], params['num_warmup_steps'])
        if mode == tf.estimator.ModeKeys.EVAL:
            # Metrics
            metrics = {
                'acc': tf.metrics.accuracy(squeeze_label_ids, predict_label_ids),
                # compute per-class precision/recall, then average over the classes
                'precision': precision(squeeze_label_ids, predict_label_ids, params['label_size'], average='macro'),
                'recall': recall(squeeze_label_ids, predict_label_ids, params['label_size'], average='macro'),
                'f1': f1(squeeze_label_ids, predict_label_ids, params['label_size'], average='macro'),
            }
            return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
Example #19
        def model_fn(features, labels, mode, params):
            # For serving, features are a bit different
            #if isinstance(features, dict):
            #    features = features['words'], features['nwords']

            # Read vocabs and inputs
            #import ipdb
            #ipdb.set_trace()
            dropout = args.dropout
            #input_ids = features["input_ids"]
            #mask = features["mask"]
            #segment_ids = features["segment_ids"]
            #label_ids = features["label_ids"]
            ##words, nwords = features
            #tf.print(' '.join(words[4]), output_stream=sys.stderr)
            training = (mode == tf.estimator.ModeKeys.TRAIN)
            #vocab_words = tf.contrib.lookup.index_table_from_file(
            #    #args.vocab_words)
            #    args.vocab_words, num_oov_buckets=args.num_oov_buckets)
            #with Path(args.vocab_tags).open() as f:
            #    indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
            #    num_tags = len(indices) + 1

            ##word_ids = vocab_words.lookup(words)
            if args.embedding == 'word2id':
                # word2id
                with Path(args.vocab_words).open(encoding='utf-8') as f:
                    vocab_words_1 = f.readlines()
                    vocab_length = len(vocab_words_1)
                input_ids = features["input_ids"]
                label_ids = features["label_ids"]
                mask = features["mask"]
                embeddings = embedding(input_ids, vocab_length, args)
                embeddings = tf.layers.dropout(embeddings,
                                               rate=dropout,
                                               training=training)
                pass

            elif args.embedding == 'bert':
                from my_model.embeddings.embedding import get_bert_embedding
                input_ids = features["input_ids"]
                mask = features["mask"]
                segment_ids = features["segment_ids"]
                label_ids = features["label_ids"]
                embeddings = get_bert_embedding(args.bert_config_file,
                                                training,
                                                input_ids,
                                                mask,
                                                segment_ids,
                                                use_one_hot_embeddings=False)

            else:
                # Word Embeddings
                # default
                input_ids = features["input_ids"]
                label_ids = features["label_ids"]
                mask = features["mask"]
                glove = np.load(args.glove)['embeddings']  # np.array
                variable = np.vstack([glove, [[0.] * args.dim]])
                variable = tf.Variable(variable,
                                       dtype=tf.float32,
                                       trainable=False)
                embeddings = tf.nn.embedding_lookup(variable, input_ids)
                embeddings = tf.layers.dropout(embeddings,
                                               rate=dropout,
                                               training=training)
                pass

            (total_loss, logits,
             predicts) = create_model(embeddings,
                                      label_ids,
                                      mask,
                                      training,
                                      self.num_labels,
                                      use_one_hot_embeddings=False)
            tvars = tf.trainable_variables()
            initialized_variable_names = None
            scaffold_fn = None
            if args.init_checkpoint:
                (assignment_map, initialized_variable_names
                 ) = modeling.get_assignment_map_from_checkpoint(
                     tvars, args.init_checkpoint)
                tf.train.init_from_checkpoint(args.init_checkpoint,
                                              assignment_map)
                self.logging.debug("**** Trainable Variables ****")
                for var in tvars:
                    init_string = ""
                    if var.name in initialized_variable_names:
                        init_string = ", *INIT_FROM_CKPT*"
                    self.logging.debug("  name = %s, shape = %s%s", var.name,
                                       var.shape, init_string)
            if mode == tf.estimator.ModeKeys.TRAIN:
                warmup_steps = args.warmup_steps
                step = tf.to_float(tf.train.get_global_step())
                if args.learning_rate_decay == 'sqrt':
                    lr_warmup = args.learning_rate_peak * tf.minimum(
                        1.0, step / warmup_steps)
                    lr_decay = args.learning_rate_peak * tf.minimum(
                        1.0, tf.sqrt(warmup_steps / step))
                    lr = tf.where(step < warmup_steps, lr_warmup, lr_decay)
                elif args.learning_rate_decay == 'exp':
                    lr = tf.train.exponential_decay(
                        args.learning_rate_peak,
                        global_step=step,
                        decay_steps=args.decay_steps,
                        decay_rate=args.decay_rate)
                elif args.learning_rate_decay == 'bert':
                    num_train_steps = int(self.len_train_examples /
                                          args.batch_size * args.epochs)
                    #num_warmup_steps = int(num_train_steps * args.warmup_steps)
                    num_warmup_steps = int(num_train_steps * 0.1)
                    train_op = optimization.create_optimizer(
                        total_loss,
                        args.learning_rate,
                        num_train_steps,
                        num_warmup_steps,
                        use_tpu=False)
                    output_spec = tf.estimator.EstimatorSpec(
                        mode=mode,
                        loss=total_loss,
                        train_op=train_op,
                        #scaffold_fn=scaffold_fn
                    )
                    return output_spec
                else:
                    self.logging.info(
                        'learning rate decay strategy not supported')
                    sys.exit()
                tf.print(lr)
                train_op = tf.train.AdamOptimizer(lr).minimize(
                    total_loss,
                    global_step=tf.train.get_or_create_global_step())
                #return tf.estimator.EstimatorSpec(
                #    mode, loss=loss, train_op=train_op)

                #output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    train_op=train_op,
                    #scaffold_fn=scaffold_fn
                )

            elif mode == tf.estimator.ModeKeys.EVAL:
                #def metric_fn(label_ids, logits,num_labels,mask):
                #    predictions = tf.math.argmax(logits, axis=-1, output_type=tf.int32)
                #    cm = metrics.streaming_confusion_matrix(label_ids, predictions, num_labels-1, weights=mask)
                #    return {
                #        "confusion_matrix":cm
                #    }
                #    #
                #eval_metrics = (metric_fn, [label_ids, logits, self.num_labels, mask])
                #output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                # Metrics
                #weights = tf.sequence_mask(nwords)
                weights = mask
                #mask2len = tf.reduce_sum(mask,axis=1)
                #weights = tf.sequence_mask(mask2len)
                #pred_ids= tf.math.argmax(logits, axis=-1, output_type=tf.int32)
                pred_ids = predicts
                num_label_ids = self.num_labels
                metrics = {
                    'acc':
                    tf.metrics.accuracy(label_ids, pred_ids, weights),
                    #'precision': tf.metrics.precision(label_ids, pred_ids, weights),
                    #'recall': tf.metrics.recall(label_ids, pred_ids, weights),
                    ##'f1': f1(label_ids, pred_ids, weights),
                    'precision':
                    precision(label_ids, pred_ids, self.num_labels,
                              self.indices, weights),
                    'recall':
                    recall(label_ids, pred_ids, self.num_labels, self.indices,
                           weights),
                    'f1':
                    f1(label_ids, pred_ids, self.num_labels, self.indices,
                       weights),
                }
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=metrics
                    #scaffold_fn=scaffold_fn
                )
            else:
                #output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    predictions=predicts,
                    #scaffold_fn=scaffold_fn
                )
            return output_spec
Example #20
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars) = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    # num_oov_buckets: out-of-vocabulary words get indices in [vocab_size, vocab_size + num_oov_buckets - 1];
    # if num_oov_buckets <= 0, unknown words return default_value (-1 by default)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        # indices holds the positive-class tag indices; 'O' is treated as the negative
        # class and excluded, which helps the metric computation during evaluate
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings: character embedding vectors are learned here
    char_ids = vocab_chars.lookup(chars)
    # The paper initializes char_embeddings uniformly in [-sqrt(3/dim), sqrt(3/dim)];
    # with it f1 = 0.91270673 vs. 0.91264033 without, a difference within normal random variation
    variable = tf.get_variable('chars_embeddings',
                               [num_chars, params['dim_chars']],
                               dtype=tf.float32)
    # initializer=tf.random_uniform_initializer(-tf.sqrt(3/params['dim_chars']), tf.sqrt(3/params['dim_chars'])))
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings,
                                        rate=dropout,
                                        training=training)

    # Char 1d convolution; sequence_mask turns the integer char counts per word into a boolean mask
    mask = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, mask,
                                            params['filters'],
                                            params['kernel_size'])

    # Word Embeddings: not trained here, the glove.840B.300d vectors are used directly
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # Bi-LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF: in a linear-chain CRF the maximal cliques over the output variables are pairs of
    # adjacent nodes, so each feature function involves at most two neighbouring outputs.
    # logits are the unary (state) features of the CRF; crf_params are the binary (transition) features.
    logits = tf.layers.dense(
        output,
        num_tags)  # a (output.shape[-1], num_tags) matrix keeps the leading dims and maps the last dim to num_tags
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        mask = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, mask),
            'precision': precision(tags, pred_ids, num_tags, indices, mask),
            'recall': recall(tags, pred_ids, num_tags, indices, mask),
            'f1': f1(tags, pred_ids, num_tags, indices, mask),
        }
        # tf.metrics.accuracy returns (accuracy, update_op): the former computes the accuracy as of the
        # previous batch without updating, while the latter updates total and count with the current batch
        # and returns the refreshed accuracy, so the update_op must be run. If op[0] (the bare accuracy)
        # were added to the summary instead, total and count would never update and the value would stay fixed.
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
                loss,
                global_step=tf.train.get_or_create_global_step())  # default learning rate 1e-3
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
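A hedged sketch of how a model_fn like the one above is typically driven; the params keys mirror the lookups the function performs, while the file names and input_fn are assumptions:

params = {
    'dropout': 0.5, 'num_oov_buckets': 1, 'dim': 300, 'dim_chars': 100,
    'filters': 50, 'kernel_size': 3, 'lstm_size': 100,
    'words': 'vocab.words.txt', 'chars': 'vocab.chars.txt',
    'tags': 'vocab.tags.txt', 'glove': 'glove.npz',
}
estimator = tf.estimator.Estimator(model_fn, model_dir='results/model', params=params)
# estimator.train(input_fn=train_input_fn)
# estimator.evaluate(input_fn=eval_input_fn)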
Example #21
    def __model_fn(self, features, labels, mode, params):
        '''Model structure: Bi-LSTM + CRF.
        features: feature columns; labels: tag column;
        mode: argument supplied by tf.estimator.Estimator(), distinguishes TRAIN / EVAL / PREDICT;
        params: parameter dict.
        '''
        # features comes in one of two forms. Form 1: ((([None], ()), ([None, None], [None])), [None]),
        # which is what self.__input_fn() produces.
        # Form 2: {'words': [word1, word2, ...], 'nwords': number,
        #          'chars': [['J', 'o', ...], ['l', ...], ...], 'nchars': number},
        # which is the form we pass in at prediction time.
        if isinstance(features, dict):
            features = ((features['words'], features['nwords']),
                        (features['chars'], features['nchars']))

        with tf.name_scope('Read_data'):
            # unpack the feature columns
            (words, nwords), (chars,
                              nchars) = features  # words is the token list, nwords the corresponding counts
            # vocabulary of Chinese characters / English letters, e.g. {char1: int64}
            vocab_chars = tf.contrib.lookup.index_table_from_file(
                params['char_vocabulary'],
                num_oov_buckets=params['num_oov_buckets'])
            # vocabulary of Chinese words / English words, e.g. {word: int64}
            vocab_words = tf.contrib.lookup.index_table_from_file(
                params['word_vocabulary'],
                num_oov_buckets=params['num_oov_buckets'])
            # tag indices, excluding the padding_tag used to pad batches
            with Path(params['tags']).open('r', encoding='utf-8') as fi:
                # indices stores the positive-class tag indices, i.e. without padding_tag
                indices = [
                    idx for idx, tag in enumerate(fi)
                    if tag.strip() != params.get('padding_tag', 'pad')
                ]
                num_tags = len(indices) + 1  # the total tag count also includes padding_tag, used for the transition matrix
            # number of Chinese characters / English letters
            with Path(params['char_vocabulary']).open('r',
                                                      encoding='utf-8') as fi:
                # add num_oov_buckets for characters that are not in the vocabulary
                num_chars = sum(1 for _ in fi) + params['num_oov_buckets']
            # determine the mode: train, eval or predict
            training = (mode == tf.estimator.ModeKeys.TRAIN)

        with tf.name_scope('Char_Embeddings_Layer'):
            char_ids = vocab_chars.lookup(chars)  # ids of the character list
            # char2vec = tf.get_variable('char_embeddings',[num_chars,params['char2vec_dim']],tf.float32)
            # char_embeddings = tf.nn.embedding_lookup(char2vec,char_ids)
            # whether to load external vectors for Chinese characters / English letters
            if params['if_load_char2vec']:
                char2vec = np.load(
                    params['char2vec'])['embeddings']  # load the vectors; they can be looked up by char_id
                # add an all-zero vector for padding_tag; note the shape must stay consistent
                char2vec = np.vstack(
                    [char2vec, [[0.] * params['char2vec_dim']]])
                char2vec = tf.Variable(char2vec,
                                       dtype=tf.float32,
                                       trainable=False)  # the table becomes a tf tensor, not trainable
                # look up the vector of every character; because this is a batch the shape is
                # (batch_size, time_len, input_size), where the batch dimension is the
                # number of words in each input
                char_embeddings = tf.nn.embedding_lookup(char2vec, char_ids)
            else:
                # train the character vectors as part of the model
                with Path(params['char_vocabulary']).open(
                        'r', encoding='utf-8') as fi:
                    char_vocab = [
                        word for idx, word in enumerate(fi)
                        if word.strip() != ''
                    ]
                char2vec = tf.get_variable(
                    'char2vec', [len(char_vocab), params['char2vec_dim']])
                # add an all-zero vector for padding_tag; note the shape must stay consistent
                padding_vec = tf.Variable([[0.] * params['char2vec_dim']],
                                          dtype=tf.float32)
                char2vec = tf.concat([char2vec, padding_vec], axis=0)
                char2vec = tf.Variable(char2vec,
                                       dtype=tf.float32,
                                       trainable=True)  # the table becomes a tf tensor, trainable
                # Note: the padding_tag vector should stay all zeros, but while the vectors are
                # being trained it is hard to keep it at zero, so the last row of char2vec has
                # to be reset to zero each time. This is done with a mask: a lookup table with
                # the same shape as char2vec in which every element is 1 except the all-zero last row.
                mask = [params['char2vec_dim']
                        ] * len(char_vocab) + [0] * params['char2vec_dim']
                mask_lookup_table = tf.sequence_mask(mask, dtype=tf.float32)
                mask_vec = tf.nn.embedding_lookup(mask_lookup_table, char_ids)
                # look up the vector of every character; because this is a batch the shape is
                # (batch_size, time_len, input_size), where the batch dimension is the
                # number of words in each input
                embeddings = tf.nn.embedding_lookup(char2vec, char_ids)
                # reset the padding_tag vectors in char_ids to zero
                char_embeddings = tf.multiply(embeddings, mask_vec)

        with tf.name_scope('Char_Embedding_Dropout_Layer'):
            # char_embeddings.shape = (None, None, None, params['char2vec_dim'])
            # the first None is batch_size, the second is the number of words per input,
            # the third is the number of characters in each word
            char_embeddings = tf.layers.dropout(char_embeddings,
                                                rate=params['dropout'],
                                                training=training)

        with tf.name_scope('Char_LSTM_Layer'):
            dim_words = tf.shape(char_embeddings)[1]  # number of words in the current input
            dim_chars = tf.shape(char_embeddings)[2]  # number of characters per word in the current input
            flat = tf.reshape(char_embeddings,
                              [-1, dim_chars, params['char2vec_dim']])
            t = tf.transpose(flat, perm=[1, 0, 2])
            lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(
                params['char_lstm_size'])
            lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_fw)
            # output of the forward LSTM at its final time step
            _, (_, output_fw) = lstm_cell_fw(t,
                                             dtype=tf.float32,
                                             sequence_length=tf.reshape(
                                                 nchars, [-1]))
            # output of the backward LSTM at its final time step
            _, (_, output_bw) = lstm_cell_bw(t,
                                             dtype=tf.float32,
                                             sequence_length=tf.reshape(
                                                 nchars, [-1]))
            # concatenate the two final outputs along the last axis
            output = tf.concat([output_fw, output_bw], axis=-1)
            char_embeddings = tf.reshape(
                output, [-1, dim_words, params['char_lstm_size'] * 2])

        with tf.name_scope('Word_Embeddings_Layer'):
            word_ids = vocab_words.lookup(words)  # ids of the word list
            # whether to load external word vectors
            if params['if_load_word2vec']:
                word2vec = np.load(
                    params['word2vec'])['embeddings']  # load the vectors; they can be looked up by word_id
                # add an all-zero vector for padding_tag; note the shape must stay consistent
                word2vec = np.vstack(
                    [word2vec, [[0.] * params['word2vec_dim']]])
                word2vec = tf.Variable(word2vec,
                                       dtype=tf.float32,
                                       trainable=False)  # the table becomes a tf tensor, not trainable
                # look up the vector of every word; because this is a batch the shape is
                # (batch_size, time_len, input_size)
                word_embeddings = tf.nn.embedding_lookup(word2vec, word_ids)
            else:
                # train the word vectors as part of the model
                with Path(params['word_vocabulary']).open(
                        'r', encoding='utf-8') as fi:
                    vocab = [
                        word for idx, word in enumerate(fi)
                        if word.strip() != ''
                    ]
                word2vec = tf.get_variable(
                    'word2vec', [len(vocab), params['word2vec_dim']])
                # add an all-zero vector for padding_tag; note the shape must stay consistent
                padding_vec = tf.Variable([[0.] * params['word2vec_dim']],
                                          dtype=tf.float32)
                word2vec = tf.concat([word2vec, padding_vec], axis=0)
                word2vec = tf.Variable(word2vec,
                                       dtype=tf.float32,
                                       trainable=True)  # the table becomes a tf tensor, trainable
                # Note: the padding_tag vector should stay all zeros, but while the vectors are
                # being trained it is hard to keep it at zero, so the last row of word2vec has
                # to be reset to zero each time, using a mask: a lookup table with the same
                # shape as word2vec in which every element is 1 except the all-zero last row.
                mask = [params['word2vec_dim']
                        ] * len(vocab) + [0] * params['word2vec_dim']
                mask_lookup_table = tf.sequence_mask(mask, dtype=tf.float32)
                mask_vec = tf.nn.embedding_lookup(mask_lookup_table, word_ids)
                # look up the vector of every word; because this is a batch the shape is
                # (batch_size, time_len, input_size)
                embeddings = tf.nn.embedding_lookup(word2vec, word_ids)
                # reset the padding_tag vectors in word_ids to zero
                word_embeddings = tf.multiply(embeddings, mask_vec)

        with tf.name_scope('Concatenate_CharEmbedding_WordEmbedding'):
            embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)

        with tf.name_scope('Dropout_Layer'):
            embeddings = tf.layers.dropout(embeddings,
                                           rate=params['dropout'],
                                           training=training)

        with tf.name_scope('Word_Bi_LSTM'):
            # transpose the input to shape (time_len, batch_size, input_size) for the LSTM
            inputs = tf.transpose(embeddings, perm=[1, 0, 2])
            # forward LSTM
            lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(
                params['word_lstm_size'])
            lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(
                params['word_lstm_size'])
            # backward LSTM
            lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
            # forward hidden states at every time step
            output_fw, _ = lstm_cell_fw(inputs,
                                        dtype=tf.float32,
                                        sequence_length=nwords)
            # backward hidden states at every time step
            output_bw, _ = lstm_cell_bw(inputs,
                                        dtype=tf.float32,
                                        sequence_length=nwords)
            # concatenate the states of the two directions along the last axis
            output = tf.concat([output_fw, output_bw], axis=-1)
            # transpose output back to shape (batch_size, time_len, input_size)
            output = tf.transpose(output, perm=[1, 0, 2])

        with tf.name_scope('LSTM_dropout'):
            output = tf.layers.dropout(output,
                                       rate=params['dropout'],
                                       training=training)

        with tf.name_scope('Fully_connected_layer'):
            # the dense layer computes the per-time-step tag scores
            logits = tf.layers.dense(output, num_tags)

        with tf.name_scope('CRF'):
            # CRF transition matrix
            crf_params = tf.get_variable('crf', [num_tags, num_tags],
                                         dtype=tf.float32)
            # CRF decoding; pred_ids is the predicted tag sequence
            pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

        # decide whether this is training, evaluation or prediction
        if mode == tf.estimator.ModeKeys.PREDICT:
            # Prediction
            # table mapping tag indices back to tag strings, i.e. {id: tag, ...}
            reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
                params['tags'])
            # map the predicted tag ids back to tag strings
            pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
            # this dict holds everything we want to predict
            predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)
        else:
            # Loss
            # table mapping tags to indices, {tag: id}; note that it includes the padding tag
            vocab_tags = tf.contrib.lookup.index_table_from_file(
                params['tags'])
            # convert the gold tags into an id sequence
            tags = vocab_tags.lookup(labels)
            # loss: negative log-likelihood
            log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                logits, tags, nwords, crf_params)
            loss = tf.reduce_mean(-log_likelihood)

            # evaluation metrics
            weights = tf.sequence_mask(nwords)
            metrics = {
                'acc': tf.metrics.accuracy(tags, pred_ids, weights),
                'precision': precision(tags, pred_ids, num_tags, indices,
                                       weights),
                'recall': recall(tags, pred_ids, num_tags, indices, weights),
                'f1': f1(tags, pred_ids, num_tags, indices, weights),
            }

            for metric_name, op in metrics.items():
                tf.summary.scalar(metric_name, op[1])

            # evaluation
            if mode == tf.estimator.ModeKeys.EVAL:
                return tf.estimator.EstimatorSpec(mode,
                                                  loss=loss,
                                                  eval_metric_ops=metrics)
            # training
            elif mode == tf.estimator.ModeKeys.TRAIN:
                # optimizer
                train_op = tf.train.AdamOptimizer().minimize(
                    loss, global_step=tf.train.get_or_create_global_step())
                return tf.estimator.EstimatorSpec(mode,
                                                  loss=loss,
                                                  train_op=train_op)
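The padding-row handling described in the comments of the example above (keeping the trainable embedding table's padding vector at zero) can also be expressed more compactly; a minimal sketch, assuming the padding id is the last row of the table:

import tensorflow as tf

vocab_size, dim = 1000, 100
emb = tf.get_variable('emb_demo', [vocab_size + 1, dim])   # last row reserved for padding
keep = tf.concat([tf.ones([vocab_size, 1]), tf.zeros([1, 1])], axis=0)
emb = emb * keep                                           # padding row forced to zero every step
ids = tf.placeholder(tf.int32, [None, None])
looked_up = tf.nn.embedding_lookup(emb, ids)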
Example #22
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars) = features

    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars + 1, params['dim_chars']],
                               tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    # char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
    #                                     training=training)

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, weights,
                                            params['char_filters'],
                                            params['char_kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['w2v'])['embeddings']  # np.array
    print("glove shape", glove.shape)
    variable = np.vstack([glove,
                          [[0.] * params['dim']]])  # [vob_size, emb_size]
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # sess = tf.InteractiveSession()
    # emb_shape = sess.run(tf.shape(embeddings))
    # print("-"*50,'emb_shape:',emb_shape)
    # block_unflat_scores shape: [batch_size, max_seq_len, class_num]
    block_unflat_scores, _, l2_loss = feature_layers(embeddings, reuse=False)
    pred_ids = tf.argmax(block_unflat_scores[-1], 2)
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        # input_mask = tf.ones(shape=[words.get_shape().as_list()[0], params["max_seq_len"]], dtype=tf.int32)
        # input_mask = tf.ones_like(words,dtype=tf.int32)
        # for i, real_seq_len in enumerate(nwords):
        #    input_mask[i, real_seq_len:] = 0
        # input_mask = np.zeros((params["batch_size"], params["max_seq_len"])).astype("int")
        # for i, real_seq_len in enumerate(nwords.eval()):
        #    input_mask[i, real_seq_len:] = 0

        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        # CalculateMean cross-entropy loss
        with tf.name_scope("loss"):
            loss = tf.constant(0.0)
            # labels = tf.cast(labels, 'int32')
            # block_unflat_scores = tf.Print(block_unflat_scores,[block_unflat_scores[-1].shape])
            # print(block_unflat_scores[-1].shape)
            # tags = tf.Print(tags,[tags.shape])
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=block_unflat_scores[-1], labels=tags)
            # masked_losses = tf.multiply(losses, input_mask)
            # loss += tf.div(tf.reduce_sum(masked_losses), tf.reduce_sum(input_mask))
            loss += tf.reduce_sum(losses)
            loss += params["l2_penalty"] * l2_loss

        # Metrics
        weights = tf.sequence_mask(nwords)
        # tags_min = tf.reduce_min(tags)
        # tags_min=tf.Print(tags_min,[tags_min], message="debug mertics tags_min")
        # tags = tf.Print(tags,[tags,tags_min], message="debug mertics tags")
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
Example #23
    def model_fn(self, features, labels, mode, params):
        # For serving, features are a bit different

        if isinstance(features, dict):
            features = features['words'], features['nwords']

        # Read vocabs and inputs
        dropout = params['dropout']
        words, nwords = features
        
        #nwords = tf.shape(words)[0]
        #print('###########tf.shape nwords:{}#######'.format(nwords))
        training = (mode == tf.estimator.ModeKeys.TRAIN)
        vocab_words = tf.contrib.lookup.index_table_from_file(
            params['words'], num_oov_buckets=params['num_oov_buckets'])
        if mode == tf.estimator.ModeKeys.PREDICT:
            # Word Embeddings
            word_ids = vocab_words.lookup(words)
            if self.embeding == 'glove':
                glove = np.load(params['glove'])['embeddings']  # np.array
                variable = np.vstack([glove, [[0.]*params['dim']]])
                variable = tf.Variable(variable, dtype=tf.float32, trainable=True)
                embeddings = tf.nn.embedding_lookup(variable, word_ids)
                embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

            # add by panyc
            # with Path(params['words']).open() as f:
            #     vocab_words = f.readlines()
            #     vocab_length = len(vocab_words)
            # end
            else:

                embeddings = tf.Variable(
                    # tf.random_uniform([vocab_length + 1, 300], -1.0, 1.0))
                    tf.random_normal([params['embeding_size'], 300], 0.0, 0.057735026918962574)
                )
                embeddings = tf.nn.embedding_lookup(embeddings, word_ids)
                embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

            # LSTM
            # t = tf.transpose(embed, perm=[1, 0, 2])
            t = tf.transpose(embeddings, perm=[1, 0, 2])
            lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
            lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
            lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
            output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
            output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
            output = tf.concat([output_fw, output_bw], axis=-1)
            output = tf.transpose(output, perm=[1, 0, 2])
            output = tf.layers.dropout(output, rate=dropout, training=training)

            # CRF
            logits = tf.layers.dense(output, params['num_tags'])
            crf_params = tf.get_variable("crf", [params['num_tags'], params['num_tags']], dtype=tf.float32)
            pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

            # Predictions
            reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
                params['tags'])
            pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
            predictions = {
                'pred_ids': pred_ids,
                'tags': pred_strings
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)
        else:
            print('##########nwords:{}###########'.format(nwords))
            #words_shards = tf.split(words, self.num_gpus)
            #labels_shards = tf.split(labels, self.num_gpus)
            words = tf.cond(tf.less(tf.shape(words)[0], self.num_gpus), \
                lambda:tf.concat([words]*self.num_gpus,0),lambda:words)
            nwords = tf.cond(tf.less(tf.shape(nwords)[0], self.num_gpus), \
                lambda:tf.concat([nwords]*self.num_gpus,0),lambda:nwords)
            labels = tf.cond(tf.less(tf.shape(labels)[0], self.num_gpus), \
                lambda:tf.concat([labels]*self.num_gpus,0),lambda:labels)
            n = (tf.shape(words)[0]//self.num_gpus ) * self.num_gpus
            words = words[:n]
            nwords = nwords[:n]
            labels = labels[:n]
            words_shards = tf.split(words, self.num_gpus)
            nwords_shards = tf.split(nwords, self.num_gpus)
            labels_shards = tf.split(labels, self.num_gpus)
            loss_shards = []
            grad_shards = []
            metric_accuracy = []
            accuracy_op = None
            metric_precision = []
            precision_op = None
            metric_recall =  []
            recall_op = None
            metric_f1 = []
            f1_op = None
            #nwords = tf.div(nwords, self.num_gpus)
            #nwords=10
            #nwords = tf.constant([nwords,], dtype=tf.int32)
            for i, device in enumerate(self.devices):
                with tf.variable_scope( tf.get_variable_scope(), reuse=True if i > 0 else None):
                    with tf.device(device):
                        words = words_shards[i]
                        nwords = nwords_shards[i]
                        labels = labels_shards[i]
                        word_ids = vocab_words.lookup(words)
                        if self.embeding == 'glove':
                            glove = np.load(params['glove'])['embeddings']  # np.array
                            variable = np.vstack([glove, [[0.]*params['dim']]])
                            variable = tf.Variable(variable, dtype=tf.float32, trainable=True)
                            embeddings = tf.nn.embedding_lookup(variable, word_ids)
                            embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

                        # add by panyc
                        # with Path(params['words']).open() as f:
                        #     vocab_words = f.readlines()
                        #     vocab_length = len(vocab_words)
                        # end
                        else:

                            embeddings = tf.Variable(
                                # tf.random_uniform([vocab_length + 1, 300], -1.0, 1.0))
                                tf.random_normal([params['embeding_size'], 300], 0.0, 0.057735026918962574)
                            )
                            embeddings = tf.nn.embedding_lookup(embeddings, word_ids)
                            embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

                        # LSTM
                        # t = tf.transpose(embed, perm=[1, 0, 2])
                        t = tf.transpose(embeddings, perm=[1, 0, 2])
                        lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
                        lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
                        lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
                        output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
                        output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
                        output = tf.concat([output_fw, output_bw], axis=-1)
                        output = tf.transpose(output, perm=[1, 0, 2])
                        output = tf.layers.dropout(output, rate=dropout, training=training)

                        # CRF
                        logits = tf.layers.dense(output, params['num_tags'])
                        crf_params = tf.get_variable("crf", [params['num_tags'], params['num_tags']], dtype=tf.float32)
                        pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

                        # Loss

                        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
                        # vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'], num_oov_buckets=params['num_oov_buckets'])
                        tags = vocab_tags.lookup(labels)
                        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                            logits, tags, nwords, crf_params)
                        loss = tf.reduce_mean(-log_likelihood)
                        loss_shards.append(loss)
                        weights = tf.sequence_mask(nwords,tf.shape(tags)[1])
                        ## add by panyc
                        #weights = tf.expand_dims(weights,axis=0)
                        ## end
                        val,accuracy_op = tf.metrics.accuracy(tags, pred_ids, weights)
                        metric_accuracy.append([val])
                        val,precision_op = precision(tags, pred_ids, params['num_tags'], self.indices, weights)
                        metric_precision.append([val])
                        val,recall_op = recall(tags, pred_ids, params['num_tags'], self.indices, weights)
                        metric_recall.append([val])
                        val,f1_op = f1(tags, pred_ids, params['num_tags'], self.indices, weights)
                        metric_f1.append([val])

            loss = tf.reduce_mean(loss_shards)
            metric_accuracy = tf.reduce_mean(metric_accuracy)
            metric_precision = tf.reduce_mean(metric_precision)
            metric_recall = tf.reduce_mean(metric_recall)
            metric_f1 = tf.reduce_mean(metric_f1)
            metrics = {
                'acc': (metric_accuracy,accuracy_op),
                'precision': (metric_precision,precision_op),
                'recall': (metric_recall, recall_op),
                'f1': (metric_f1, f1_op),
            }
            # Metrics
            #weights = tf.sequence_mask(nwords)
            for metric_name, op in metrics.items():
                print('############op##########')
                print(op)
                tf.summary.scalar(metric_name, op[1])

            if mode == tf.estimator.ModeKeys.EVAL:
                return tf.estimator.EstimatorSpec(
                    mode, loss=loss, eval_metric_ops=metrics)

            elif mode == tf.estimator.ModeKeys.TRAIN:
                # train_op = tf.train.AdamOptimizer().minimize(
                #     loss, global_step=tf.train.get_or_create_global_step())
                train_op = tf.train.AdamOptimizer(learning_rate=self.params['learnning_rate']).minimize(
                    loss, global_step=tf.train.get_or_create_global_step())
                return tf.estimator.EstimatorSpec(
                    mode, loss=loss, train_op=train_op)
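The shard-preparation trick used above, isolated as a sketch; `num_gpus` and the placeholder tensor are assumptions for illustration only.

import tensorflow as tf

num_gpus = 2
words = tf.placeholder(tf.string, [None, None])   # [batch, time]

# If the batch is smaller than num_gpus, tile it; then truncate so the batch
# size is an exact multiple of num_gpus, which tf.split needs for equal shards.
words = tf.cond(tf.less(tf.shape(words)[0], num_gpus),
                lambda: tf.concat([words] * num_gpus, 0),
                lambda: words)
n = (tf.shape(words)[0] // num_gpus) * num_gpus
words_shards = tf.split(words[:n], num_gpus)      # one shard per device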
Exemple #24
0
 def build_metrics(self):
     with tf.name_scope('Metrics'):
         average = 'micro'
         with tf.name_scope('Train'):
             self.train_target_precision, self.train_target_precision_op = tf_metrics.precision(
                 self.y_target,
                 self.target_preds,
                 self.C_tar, [i for i in range(1, self.C_tar)],
                 average=average)
             self.train_target_recall, self.train_target_recall_op = tf_metrics.recall(
                 self.y_target,
                 self.target_preds,
                 self.C_tar, [i for i in range(1, self.C_tar)],
                 average=average)
             self.train_target_f1, self.train_target_f1_op = tf_metrics.f1(
                 self.y_target,
                 self.target_preds,
                 self.C_tar, [i for i in range(1, self.C_tar)],
                 average=average)
             self.train_sentiment_precision, self.train_sentiment_precision_op = tf_metrics.precision(
                 self.y_sentiment,
                 self.sentiment_preds,
                 self.C_sent, [i for i in range(1, self.C_sent)],
                 average=average)
             self.train_sentiment_recall, self.train_sentiment_recall_op = tf_metrics.recall(
                 self.y_sentiment,
                 self.sentiment_preds,
                 self.C_sent, [i for i in range(1, self.C_sent)],
                 average=average)
             self.train_sentiment_f1, self.train_sentiment_f1_op = tf_metrics.f1(
                 self.y_sentiment,
                 self.sentiment_preds,
                 self.C_sent, [i for i in range(1, self.C_sent)],
                 average=average)
         with tf.name_scope('Test'):
             self.test_target_precision, self.test_target_precision_op = tf_metrics.precision(
                 self.y_target,
                 self.target_preds,
                 self.C_tar, [i for i in range(1, self.C_tar)],
                 average=average)
             self.test_target_recall, self.test_target_recall_op = tf_metrics.recall(
                 self.y_target,
                 self.target_preds,
                 self.C_tar, [i for i in range(1, self.C_tar)],
                 average=average)
             self.test_target_f1, self.test_target_f1_op = tf_metrics.f1(
                 self.y_target,
                 self.target_preds,
                 self.C_tar, [i for i in range(1, self.C_tar)],
                 average=average)
             self.test_sentiment_precision, self.test_sentiment_precision_op = tf_metrics.precision(
                 self.y_sentiment,
                 self.sentiment_preds,
                 self.C_sent, [i for i in range(1, self.C_sent)],
                 average=average)
             self.test_sentiment_recall, self.test_sentiment_recall_op = tf_metrics.recall(
                 self.y_sentiment,
                 self.sentiment_preds,
                 self.C_sent, [i for i in range(1, self.C_sent)],
                 average=average)
             self.test_sentiment_f1, self.test_sentiment_f1_op = tf_metrics.f1(
                 self.y_sentiment,
                 self.sentiment_preds,
                 self.C_sent, [i for i in range(1, self.C_sent)],
                 average=average)
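A hypothetical driver for the (value, update_op) pairs built above: run the update op once per batch, read the value at the end, and reset the metric's local variables before the next pass; `model` and `feed_fn` are placeholders, not part of the snippet.

import tensorflow as tf

def run_f1_over_epoch(sess, model, num_batches, feed_fn):
    sess.run(tf.local_variables_initializer())               # reset metric accumulators
    for step in range(num_batches):
        sess.run(model.train_target_f1_op, feed_dict=feed_fn(step))
    return sess.run(model.train_target_f1)                   # aggregated F1 over the epoch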
Exemple #25
0
def model_fn(features, labels, mode, params):
    # For serving features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
        params['num_tags'] = num_tags

    # Graph
    (words, nwords), (chars, nchars) = features
    logits, crf_params = graph_fn(features, labels, mode, params)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    # Moving Average
    variables = tf.get_collection('trainable_variables', 'graph')
    ema = tf.train.ExponentialMovingAverage(0.999)
    ema_op = ema.apply(variables)
    logits_ema, crf_params_ema = graph_fn(features,
                                          labels,
                                          mode,
                                          params,
                                          reuse=True,
                                          getter=ema_getter(ema))
    pred_ids_ema, _ = tf.contrib.crf.crf_decode(logits_ema, crf_params_ema,
                                                nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        pred_strings_ema = reverse_vocab_tags.lookup(tf.to_int64(pred_ids_ema))
        predictions = {
            'pred_ids': pred_ids,
            'tags': pred_strings,
            'pred_ids_ema': pred_ids_ema,
            'tags_ema': pred_strings_ema,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'acc_ema': tf.metrics.accuracy(tags, pred_ids_ema, weights),
            'pr': precision(tags, pred_ids, num_tags, indices, weights),
            'pr_ema': precision(tags, pred_ids_ema, num_tags, indices,
                                weights),
            'rc': recall(tags, pred_ids, num_tags, indices, weights),
            'rc_ema': recall(tags, pred_ids_ema, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
            'f1_ema': f1(tags, pred_ids_ema, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss,
                global_step=tf.train.get_or_create_global_step(),
                var_list=variables)
            train_op = tf.group([train_op, ema_op])
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
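`ema_getter` is used above but not shown; one plausible definition, consistent with the call `getter=ema_getter(ema)`, is a factory returning a custom variable getter that swaps in the moving-average shadow variable when one exists (an assumption, not necessarily the author's code).

import tensorflow as tf

def ema_getter(ema):
    """Return a custom getter that substitutes EMA shadow variables when available."""
    def _getter(getter, name, *args, **kwargs):
        var = getter(name, *args, **kwargs)
        ema_var = ema.average(var)          # None if no shadow variable exists for var
        return ema_var if ema_var is not None else var
    return _getter

# Hypothetical use inside graph_fn:
# with tf.variable_scope('graph', reuse=True, custom_getter=ema_getter(ema)):
#     ...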
Exemple #26
0
def model_fn(features, labels, mode, params):
    if isinstance(features, dict):
        features = features['words'], features['nwords']

    dropout = params['dropout']
    words, nwords = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1

    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    embeddings = tf.nn.embedding_lookup(variable, word_ids)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    t = tf.transpose(embeddings, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    dense_layer = tf.layers.Dense(num_tags)
    logits = dense_layer(output)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        ##########
        epsilon = 5
        perturbed = _add_perturbation(embeddings, loss, epsilon)

        t = tf.transpose(perturbed, perm=[1, 0, 2])

        output_fw1, _1 = lstm_cell_fw(t,
                                      dtype=tf.float32,
                                      sequence_length=nwords)
        output_bw1, _1 = lstm_cell_bw(t,
                                      dtype=tf.float32,
                                      sequence_length=nwords)
        output1 = tf.concat([output_fw1, output_bw1], axis=-1)
        output1 = tf.transpose(output1, perm=[1, 0, 2])
        output1 = tf.layers.dropout(output1, rate=dropout, training=training)

        logits1 = dense_layer(output1)

        log_likelihood1, _1 = tf.contrib.crf.crf_log_likelihood(
            logits1, tags, nwords, crf_params)
        adv_loss = tf.reduce_mean(-log_likelihood1)

        loss += adv_loss
        ##########

        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
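`_add_perturbation` is referenced above but not defined in the snippet; a sketch in the spirit of adversarial training on embeddings (gradient of the loss, L2-normalised and scaled by epsilon), offered as an assumption about the intended behaviour rather than the author's exact code.

import tensorflow as tf

def _add_perturbation(embeddings, loss, epsilon):
    """Sketch: move the embeddings along the normalised gradient of the loss."""
    grad = tf.gradients(loss, embeddings)[0]
    grad = tf.stop_gradient(grad)   # do not back-propagate through the perturbation itself
    norm = tf.sqrt(tf.reduce_sum(tf.square(grad), axis=[1, 2], keepdims=True)) + 1e-12
    return embeddings + epsilon * grad / norm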
Exemple #27
0
def model_fn(mode, features, labels, params):
    model_params = ModelParams(**params['model_params'])
    training_params = TrainingParams.from_dict(params['training_params'])
    prediction_type = params['prediction_type']
    classes_file = params['classes_file']

    input_images = features['images']

    if mode == tf.estimator.ModeKeys.PREDICT:
        margin = training_params.training_margin
        input_images = tf.pad(
            input_images, [[0, 0], [margin, margin], [margin, margin], [0, 0]],
            mode='SYMMETRIC',
            name='mirror_padding')

    if model_params.pretrained_model_name == 'vgg16':
        network_output = inference_vgg16(
            input_images,
            model_params,
            model_params.n_classes,
            use_batch_norm=model_params.batch_norm,
            weight_decay=model_params.weight_decay,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN))
        key_restore_model = 'vgg_16'

    elif model_params.pretrained_model_name == 'resnet50':
        # Modified by me: added 'selected_intermediate_layers'
        network_output, selected_intermediate_layers = inference_resnet_v1_50(
            input_images,
            model_params,
            model_params.n_classes,
            use_batch_norm=model_params.batch_norm,
            weight_decay=model_params.weight_decay,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN))
        key_restore_model = 'resnet_v1_50'
    #added for classification
    elif model_params.pretrained_model_name == 'resnet50_classification':
        network_output, selected_intermediate_layers = inference_resnet_v1_50_classification(
            input_images,
            model_params,
            model_params.n_classes,
            use_batch_norm=model_params.batch_norm,
            weight_decay=model_params.weight_decay,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN))
        key_restore_model = 'resnet_v1_50'
    elif model_params.pretrained_model_name == 'unet':
        network_output = inference_u_net(
            input_images,
            model_params,
            model_params.n_classes,
            use_batch_norm=model_params.batch_norm,
            weight_decay=model_params.weight_decay,
            is_training=(mode == tf.estimator.ModeKeys.TRAIN))
        key_restore_model = None
    else:
        raise NotImplementedError

    if mode == tf.estimator.ModeKeys.TRAIN:
        if key_restore_model is not None:
            # Pretrained weights as initialization
            pretrained_restorer = tf.train.Saver(var_list=[
                v for v in tf.global_variables() if key_restore_model in v.name
            ])

            def init_fn(scaffold, session):
                pretrained_restorer.restore(session,
                                            model_params.pretrained_model_file)
        else:
            init_fn = None
    else:
        init_fn = None

    if mode == tf.estimator.ModeKeys.PREDICT:
        pass
        #margin = training_params.training_margin
        # Crop padding
        #if margin > 0:
        #    network_output = network_output[:, margin:-margin, margin:-margin, :]

    # Prediction
    # ----------
    # Added by me: second dictionary
    intermediate_layers_dict = {}
    if prediction_type == PredictionType.CLASSIFICATION:
        #squeezed for image classification
        #network_output = tf.Print(network_output, [tf.shape(tf.nn.softmax(tf.squeeze(network_output))), tf.shape(labels)])
        prediction_probs = tf.nn.softmax(tf.squeeze(network_output),
                                         name='softmax')
        prediction_labels = tf.argmax(tf.squeeze(network_output),
                                      axis=-1,
                                      name='label_preds')
        predictions = {'probs': prediction_probs, 'labels': prediction_labels}

        # Added by me: second dictionary
        desired_endpoints = [
            'resnet_v1_50/conv1', 'resnet_v1_50/block1/unit_3/bottleneck_v1',
            'resnet_v1_50/block2/unit_4/bottleneck_v1',
            'resnet_v1_50/block3/unit_6/bottleneck_v1',
            'resnet_v1_50/block4/unit_3/bottleneck_v1'
        ]

        #added by me, commented due to classification
        for index, selected_intermediate_layer in enumerate(
                selected_intermediate_layers):
            intermediate_layers_dict[
                desired_endpoints[index]] = selected_intermediate_layer
    elif prediction_type == PredictionType.REGRESSION:
        predictions = {'output_values': network_output}
        prediction_labels = network_output
    elif prediction_type == PredictionType.MULTILABEL:
        with tf.name_scope('prediction_ops'):
            prediction_probs = tf.nn.sigmoid(network_output,
                                             name='sigmoid')  # [B,H,W,C]
            prediction_labels = tf.cast(
                tf.greater_equal(prediction_probs, 0.5, name='labels'),
                tf.int32)  # [B,H,W,C]
            predictions = {
                'probs': prediction_probs,
                'labels': prediction_labels
            }
    else:
        raise NotImplementedError

    # Loss
    # ----
    if mode in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL]:
        regularized_loss = tf.losses.get_regularization_loss()
        if prediction_type == PredictionType.CLASSIFICATION:
            #onehot_labels = tf.one_hot(indices=labels, depth=model_params.n_classes)
            #network_output = tf.Print(network_output, [network_output])
            with tf.name_scope("loss"):
                per_pixel_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=tf.squeeze(network_output),
                    labels=labels,
                    name='per_pixel_loss')
                #per_pixel_loss = tf.nn.softmax_cross_entropy_with_logits(logits=network_output,
                #                                                         labels=onehot_labels, name='per_pixel_loss')

                #if training_params.focal_loss_gamma > 0.0:
                #    # Probability per pixel of getting the correct label
                #    probs_correct_label = tf.reduce_max(tf.multiply(prediction_probs, onehot_labels))
                #    modulation = tf.pow((1. - probs_correct_label), training_params.focal_loss_gamma)
                #    per_pixel_loss = tf.multiply(per_pixel_loss, modulation)

                if training_params.weights_labels is not None:
                    # onehot_labels is needed by the weight mask below but its
                    # definition earlier in this branch is commented out.
                    onehot_labels = tf.one_hot(indices=labels,
                                               depth=model_params.n_classes)
                    weight_mask = tf.reduce_sum(tf.constant(
                        np.array(training_params.weights_labels,
                                 dtype=np.float32)[None, None, None]) *
                                                onehot_labels,
                                                axis=-1)
                    per_pixel_loss = per_pixel_loss * weight_mask
                if training_params.local_entropy_ratio > 0:
                    assert 'weight_maps' in features
                    r = training_params.local_entropy_ratio
                    per_pixel_loss = per_pixel_loss * (
                        (1 - r) + r * features['weight_maps'])

        elif prediction_type == PredictionType.REGRESSION:
            per_pixel_loss = tf.squared_difference(labels,
                                                   network_output,
                                                   name='per_pixel_loss')
        elif prediction_type == PredictionType.MULTILABEL:
            with tf.name_scope('sigmoid_xentropy_loss'):
                labels_floats = tf.cast(labels, tf.float32)
                per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=labels_floats,
                    logits=network_output,
                    name='per_pixel_loss')
                if training_params.weights_labels is not None:
                    weight_mask = tf.maximum(
                        tf.reduce_max(tf.constant(
                            np.array(training_params.weights_labels,
                                     dtype=np.float32)[None, None, None]) *
                                      labels_floats,
                                      axis=-1), 1.0)
                    per_pixel_loss = per_pixel_loss * weight_mask[:, :, :,
                                                                  None]
        else:
            raise NotImplementedError

        margin = training_params.training_margin
        input_shapes = features['shapes']
        with tf.name_scope('Loss'):

            def _fn(_in):
                output, shape = _in
                return tf.reduce_mean(output[margin:shape[0] - margin,
                                             margin:shape[1] - margin])

            #per_img_loss = tf.map_fn(_fn, (per_pixel_loss, input_shapes), dtype=tf.float32)
            per_img_loss = per_pixel_loss
            loss = tf.reduce_mean(per_img_loss, name='loss')

        loss += regularized_loss
    else:
        loss, regularized_loss = None, None

    # Train
    # -----
    if mode == tf.estimator.ModeKeys.TRAIN:
        # >> This stalls the training... why?
        # ema = tf.train.ExponentialMovingAverage(0.9)
        # tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, ema.apply([loss]))
        # ema_loss = ema.average(loss)

        if training_params.exponential_learning:
            global_step = tf.train.get_or_create_global_step()
            learning_rate = tf.train.exponential_decay(
                training_params.learning_rate,
                global_step,
                decay_steps=200,
                decay_rate=0.95,
                staircase=False)
        else:
            learning_rate = training_params.learning_rate
        tf.summary.scalar('learning_rate', learning_rate)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.minimize(
                loss, global_step=tf.train.get_or_create_global_step())
    else:
        ema_loss, train_op = None, None

    # Summaries
    # ---------
    if mode == tf.estimator.ModeKeys.TRAIN:
        with tf.name_scope('summaries'):
            tf.summary.scalar('losses/loss', loss)
            tf.summary.scalar('losses/loss_per_batch', loss)
            tf.summary.scalar('losses/regularized_loss', regularized_loss)
            if prediction_type == PredictionType.CLASSIFICATION:
                pass
                #tf.summary.image('output/prediction',
                #                 tf.image.resize_images(class_to_label_image(prediction_labels, classes_file),
                #                                        tf.cast(tf.shape(network_output)[1:3] / 3, tf.int32)),
                #                 max_outputs=1)
                #if model_params.n_classes == 3:
                #    tf.summary.image('output/probs',
                #                     tf.image.resize_images(prediction_probs[:, :, :, :],
                #                                            tf.cast(tf.shape(network_output)[1:3] / 3, tf.int32)),
                #                     max_outputs=1)
                #if model_params.n_classes == 2:
                #    tf.summary.image('output/probs',
                #                     tf.image.resize_images(prediction_probs[:, :, :, 1:2],
                #                                            tf.cast(tf.shape(network_output)[1:3] / 3, tf.int32)),
                #                     max_outputs=1)
            elif prediction_type == PredictionType.REGRESSION:
                summary_img = tf.nn.relu(
                    network_output)[:, :, :,
                                    0:1]  # Put negative values to zero
                tf.summary.image('output/prediction',
                                 summary_img,
                                 max_outputs=1)
            elif prediction_type == PredictionType.MULTILABEL:
                labels_visualization = tf.cast(prediction_labels, tf.int32)
                labels_visualization = multiclass_to_label_image(
                    labels_visualization, classes_file)
                tf.summary.image('output/prediction_image',
                                 tf.image.resize_images(
                                     labels_visualization,
                                     tf.cast(
                                         tf.shape(labels_visualization)[1:3] /
                                         3, tf.int32)),
                                 max_outputs=1)
                class_dim = prediction_probs.get_shape().as_list()[-1]
                for c in range(0, class_dim):
                    tf.summary.image('output/prediction_probs_{}'.format(c),
                                     tf.image.resize_images(
                                         prediction_probs[:, :, :, c:c + 1],
                                         tf.cast(
                                             tf.shape(network_output)[1:3] / 3,
                                             tf.int32)),
                                     max_outputs=1)

                    # beta = tf.get_default_graph().get_tensor_by_name('upsampling/deconv_5/conv5/batch_norm/beta/read:0')
                    # tf.summary.histogram('Beta', beta)

    # Evaluation
    # ----------
    if mode == tf.estimator.ModeKeys.EVAL:
        if prediction_type == PredictionType.CLASSIFICATION:
            metrics = {
                'eval/accuracy':
                tf.metrics.accuracy(labels, predictions=prediction_labels)
            }

            nr_classes = params['model_params']['n_classes']

            for class_id in range(nr_classes):
                condition = tf.logical_or(
                    tf.equal(class_id, tf.squeeze(labels)),
                    tf.equal(class_id,
                             tf.cast(tf.squeeze(prediction_labels), tf.int32)))
                weights = tf.cond(condition, lambda: 1, lambda: 0)
                precision_key = 'eval/precision_class_{}'.format(class_id)
                recall_key = 'eval/recall_class_{}'.format(class_id)

                pred = tf.reshape(tf.expand_dims(prediction_labels, axis=0),
                                  [1])
                lab = tf.reshape(tf.expand_dims(labels, axis=0), [1])
                weights = tf.reshape(tf.expand_dims(weights, axis=0), [1])
                precision = tf_metrics.precision(labels=lab,
                                                 predictions=pred,
                                                 num_classes=nr_classes,
                                                 pos_indices=[class_id],
                                                 weights=weights,
                                                 average='micro')
                recall = tf_metrics.recall(labels=lab,
                                           predictions=pred,
                                           num_classes=nr_classes,
                                           pos_indices=[class_id],
                                           weights=weights,
                                           average='micro')
                metrics[precision_key] = precision
                metrics[recall_key] = recall

            precision_key = 'eval/macro_mean_precision_per_class'
            recall_key = 'eval/macro_mean_recall_per_class'
            mean_precision = tf_metrics.precision(labels=lab,
                                                  predictions=pred,
                                                  num_classes=nr_classes,
                                                  average='macro')
            mean_recall = tf_metrics.recall(labels=lab,
                                            predictions=pred,
                                            num_classes=nr_classes,
                                            average='macro')
            metrics[precision_key] = mean_precision
            metrics[recall_key] = mean_recall

            precision_key = 'eval/weighted_mean_precision_per_class'
            recall_key = 'eval/weighted_mean_recall_per_class'
            mean_precision = tf_metrics.precision(labels=lab,
                                                  predictions=pred,
                                                  num_classes=nr_classes,
                                                  average='weighted')
            mean_recall = tf_metrics.recall(labels=lab,
                                            predictions=pred,
                                            num_classes=nr_classes,
                                            average='weighted')
            metrics[precision_key] = mean_precision
            metrics[recall_key] = mean_recall
        elif prediction_type == PredictionType.REGRESSION:
            metrics = {
                'eval/accuracy':
                tf.metrics.mean_squared_error(labels,
                                              predictions=prediction_labels)
            }
        elif prediction_type == PredictionType.MULTILABEL:
            metrics = {
                'eval/MSE':
                tf.metrics.mean_squared_error(tf.cast(labels, tf.float32),
                                              predictions=prediction_probs),
                'eval/accuracy':
                tf.metrics.accuracy(tf.cast(labels, tf.bool),
                                    predictions=tf.cast(
                                        prediction_labels, tf.bool))
            }
    else:
        metrics = None

    # Export
    # ------
    if mode == tf.estimator.ModeKeys.PREDICT:

        export_outputs = dict()

        if 'original_shape' in features.keys():
            with tf.name_scope('ResizeOutput'):
                #      resized_predictions = dict()
                # Resize all the elements in predictions
                #     for k, v in predictions.items():
                # Labels is rank-3 so we need to be careful in using tf.image.resize_images
                #        assert isinstance(v, tf.Tensor)
                #       v2 = v if len(v.get_shape()) == 4 else v[:, :, :, None]
                #      v2 = tf.image.resize_images(v2, features['original_shape'],
                #                                method=tf.image.ResizeMethod.BILINEAR if v.dtype == tf.float32
                #                                else tf.image.ResizeMethod.NEAREST_NEIGHBOR)
                #     v2 = v2 if len(v.get_shape()) == 4 else v2[:, :, :, 0]
                #     resized_predictions[k] = v2
                # export_outputs['resized_output'] = tf.estimator.export.PredictOutput(resized_predictions)

                #added by me: second dictionary
                intermediate_predictions = dict()
                for k, v in intermediate_layers_dict.items():
                    assert isinstance(v, tf.Tensor)
                    intermediate_predictions[k] = v
                export_outputs[
                    'intermediate_layers'] = tf.estimator.export.PredictOutput(
                        intermediate_predictions)

            predictions['original_shape'] = features['original_shape']
        #added by me: second dictionary
        predictions.update(intermediate_layers_dict)
        export_outputs['output'] = tf.estimator.export.PredictOutput(
            predictions)

        export_outputs[
            tf.saved_model.signature_constants.
            DEFAULT_SERVING_SIGNATURE_DEF_KEY] = export_outputs['output']
    else:
        export_outputs = None

    return tf.estimator.EstimatorSpec(
        mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=metrics,
        export_outputs=export_outputs,
        scaffold=tf.train.Scaffold(init_fn=init_fn))
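The warm-start pattern used above, isolated as a sketch; `checkpoint_path` and the `resnet_v1_50` scope filter are placeholders/assumptions.

import tensorflow as tf

checkpoint_path = '/path/to/pretrained.ckpt'   # placeholder
backbone_vars = [v for v in tf.global_variables() if 'resnet_v1_50' in v.name]
pretrained_restorer = tf.train.Saver(var_list=backbone_vars)

def init_fn(scaffold, session):
    # Called once by the MonitoredSession, after variable init, before training.
    pretrained_restorer.restore(session, checkpoint_path)

scaffold = tf.train.Scaffold(init_fn=init_fn)
# estimator_spec = tf.estimator.EstimatorSpec(mode, ..., scaffold=scaffold)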
Exemple #28
0
def model_fn(features, labels, mode, params):
    # For serving features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']),
                    (features['jasos'], features['njasos']))

    # Read vocabs and inputs
    (words, nwords), (chars, nchars), (jasos, njasos) = features
    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    vocab_jasos = tf.contrib.lookup.index_table_from_file(
        params['jasos'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open(encoding="utf8") as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']
    with Path(params['jasos']).open(encoding="utf8") as f:
        num_jasos = sum(1 for _ in f) + params['num_oov_buckets']

    # jaos embedding
    jaso_ids = vocab_jasos.lookup(jasos)
    variable = tf.get_variable('jasos_embeddings',
                               [num_jasos, params['dim_chars']], tf.float32)
    jaso_embeddings = tf.nn.embedding_lookup(variable, jaso_ids)
    jaso_embeddings = tf.layers.dropout(jaso_embeddings,
                                        rate=dropout,
                                        training=training)

    # Char LSTM
    dim_words = tf.shape(jaso_embeddings)[1]
    dim_chars = tf.shape(jaso_embeddings)[2]
    flat = tf.reshape(jaso_embeddings, [-1, dim_chars, params['dim_chars']])
    t = tf.transpose(flat, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    # Sequence lengths here are the per-word jamo counts (njasos).
    _, (_, output_fw) = lstm_cell_fw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(njasos, [-1]))
    _, (_, output_bw) = lstm_cell_bw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(njasos, [-1]))
    output = tf.concat([output_fw, output_bw], axis=-1)
    jaso_embeddings = tf.reshape(output, [-1, dim_words, 50])

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings,
                                        rate=dropout,
                                        training=training)

    # Char LSTM
    dim_words = tf.shape(char_embeddings)[1]
    dim_chars = tf.shape(char_embeddings)[2]
    flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']])
    t = tf.transpose(flat, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    _, (_, output_fw) = lstm_cell_fw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    _, (_, output_bw) = lstm_cell_bw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    output = tf.concat([output_fw, output_bw], axis=-1)
    char_embeddings = tf.reshape(output, [-1, dim_words, 50])
    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    fasttext = np.load(params['fasttext'])['embeddings']  # np.array
    variable = np.vstack([fasttext, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32)  #, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings, jaso_embeddings],
                           axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
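A compact, hypothetical refactoring of the char-level fused BiLSTM used above, writing the output width as 2 * char_lstm_size instead of the hard-coded 50 (which assumes char_lstm_size = 25); the function name and shapes are assumptions.

import tensorflow as tf

def char_bilstm(char_embeddings, nchars, char_lstm_size):
    """char_embeddings: [batch, words, chars, dim]; nchars: [batch, words]."""
    dim_words = tf.shape(char_embeddings)[1]
    dim_chars = tf.shape(char_embeddings)[2]
    dim = char_embeddings.get_shape().as_list()[-1]
    flat = tf.reshape(char_embeddings, [-1, dim_chars, dim])
    t = tf.transpose(flat, perm=[1, 0, 2])                        # time-major
    fw = tf.contrib.rnn.LSTMBlockFusedCell(char_lstm_size)
    bw = tf.contrib.rnn.TimeReversedFusedRNN(
        tf.contrib.rnn.LSTMBlockFusedCell(char_lstm_size))
    _, (_, out_fw) = fw(t, dtype=tf.float32,
                        sequence_length=tf.reshape(nchars, [-1]))
    _, (_, out_bw) = bw(t, dtype=tf.float32,
                        sequence_length=tf.reshape(nchars, [-1]))
    output = tf.concat([out_fw, out_bw], axis=-1)
    # Back to [batch, words, 2 * char_lstm_size]; the snippet hard-codes this as 50.
    return tf.reshape(output, [-1, dim_words, 2 * char_lstm_size])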
Exemple #29
0
def model_fn(features, labels, mode, params):
    # For serving features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    (words, nwords), (chars, nchars) = features

    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)  #[[a,b][c,z]] => [[0,1][2,25]]
    variable = tf.get_variable('chars_embeddings',
                               [num_chars, params['dim_chars']],
                               tf.float32)  #dimension char embeddings [86,100]
    char_embeddings = tf.nn.embedding_lookup(
        variable, char_ids
    )  #char_ids [0,1]: 0 picks the first vector (variable[0,:]), so [[0,1][2,25]] => [[variable[0,:],variable[1,:]][variable[2,:],variable[25,:]]]
    char_embeddings = tf.layers.dropout(char_embeddings,
                                        rate=dropout,
                                        training=training)  #dropout on 50% of the input

    # Char LSTM
    dim_words = tf.shape(
        char_embeddings
    )[1]  #max number of words per sentence [batch, max words, max chars (time len), char dim 100]
    dim_chars = tf.shape(
        char_embeddings
    )[2]  #max number of chars per word (time len for the char LSTM) [batch, max words, max chars (time len), char dim 100]

    flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']
                                        ])  #[?,max len word(or time len),100]
    t = tf.transpose(flat,
                     perm=[1, 0,
                           2])  #[max len word(or time len),?,100] time major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    _, (_, output_fw) = lstm_cell_fw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(
                                         nchars, [-1]))  #we take last state
    _, (_, output_bw) = lstm_cell_bw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(
                                         nchars, [-1]))  #we take last state

    output = tf.concat(
        [output_fw, output_bw],
        axis=-1)  #concat on the last D dimension of tensors 25+25

    char_embeddings_lstm = tf.reshape(
        output, [-1, params['char_lstm_size'] * 2])  # [b,t,D]
    char_embeddings_lstm = tf.expand_dims(char_embeddings_lstm, -2)

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings_cnn = masked_conv1d_and_max(char_embeddings, weights,
                                                params['filters'],
                                                params['kernel_size'])
    char_embeddings_cnn = tf.reshape(char_embeddings_cnn,
                                     [-1, params['filters']])
    char_embeddings_cnn = tf.expand_dims(char_embeddings_cnn, -2)

    #concat cnn and lstm char embeddings
    char_embeddings = tf.concat([char_embeddings_cnn, char_embeddings_lstm],
                                axis=-2)

    #attention
    with tf.name_scope('Attention_layer'):
        attention_output, alphas = attention(char_embeddings,
                                             params['char_lstm_size'] * 2,
                                             time_major=False,
                                             return_alphas=True)
        tf.summary.histogram('alphas', alphas)

    char_embeddings = tf.reshape(attention_output,
                                 [-1, dim_words, params['char_lstm_size'] * 2])

    # Word Embeddings
    word_ids = vocab_words.lookup(
        words
    )  #[[b'Peter', b'Blackburn'],[b'Yac', b'Amirat']] => [[b'0', b'1'],[b'2', b'3']]
    glove = np.load(params['glove'])[
        'embeddings']  # np.array of glove vectors for the vocab words (reduced list)
    variable = np.vstack([glove, [[0.] * params['dim']]
                          ])  #stack a zero row below glove, glove + [[0.]] (extra row for the OOV bucket)
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(
        variable, word_ids
    )  #[[b'0', b'1'],[b'2', b'3']] => [[b'variable[0]', b'variable[1]'],[b'variable[2]', b'variable[3]']] [2,2,300]

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings],
                           axis=-1)  #concat on the last dimension axis 100+300
    embeddings = tf.layers.dropout(embeddings, rate=dropout,
                                   training=training)  #dropout on 50% of the input

    # LSTM for lstm
    t = tf.transpose(
        embeddings, perm=[1, 0, 2]
    )  # Need time-major #put the word dim as first dimension. check batch-major VS time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    #ELMO
    elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=False)
    word_embeddings = elmo(inputs={
        "tokens": words,
        "sequence_len": nwords
    },
                           signature="tokens",
                           as_dict=True)["elmo"]

    # Concatenate output LSTM1 and ELMO Embeddings, dropout
    embeddings = tf.concat([word_embeddings, output], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM 2
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm2_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm2_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(
        output, num_tags
    )  #nn dense input: (output of bilstm); output: same shape except the last dim becomes num of tags
    crf_params = tf.get_variable(
        "crf", [num_tags, num_tags],
        dtype=tf.float32)  #variable of crf pars matrix num_tags*num_tags
    pred_ids, _ = tf.contrib.crf.crf_decode(
        logits, crf_params, nwords
    )  #decode_tags: A [batch_size, max_seq_len] matrix, with dtype tf.int32. Contains the highest scoring tag indices.
    #potentials(logits): A [batch_size, max_seq_len, num_tags] tensor of unary potentials.

    if mode == tf.estimator.ModeKeys.PREDICT:  #prediction
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(
            tf.to_int64(pred_ids)
        )  #indices = tf.constant([1, 5], tf.int64) => ["lake", "UNKNOWN"]
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(
            params['tags'])  #get tags index from file
        tags = vocab_tags.lookup(labels)  #replace labels by their indexes
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params
        )  #calculate log_likelihood given the real tags, return: A [batch_size] Tensor containing the log-likelihood of each example, given the sequence of tag indices.
        loss = tf.reduce_mean(
            -log_likelihood
        )  #Computes the mean of elements across dimensions of a tensor. x = tf.constant([[1., 1.], [2., 2.]]) tf.reduce_mean(x)  # 1.5

        # Metrics
        weights = tf.sequence_mask(
            nwords
        )  #convert the vector of size n to a matrix of bool of size n * max value in the vector v[1,2] ==> m[[true,false],[true, true]]
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(
                tags, pred_ids, num_tags, indices, weights
            ),  #ground truth, predictions, num of tags 9, The indices of the positive classes, 
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(
                metric_name, op[1]
            )  #for tensor board#tuple of (scalar float Tensor, update_op) op[1] => update_op: An operation that increments the total and count variables appropriately and whose value matches accuracy.

        if mode == tf.estimator.ModeKeys.EVAL:  #Eval
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:  #training
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step()
            )  #adam optimizer operation to optimize the loss, global_step: Optional Variable to increment by one after the variables have been updated.
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
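A minimal wiring sketch for a model_fn like the one above (an assumption about typical usage, not part of the original snippet); the param values and the train_input_fn / eval_input_fn callables are hypothetical placeholders.

# Sketch only: hypothetical params and input_fns; adjust to your own data pipeline.
import tensorflow as tf

params = {
    'dim': 300, 'dim_chars': 100, 'dropout': 0.5, 'num_oov_buckets': 1,
    'filters': 50, 'kernel_size': 3, 'lstm_size': 100, 'lstm2_size': 100,
    'words': 'vocab.words.txt', 'chars': 'vocab.chars.txt',
    'tags': 'vocab.tags.txt', 'glove': 'glove.npz',
}
cfg = tf.estimator.RunConfig(save_checkpoints_secs=120)
estimator = tf.estimator.Estimator(model_fn, 'results/model', cfg, params)
# train_input_fn / eval_input_fn are assumed to yield (features, labels) pairs
# matching the model_fn signature, e.g. ((words, nwords), (chars, nchars)), labels
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn, throttle_secs=120)
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)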
def model_fn(features, labels, mode, params):
    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars), add_features = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable(
        'chars', [num_chars + 1, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char LSTM
    weights = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(
        char_embeddings, weights, params['filters'], params['kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings,tf.cast(add_features, tf.float32)], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)


    # Self-attention over the BiLSTM outputs
    num_units = 200  # must match the BiLSTM output dim (2 * lstm_size)
    W1 = tf.get_variable("W1", [num_units, num_units],
                         initializer=tf.glorot_uniform_initializer(),
                         dtype=tf.float32)
    b1 = tf.get_variable("b1", [num_units], dtype=tf.float32)
    q = tf.tensordot(output, W1, axes=[[2], [0]])  # project each timestep: [batch, time, num_units]
    out_shape = tf.shape(output)
    b1_shuffled = tf.expand_dims(tf.expand_dims(b1, 0), 0)  # broadcast bias to [1, 1, num_units]
    q += b1_shuffled
    q = tf.tanh(q)
    q_trans = tf.transpose(q, perm=[0, 2, 1])
    out = tf.matmul(q, q_trans)  # pairwise scores between timesteps: [batch, time, time]
    out *= (1 - tf.eye(out_shape[1], out_shape[1]))  # zero the diagonal so a token does not attend to itself
    matrix = tf.nn.softmax(tf.reshape(out, (out_shape[0] * out_shape[1], out_shape[1])))
    matrix = tf.reshape(matrix, (out_shape[0], out_shape[1], out_shape[1]))  # row-normalized attention weights
    atten_out = tf.matmul(matrix, output)  # attention-weighted context for each timestep
    output = tf.concat([output, atten_out], axis=-1)
    output = tf.layers.dropout(output, rate=dropout, training=training)



    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {
            'pred_ids': pred_ids,
            'tags': pred_strings
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op)
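The masked_conv1d_and_max helper called in the char-embedding step above is defined elsewhere in the source. The following is only a plausible sketch, under the assumption that it applies a 1-D convolution over each word's characters followed by a masked max-over-time pooling; the function name and details here are not the original implementation.

# Hypothetical sketch of a masked char-CNN pooling helper (an assumption).
import tensorflow as tf

def masked_conv1d_and_max_sketch(t, weights, filters, kernel_size):
    """t: [batch, words, chars, dim] char embeddings; weights: [batch, words, chars] bool mask."""
    shape = tf.shape(t)
    dim = t.get_shape().as_list()[-1]           # static char-embedding size (needed by conv1d)
    t = tf.reshape(t, [-1, shape[2], dim])      # flatten to [batch*words, chars, dim]
    mask = tf.reshape(tf.to_float(weights), [-1, shape[2], 1])
    t *= mask                                   # zero out padded characters
    conv = tf.layers.conv1d(t, filters, kernel_size, padding='same', activation=tf.nn.relu)
    conv += (mask - 1.) * 1e6                   # large negative value at padded positions before the max
    pooled = tf.reduce_max(conv, axis=1)        # masked max over the char dimension
    return tf.reshape(pooled, [shape[0], shape[1], filters])  # [batch, words, filters]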
def model_fn(mode, features, labels):
    # Logging
    Path('results').mkdir(exist_ok=True)
    tf.logging.set_verbosity(logging.INFO)
    handlers = [
        logging.FileHandler('./results/main.log'),
        logging.StreamHandler(sys.stdout)
    ]
    logging.getLogger('tensorflow').handlers = handlers

    word_inputs, char_inputs = features

    training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Embeddings
    embeddings = tf.get_variable('embeddings',
                                 [cfg.num_chars + 2, cfg.char_embed_dim])
    char_input_emb = tf.nn.embedding_lookup(embeddings, char_inputs)

    # Reshaping for CNN
    output = tf.reshape(char_input_emb,
                        [-1, tf.shape(char_inputs)[2], cfg.char_embed_dim])

    # CNN
    output = tf.layers.conv1d(output,
                              filters=64,
                              kernel_size=2,
                              strides=1,
                              padding="same",
                              activation=tf.nn.relu)
    output = tf.layers.max_pooling1d(output, pool_size=2, strides=2)
    output = tf.layers.conv1d(output,
                              filters=128,
                              kernel_size=2,
                              strides=1,
                              padding="same",
                              activation=tf.nn.relu)
    output = tf.layers.max_pooling1d(output, pool_size=2, strides=2)

    cnn_output = tf.layers.dropout(output, rate=.5, training=training)
    cnn_output = tf.layers.flatten(cnn_output)

    # Reshaping CNN and concatenating for LSTM
    cnn_output = tf.reshape(
        cnn_output,
        [-1, tf.shape(char_inputs)[1], 128 * int(cfg.word_max_len / 4)])
    word_inputs = tf.layers.dropout(word_inputs, rate=.5, training=training)
    lstm_inputs = tf.concat([word_inputs, cnn_output], axis=-1)

    # LSTM
    fw_cell = tf.contrib.rnn.LSTMCell(num_units=cfg.lstm_units)
    bw_cell = tf.contrib.rnn.LSTMCell(num_units=cfg.lstm_units)
    (fw_outputs, bw_outputs), (fw_state,
                               bw_state) = tf.nn.bidirectional_dynamic_rnn(
                                   fw_cell,
                                   bw_cell,
                                   lstm_inputs,
                                   dtype=tf.float32)

    # Attention
    W = tf.Variable(tf.random_normal([cfg.lstm_units], stddev=0.1))
    H = fw_outputs + bw_outputs
    M = tf.tanh(H)
    alpha = tf.nn.softmax(
        tf.reshape(
            tf.matmul(tf.reshape(M, [-1, cfg.lstm_units]),
                      tf.reshape(W, [-1, 1])), (-1, tf.shape(word_inputs)[1])))
    r = tf.matmul(tf.transpose(H, [0, 2, 1]),
                  tf.reshape(alpha, [-1, tf.shape(word_inputs)[1], 1]))
    r = tf.squeeze(r)
    h_star = tf.tanh(r)
    h_drop = tf.nn.dropout(h_star, .5)

    # Dense
    FC_W = tf.Variable(tf.truncated_normal([cfg.lstm_units, 2], stddev=0.1))
    FC_b = tf.Variable(tf.constant(0., shape=[2]))
    logits = tf.nn.xw_plus_b(h_drop, FC_W, FC_b)

    # Loss
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))

    # Gradient clipping
    # optimizer = tf.train.AdamOptimizer(1e-4)
    # gradients, variables = zip(*optimizer.compute_gradients(loss))
    # gradients, _ = tf.clip_by_global_norm(gradients, .1)
    # train_op = optimizer.apply_gradients(zip(gradients, variables), tf.train.get_global_step())

    # Metrics
    indices = [0, 1]
    labels = tf.argmax(labels, 1)
    pred_ids = tf.argmax(logits, 1)

    metrics = {
        'acc': tf.metrics.accuracy(labels, pred_ids),
        'precision': precision(labels,
                               pred_ids,
                               2,
                               indices,
                               None,
                               average='macro'),
        'recall': recall(labels, pred_ids, 2, indices, None, average='macro'),
        'f1': f1(labels, pred_ids, 2, indices, None, average='macro')
    }

    for metric_name, op in metrics.items():
        tf.summary.scalar(metric_name, op[1])

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          eval_metric_ops=metrics)

    elif mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.train.AdamOptimizer(cfg.learning_rate).minimize(
            loss, global_step=tf.train.get_or_create_global_step())

        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
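A standalone shape check (a sketch with dummy data, not from the original source) of the attention pooling used in the last model_fn: H is [batch, time, units], alpha is a [batch, time] weight vector, and r collapses the time dimension into one vector per example.

# Sketch only: random tensors stand in for the BiLSTM outputs and attention weights.
import numpy as np
import tensorflow as tf

batch, time, units = 3, 6, 4
H = tf.constant(np.random.rand(batch, time, units), tf.float32)
W = tf.constant(np.random.rand(units), tf.float32)
M = tf.tanh(H)
alpha = tf.nn.softmax(
    tf.reshape(tf.matmul(tf.reshape(M, [-1, units]), tf.reshape(W, [-1, 1])),
               (-1, time)))                       # [batch, time] attention weights
r = tf.matmul(tf.transpose(H, [0, 2, 1]),         # [batch, units, time]
              tf.reshape(alpha, [-1, time, 1]))   # [batch, time, 1]
r = tf.squeeze(r, axis=-1)                        # [batch, units] sentence vector
with tf.Session() as sess:
    print(sess.run(tf.shape(r)))  # [3 4]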