def metric_fn(label_ids, logits, trans):
    # First run Viterbi (CRF) decoding on the results.
    # pred_ids and num_labels are presumably captured from the enclosing model_fn scope.
    weight = tf.sequence_mask(FLAGS.max_seq_length)
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                     [2, 3, 4, 5, 6, 7], weight)
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                               [2, 3, 4, 5, 6, 7], weight)
    f = tf_metrics.f1(label_ids, pred_ids, num_labels,
                      [2, 3, 4, 5, 6, 7], weight)
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def metric_fn(per_example_loss, label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    precision = tf_metrics.precision(label_ids, predictions, 11,
                                     [1, 2, 4, 5, 6, 7, 8, 9], average="macro")
    recall = tf_metrics.recall(label_ids, predictions, 11,
                               [1, 2, 4, 5, 6, 7, 8, 9], average="macro")
    f = tf_metrics.f1(label_ids, predictions, 11,
                      [1, 2, 4, 5, 6, 7, 8, 9], average="macro")
    loss = tf.metrics.mean(per_example_loss)
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        "eval_loss": loss,
    }
def metric_fn(label_ids, pred_ids, per_example_loss, input_mask):
    # ['<pad>'] + ["O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-MISC", "I-MISC", "X"]
    indices = [2, 3, 4, 5, 6, 7, 8, 9]
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels, indices, input_mask)
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels, indices, input_mask)
    f = tf_metrics.f1(label_ids, pred_ids, num_labels, indices, input_mask)
    accuracy = tf.metrics.accuracy(label_ids, pred_ids, input_mask)
    loss = tf.metrics.mean(per_example_loss)
    return {
        'eval_precision': precision,
        'eval_recall': recall,
        'eval_f': f,
        'eval_accuracy': accuracy,
        'eval_loss': loss,
    }
def metric_fn(per_example_loss, label_ids, logits):
    # def metric_fn(label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    precision = tf_metrics.precision(label_ids, predictions, FLAGS.num_labels,
                                     [1, 2], average="macro")
    recall = tf_metrics.recall(label_ids, predictions, FLAGS.num_labels,
                               [1, 2], average="macro")
    f = tf_metrics.f1(label_ids, predictions, FLAGS.num_labels,
                      [1, 2], average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def metric_fn(label_ids, pred_ids, num_labels):
    # predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    pos_indices = list(range(2, num_labels - 3))
    # pos_indices = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    #                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
    # pos_indices = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    precision = tf_metrics.precision(
        label_ids, pred_ids, num_labels, pos_indices, average="micro")
    recall = tf_metrics.recall(
        label_ids, pred_ids, num_labels, pos_indices, average="micro")
    f = tf_metrics.f1(label_ids, pred_ids, num_labels, pos_indices, average="micro")
    # hook_dict['precision'] = precision
    # hook_dict['recall'] = recall
    # hook_dict['f'] = f
    # tf.summary.scalar('precision', precision)
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def metric_fn(per_example_loss, label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    pos_indices = list(range(2, num_labels - 3))
    precision = tf_metrics.precision(label_ids, predictions, num_labels,
                                     pos_indices, average="macro")
    recall = tf_metrics.recall(label_ids, predictions, num_labels,
                               pos_indices, average="macro")
    f = tf_metrics.f1(label_ids, predictions, num_labels,
                      pos_indices, average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
    }
def eval_phase(label_ids, pred_ids, num_labels):
    # First run Viterbi (CRF) decoding on the results.
    eval_list = []
    assert os.path.exists(os.path.join(FLAGS.output_dir, "eval_ids_list.txt"))
    list_file = open(os.path.join(FLAGS.output_dir, "eval_ids_list.txt"), 'r')
    contents = list_file.readlines()
    for item in contents:
        # Remember to cast the strings back to int; the metrics below expect a list of ints.
        eval_list.append(int(item.strip()))
    assert 0 < len(eval_list)
    print("eval_list:", eval_list)
    weight = tf.sequence_mask(FLAGS.max_seq_length)
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels, eval_list, weight)
    tf.summary.scalar("precision", precision[1])
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels, eval_list, weight)
    tf.summary.scalar("recall", recall[1])
    f = tf_metrics.f1(label_ids, pred_ids, num_labels, eval_list, weight)
    tf.summary.scalar("f1", f[1])
    return (precision, recall, f)
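# eval_phase() above expects FLAGS.output_dir/eval_ids_list.txt to contain the integer ids
# of the labels that count as positive classes, one per line. A minimal, hypothetical helper
# (write_eval_ids_list and its negative_labels default are assumptions, not part of the
# original code) that could produce such a file from a label map:
import os

def write_eval_ids_list(label_map, output_dir,
                        negative_labels=("[PAD]", "[CLS]", "[SEP]", "O", "X")):
    """label_map: dict mapping label string -> integer id."""
    path = os.path.join(output_dir, "eval_ids_list.txt")
    with open(path, "w") as f:
        for label, idx in sorted(label_map.items(), key=lambda kv: kv[1]):
            if label not in negative_labels:
                f.write("%d\n" % idx)  # one positive label id per line
    return path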
def metric_fn(per_example_loss, label_ids, logits, is_real_example):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    recall = tf_metrics.recall(label_ids, predictions,
                               num_classes=num_labels, average='micro')
    precision = tf_metrics.precision(label_ids, predictions,
                                     num_classes=num_labels, average='micro')
    f1 = tf_metrics.f1(label_ids, predictions,
                       num_classes=num_labels, average='micro')
    loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
    return {
        "eval_recall": recall,
        "eval_precision": precision,
        "eval_f1": f1,
        "eval_loss": loss,
    }
def metric_fn(label_ids, logits, trans):
    # First run Viterbi (CRF) decoding on the results.
    # pred_ids and num_labels come from the enclosing model_fn scope.
    weight = tf.sequence_mask(FLAGS.max_seq_length)
    # print(label_ids.get_shape())  # [?, 128]
    # print(pred_ids.get_shape())   # [64, 128]
    # print(num_labels)             # 17
    # print(weight.get_shape())     # [128,]
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                     [2, 3, 4, 5, 6, 7], weight)
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                               [2, 3, 4, 5, 6, 7], weight)
    f = tf_metrics.f1(label_ids, pred_ids, num_labels,
                      [2, 3, 4, 5, 6, 7], weight)
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def metric_fn(per_example_loss, label_ids, logits):
    # def metric_fn(label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    # labels = []
    # for i, x in enumerate()
    predict_labels = []
    # for i in range(1, num_labels - 4):
    #     predict_labels.append(i)
    # precision = tf_metrics.precision(label_ids, predictions, num_labels, predict_labels, average="macro")
    # recall = tf_metrics.recall(label_ids, predictions, num_labels, predict_labels, average="macro")
    # f = tf_metrics.f1(label_ids, predictions, num_labels, predict_labels, average="macro")
    precision = tf_metrics.precision(label_ids, predictions, num_labels, average="macro")
    recall = tf_metrics.recall(label_ids, predictions, num_labels, average="macro")
    f = tf_metrics.f1(label_ids, predictions, num_labels, average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def metric_fn(per_example_loss, label_ids, logits):
    # def metric_fn(label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    n_class = [i for i in range(len(LABELS) - 2)]
    precision = tf_metrics.precision(label_ids, predictions, len(LABELS) + 1,
                                     [1, 2, 3], average="macro")
    recall = tf_metrics.recall(label_ids, predictions, len(LABELS) + 1,
                               [1, 2, 3], average="macro")
    f = tf_metrics.f1(label_ids, predictions, len(LABELS) + 1,
                      [1, 2, 3], average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
def metric_fn(per_example_loss, label_ids, logits, is_real_example):
    # def metric_fn(label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    # print("predictions shape: " + str(predictions.get_shape().as_list()))
    # print("label_ids shape: " + str(label_ids.get_shape().as_list()))
    # print("is_real_example shape: " + str(is_real_example.get_shape().as_list()))
    precision = tf_metrics.precision(label_ids, predictions, num_labels,
                                     [2, 3, 4, 5], average="macro")
    recall = tf_metrics.recall(label_ids, predictions, num_labels,
                               [2, 3, 4, 5], average="macro")
    f = tf_metrics.f1(label_ids, predictions, num_labels,
                      [2, 3, 4, 5], average="macro")
    accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions,
                                   weights=is_real_example)
    loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)
    # precision = tf_metrics.precision(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
    # recall = tf_metrics.recall(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
    # f = tf_metrics.f1(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7], average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        "eval_accuracy": accuracy,
        "eval_loss": loss,
    }
def metric_fn(per_example_loss, label_ids, logits):
    # def metric_fn(label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    print(logits)
    print(predictions)
    print(label_ids)
    # label_ids_array = label_ids.eval()
    # predictions_array = predictions.eval()
    # print(predictions_array)
    accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions)
    precision = tf_metrics.precision(
        label_ids, predictions, 19,
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], average="micro")
    recall = tf_metrics.recall(
        label_ids, predictions, 19,
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], average="micro")
    f = tf_metrics.f1(
        label_ids, predictions, 19,
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], average="micro")
    # class_repo = classification_report(label_ids_array, predictions_array)
    return {
        "eval_accuracy": accuracy,
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_class_repo": class_repo,
        # "eval_loss": loss,
    }
def metric_fn(label_ids, predicted_labels, input_mask, num_labels):
    label_ids = tf.boolean_mask(label_ids, input_mask)
    predicted_labels = tf.boolean_mask(predicted_labels, input_mask)
    precision = tf_metrics.precision(label_ids, predicted_labels, num_labels,
                                     [1, 2, 3], average="macro")
    recall = tf_metrics.recall(label_ids, predicted_labels, num_labels,
                               [1, 2, 3], average="macro")
    f1 = tf_metrics.f1(label_ids, predicted_labels, num_labels,
                       [1, 2, 3], average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f1
    }
def metric_fn(seq_length, max_len, label_ids, pred_ids):
    indices = [1, 2]  # the `indices` argument tells the metrics which labels to evaluate
    # Metrics
    weights = tf.sequence_mask(seq_length, maxlen=max_len)
    tf.logging.info("**** shape in metrics ****")
    label_ids_metric = tf.argmax(label_ids, 1)
    tf.logging.info(label_ids_metric.shape)
    tf.logging.info(pred_ids.shape)
    metrics = {
        'acc': tf.metrics.accuracy(label_ids_metric, pred_ids),
        'precision': precision(label_ids_metric, pred_ids, params['num_labels'], indices),
        'recall': recall(label_ids_metric, pred_ids, params['num_labels'], indices),
        'f1': f1(label_ids_metric, pred_ids, params['num_labels'], indices),
    }
    for metric_name, op in metrics.items():
        tf.summary.scalar(metric_name, op[1])
    return metrics  # the original returned the undefined name `eval_metrics`
def metric_fn(label_ids, logits, trans):
    # First run Viterbi (CRF) decoding on the results.
    weight = tf.sequence_mask(max_seq_length)
    # Index 0 is padding and 1 is [SEP]; the named-entity labels are the positive
    # classes (they must be listed explicitly for multi-class metrics), and the
    # indices below refer to positions in label_ids.
    # precision = TP / (TP + FP)   fraction of predicted positives that are correct
    # recall    = TP / (TP + FN)   fraction of actual positives that are found
    # accuracy  = (TP + TN) / (P + N)
    # F1-score  = 2 / [(1 / precision) + (1 / recall)]
    precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                     [2, 3, 4, 5, 6, 7], weight)
    recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                               [2, 3, 4, 5, 6, 7], weight)
    f1 = tf_metrics.f1(label_ids, pred_ids, num_labels,
                       [2, 3, 4, 5, 6, 7], weight)
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f1": f1,
        # "eval_loss": loss,
    }
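# A tiny worked example of the formulas quoted in the comments above (pure Python,
# with invented counts) to make the relationships concrete:
tp, fp, fn, tn = 8, 2, 4, 86
precision = tp / (tp + fp)                    # 8 / 10   = 0.8
recall    = tp / (tp + fn)                    # 8 / 12   ~ 0.667
accuracy  = (tp + tn) / (tp + fp + fn + tn)   # 94 / 100 = 0.94
f1        = 2 / (1 / precision + 1 / recall)  # ~ 0.727
print(precision, recall, accuracy, f1)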
def metric_fn(per_example_loss, label_ids, logits):
    # def metric_fn(label_ids, logits):
    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
    # Evaluation function computing precision, recall and F1. If the label set changes,
    # the numbers below must be updated: 10 is the total number of classes and 1-6 are
    # the useful classes (B, I, E). See the functions in tf.metrics for details.
    precision = tf_metrics.precision(label_ids, predictions, 10,
                                     [1, 2, 3, 4, 5, 6], average="macro")
    recall = tf_metrics.recall(label_ids, predictions, 10,
                               [1, 2, 3, 4, 5, 6], average="macro")
    f = tf_metrics.f1(label_ids, predictions, 10,
                      [1, 2, 3, 4, 5, 6], average="macro")
    return {
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        # "eval_loss": loss,
    }
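# All of the metric_fn variants above rely on the tf_metrics package, whose
# precision/recall/f1 functions mirror tf.metrics and return a (value, update_op) pair.
# A minimal, self-contained sketch (TF 1.x; the toy label ids are invented) of driving
# those pairs outside an Estimator:
import tensorflow as tf
import tf_metrics

labels      = tf.constant([[0, 1, 2, 0], [0, 2, 2, 1]], dtype=tf.int32)
predictions = tf.constant([[0, 1, 0, 0], [0, 2, 1, 1]], dtype=tf.int32)
num_classes = 3
pos_indices = [1, 2]  # classes treated as positives; 0 plays the role of "O"/padding here

precision, precision_op = tf_metrics.precision(
    labels, predictions, num_classes, pos_indices, average="micro")
recall, recall_op = tf_metrics.recall(
    labels, predictions, num_classes, pos_indices, average="micro")
f1, f1_op = tf_metrics.f1(
    labels, predictions, num_classes, pos_indices, average="micro")

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # the streaming counters are local variables
    sess.run([precision_op, recall_op, f1_op])  # run the update ops once per batch
    print(sess.run([precision, recall, f1]))    # then read the accumulated values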
def model_fn(features, labels, mode, params):
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    input_ids = features["text"]
    author_id = features["author"]
    category_ids = features["categories"]
    label_id = features["label"]
    cnn = CnnModel(params, input_ids, author_id, category_ids, training)
    squeeze_label_ids = tf.squeeze(label_id, axis=1)
    logits, predict_label_ids, loss = cnn.build_network(squeeze_label_ids)
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        # words = tf.contrib.lookup.index_to_string_table_from_file(params['vocab'])
        # input_words = words.lookup(tf.to_int64(input_ids))
        predictions = {
            'true_label_ids': squeeze_label_ids,
            'predict_label_ids': predict_label_ids,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        train_op = optimization.create_optimizer(loss, params['learning_rate'],
                                                 params['train_steps'],
                                                 params['num_warmup_steps'])
        if mode == tf.estimator.ModeKeys.EVAL:
            # Metrics
            metrics = {
                'acc': tf.metrics.accuracy(squeeze_label_ids, predict_label_ids),
                # Compute per-class precision/recall, then average over the classes (macro).
                'precision': precision(squeeze_label_ids, predict_label_ids,
                                       params['label_size'], average='macro'),
                'recall': recall(squeeze_label_ids, predict_label_ids,
                                 params['label_size'], average='macro'),
                'f1': f1(squeeze_label_ids, predict_label_ids,
                         params['label_size'], average='macro'),
            }
            return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def model_fn(features, labels, mode, params): # For serving, features are a bit different #if isinstance(features, dict): # features = features['words'], features['nwords'] # Read vocabs and inputs #import ipdb #ipdb.set_trace() dropout = args.dropout #input_ids = features["input_ids"] #mask = features["mask"] #segment_ids = features["segment_ids"] #label_ids = features["label_ids"] ##words, nwords = features #tf.print(' '.join(words[4]), output_stream=sys.stderr) training = (mode == tf.estimator.ModeKeys.TRAIN) #vocab_words = tf.contrib.lookup.index_table_from_file( # #args.vocab_words) # args.vocab_words, num_oov_buckets=args.num_oov_buckets) #with Path(args.vocab_tags).open() as f: # indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] # num_tags = len(indices) + 1 ##word_ids = vocab_words.lookup(words) if args.embedding == 'word2id': # word2id with Path(args.vocab_words).open(encoding='utf-8') as f: vocab_words_1 = f.readlines() vocab_length = len(vocab_words_1) input_ids = features["input_ids"] label_ids = features["label_ids"] mask = features["mask"] embeddings = embedding(input_ids, vocab_length, args) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) pass elif args.embedding == 'bert': from my_model.embeddings.embedding import get_bert_embedding input_ids = features["input_ids"] mask = features["mask"] segment_ids = features["segment_ids"] label_ids = features["label_ids"] embeddings = get_bert_embedding(args.bert_config_file, training, input_ids, mask, segment_ids, use_one_hot_embeddings=False) else: # Word Embeddings # deafult input_ids = features["input_ids"] label_ids = features["label_ids"] mask = features["mask"] glove = np.load(args.glove)['embeddings'] # np.array variable = np.vstack([glove, [[0.] 
* args.dim]]) variable = tf.Variable(variable, dtype=tf.float32, trainable=False) embeddings = tf.nn.embedding_lookup(variable, input_ids) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) pass (total_loss, logits, predicts) = create_model(embeddings, label_ids, mask, training, self.num_labels, use_one_hot_embeddings=False) tvars = tf.trainable_variables() initialized_variable_names = None scaffold_fn = None if args.init_checkpoint: (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint( tvars, args.init_checkpoint) tf.train.init_from_checkpoint(args.init_checkpoint, assignment_map) self.logging.debug("**** Trainable Variables ****") for var in tvars: init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" self.logging.debug(" name = %s, shape = %s%s", var.name, var.shape, init_string) if mode == tf.estimator.ModeKeys.TRAIN: warmup_steps = args.warmup_steps step = tf.to_float(tf.train.get_global_step()) if args.learning_rate_decay == 'sqrt': lr_warmup = args.learning_rate_peak * tf.minimum( 1.0, step / warmup_steps) lr_decay = args.learning_rate_peak * tf.minimum( 1.0, tf.sqrt(warmup_steps / step)) lr = tf.where(step < warmup_steps, lr_warmup, lr_decay) elif args.learning_rate_decay == 'exp': lr = tf.train.exponential_decay( args.learning_rate_peak, global_step=step, decay_steps=args.decay_steps, decay_rate=args.decay_rate) elif args.learning_rate_decay == 'bert': num_train_steps = int(self.len_train_examples / args.batch_size * args.epochs) #num_warmup_steps = int(num_train_steps * args.warmup_steps) num_warmup_steps = int(num_train_steps * 0.1) train_op = optimization.create_optimizer( total_loss, args.learning_rate, num_train_steps, num_warmup_steps, use_tpu=False) output_spec = tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, #scaffold_fn=scaffold_fn ) return output_spec else: self.logging.info( 'learning rate decay strategy not supported') sys.exit() tf.print(lr) train_op = tf.train.AdamOptimizer(lr).minimize( total_loss, global_step=tf.train.get_or_create_global_step()) #return tf.estimator.EstimatorSpec( # mode, loss=loss, train_op=train_op) #output_spec = tf.contrib.tpu.TPUEstimatorSpec( output_spec = tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, #scaffold_fn=scaffold_fn ) elif mode == tf.estimator.ModeKeys.EVAL: #def metric_fn(label_ids, logits,num_labels,mask): # predictions = tf.math.argmax(logits, axis=-1, output_type=tf.int32) # cm = metrics.streaming_confusion_matrix(label_ids, predictions, num_labels-1, weights=mask) # return { # "confusion_matrix":cm # } # # #eval_metrics = (metric_fn, [label_ids, logits, self.num_labels, mask]) #output_spec = tf.contrib.tpu.TPUEstimatorSpec( # Metrics #weights = tf.sequence_mask(nwords) weights = mask #mask2len = tf.reduce_sum(mask,axis=1) #weights = tf.sequence_mask(mask2len) #pred_ids= tf.math.argmax(logits, axis=-1, output_type=tf.int32) pred_ids = predicts num_label_ids = self.num_labels metrics = { 'acc': tf.metrics.accuracy(label_ids, pred_ids, weights), #'precision': tf.metrics.precision(label_ids, pred_ids, weights), #'recall': tf.metrics.recall(label_ids, pred_ids, weights), ##'f1': f1(label_ids, pred_ids, weights), 'precision': precision(label_ids, pred_ids, self.num_labels, self.indices, weights), 'recall': recall(label_ids, pred_ids, self.num_labels, self.indices, weights), 'f1': f1(label_ids, pred_ids, self.num_labels, self.indices, weights), } output_spec = 
tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, eval_metric_ops=metrics #scaffold_fn=scaffold_fn ) else: #output_spec = tf.contrib.tpu.TPUEstimatorSpec( output_spec = tf.estimator.EstimatorSpec( mode=mode, predictions=predicts, #scaffold_fn=scaffold_fn ) return output_spec
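# The 'sqrt' learning-rate branch above warms up linearly to learning_rate_peak and then
# decays proportionally to 1/sqrt(step). A stand-alone restatement in plain Python, only
# for inspecting the shape of the schedule (the snippet computes the same thing with
# tf.minimum / tf.sqrt / tf.where):
import math

def sqrt_schedule(step, peak_lr, warmup_steps):
    if step < warmup_steps:
        return peak_lr * step / warmup_steps          # linear warmup
    return peak_lr * math.sqrt(warmup_steps / step)   # 1/sqrt decay afterwards

for step in (100, 1000, 4000, 16000, 64000):
    print(step, round(sqrt_schedule(step, peak_lr=1e-3, warmup_steps=4000), 6))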
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars) = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    # num_oov_buckets assigns out-of-vocabulary words the indices
    # [vocab_size, vocab_size + num_oov_buckets - 1]; if num_oov_buckets <= 0,
    # unknown words get default_value instead (-1 by default).
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        # indices holds the positive-class tag ids; 'O' is treated as the negative class
        # and excluded, which is what the evaluation metrics rely on.
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings: character embedding vectors are learned.
    char_ids = vocab_chars.lookup(chars)
    # The paper initializes char embeddings uniformly in [-sqrt(3/dim), sqrt(3/dim)];
    # with it f1 = 0.91270673 versus f1 = 0.91264033 without, i.e. an improvement that
    # is within normal random variation.
    variable = tf.get_variable('chars_embeddings',
                               [num_chars, params['dim_chars']], dtype=tf.float32)
    # initializer=tf.random_uniform_initializer(-tf.sqrt(3/params['dim_chars']), tf.sqrt(3/params['dim_chars'])))
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout, training=training)

    # Char 1d convolution; sequence_mask turns the int char counts into a bool mask.
    mask = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, mask,
                                            params['filters'], params['kernel_size'])

    # Word Embeddings: the word vectors are not trained; glove.840B.300d is used directly.
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # Bi-LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF: in a linear-chain CRF the maximal cliques over the output variables are pairs
    # of adjacent nodes, so each feature function involves at most two neighbouring outputs.
    # logits are the unary (state) features; crf_params are the pairwise transition features.
    # The dense layer applies an (output.shape[-1], num_tags) matrix, keeping the leading
    # dimensions and mapping the last dimension to num_tags.
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        mask = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, mask),
            'precision': precision(tags, pred_ids, num_tags, indices, mask),
            'recall': recall(tags, pred_ids, num_tags, indices, mask),
            'f1': f1(tags, pred_ids, num_tags, indices, mask),
        }
        # tf.metrics.accuracy returns (accuracy, update_op): the former is computed from the
        # totals accumulated so far (i.e. before this batch), while the latter updates
        # total/count with the current batch and returns the refreshed value. The update_op
        # must therefore be run; if only op[0] went into the summary, total/count would never
        # be updated and the accuracy would stay constant.
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
                loss, global_step=tf.train.get_or_create_global_step())  # default lr 1e-3
            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
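# A minimal usage sketch of wiring a model_fn like the one above into an Estimator.
# The file names, hyper-parameter values, and train_input_fn/eval_input_fn below are
# placeholders and assumptions, not taken from the original code:
import tensorflow as tf

params = {
    'dim': 300, 'dim_chars': 100, 'dropout': 0.5, 'num_oov_buckets': 1,
    'filters': 50, 'kernel_size': 3, 'lstm_size': 100,
    'words': 'vocab.words.txt', 'chars': 'vocab.chars.txt',
    'tags': 'vocab.tags.txt', 'glove': 'glove.npz',
}
estimator = tf.estimator.Estimator(model_fn, model_dir='results/model', params=params)
# train_input_fn / eval_input_fn are assumed to yield
# ((words, nwords), (chars, nchars)), labels batches:
# estimator.train(input_fn=train_input_fn)
# metrics = estimator.evaluate(input_fn=eval_input_fn)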
def __model_fn(self, features, labels, mode, params): '模型结构: Bi_LSTM + CRF' 'features: 特征列; labels: tag列; ' 'mode: tf.estimator.Estimator()自带的参数,用于判定TRAIN EVAL PREDICT三种类型' 'params: 参数词典' # 判断features是那种类型:类型1:((([None],()),([None,None],[None])),[None]),这是self.__input_fn()输出的类型 # 类型2: {'words':[word1,word2,..],'nwords':number,'chars':[['J','o',..],['l',..],..],'nchars':number}, # 这是我们在预测时输入的类型 if isinstance(features, dict): features = ((features['words'], features['nwords']), (features['chars'], features['nchars'])) with tf.name_scope('Read_data'): # 获取特征列各项 (words, nwords), (chars, nchars) = features # words是单词列表,nwords是其相应的数量 # 获取汉语单字或英文字母的词包,eg: {char1:int64} vocab_chars = tf.contrib.lookup.index_table_from_file( params['char_vocabulary'], num_oov_buckets=params['num_oov_buckets']) # 获取汉语词语或英文单词的词包,eg:{char2:int64} vocab_words = tf.contrib.lookup.index_table_from_file( params['word_vocabulary'], num_oov_buckets=params['num_oov_buckets']) # 获取标记对应的索引,不包括用于填充batch的padding_tag with Path(params['tags']).open('r', encoding='utf-8') as fi: # indices用于存储正类tag的索引,即不包含padding_tag indices = [ idx for idx, tag in enumerate(fi) if tag.strip() != params.get('padding_tag', 'pad') ] num_tags = len(indices) + 1 # 总体的tag数量还要加上padding_tag,用于构建转移矩阵 # 获取汉语单字或英文字母的数量 with Path(params['char_vocabulary']).open('r', encoding='utf-8') as fi: # # char的数量还得加上,不在词包中的字符我们给它们的索引数量 num_chars = sum(1 for _ in fi) + params['num_oov_buckets'] # 判断模式:训练,评估,预测 training = (mode == tf.estimator.ModeKeys.TRAIN) with tf.name_scope('Char_Embeddings_Layer'): char_ids = vocab_chars.lookup(chars) # 获取字母列表的id列表 # char2vec = tf.get_variable('char_embeddings',[num_chars,params['char2vec_dim']],tf.float32) # char_embeddings = tf.nn.embedding_lookup(char2vec,char_ids) # 是否加载外部的汉字单字或英文字母的向量 if params['if_load_char2vec']: char2vec = np.load( params['char2vec'])['embeddings'] # 加载词向量,可通过char_id查找获取 # 为padding_tag添加词向量,用全0向量表示,注意shape要保持一致 char2vec = np.vstack( [char2vec, [[0.] * params['char2vec_dim']]]) char2vec = tf.Variable(char2vec, dtype=tf.float32, trainable=False) # 词向量表转为tf.tensor,不可训练 # 获取字母列表中每个字母的词向量,由于是batch,故shape= (batch_size,time_len,input_size) # 这里batch是每条输入中的单词个数 char_embeddings = tf.nn.embedding_lookup(char2vec, char_ids) else: # 通过模型训练词向量 with Path(params['char_vocabulary']).open( 'r', encoding='utf-8') as fi: char_vocab = [ word for idx, word in enumerate(fi) if word.strip() != '' ] char2vec = tf.get_variable( 'char2vec', [len(char_vocab), params['char2vec_dim']]) # 为padding_tag添加词向量,用全0向量表示,注意shape要保持一致 padding_vec = tf.Variable([[0.] 
* params['char2vec_dim']], dtype=tf.float32) char2vec = tf.concat([char2vec, padding_vec], axis=0) char2vec = tf.Variable(char2vec, dtype=tf.float32, trainable=True) # 词向量表转为tf.tensor,可训练 # 这里需要注意,padding_tag的向量应该是全0,但是在训练词向量过程中,padding_tag难以保持为全0 # 因此需要特别处理一下,每次都需要将char2vec最后一个向量变为全0,我们用mask # 再构建一张lookup_table,形状与char2vec一致,其中除了最后一行元素全为0外,其余都是1 mask = [params['char2vec_dim'] ] * len(char_vocab) + [0] * params['char2vec_dim'] mask_lookup_table = tf.sequence_mask(mask, dtype=tf.float32) mask_vec = tf.nn.embedding_lookup(mask_lookup_table, char_ids) # 获取单词中每个字母的词向量,由于是batch,故shape= (batch_size,time_len,input_size) # 这里batch是每条输入中的单词个数 embeddings = tf.nn.embedding_lookup(char2vec, char_ids) # 将char_ids中的padding_tag的向量重置为0 char_embeddings = tf.multiply(embeddings, mask_vec) with tf.name_scope('Char_Embedding_Dropout_Layer'): # char_embeddings.shape = (None,None,None,params['char2vec_dim'] # 第一个None是batch_size,第二个是每条输入中的单词个数 # 第三个None是每条输入中每个单词包含的字母个数的列表 char_embeddings = tf.layers.dropout(char_embeddings, rate=params['dropout'], training=training) with tf.name_scope('Char_LSTM_Layer'): dim_words = tf.shape(char_embeddings)[1] # 当前输入中的单词个数 dim_chars = tf.shape(char_embeddings)[2] # 当前输入中的每个单词的字母个数 flat = tf.reshape(char_embeddings, [-1, dim_chars, params['char2vec_dim']]) t = tf.transpose(flat, perm=[1, 0, 2]) lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell( params['char_lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_fw) # 获取正向LSTM最后一时刻的输出 _, (_, output_fw) = lstm_cell_fw(t, dtype=tf.float32, sequence_length=tf.reshape( nchars, [-1])) # 获取反向LSTM最后一时刻的输出 _, (_, output_bw) = lstm_cell_bw(t, dtype=tf.float32, sequence_length=tf.reshape( nchars, [-1])) # 将这两个时刻的输出按最后一维度拼接 output = tf.concat([output_fw, output_bw], axis=-1) char_embeddings = tf.reshape( output, [-1, dim_words, params['char_lstm_size'] * 2]) with tf.name_scope('Word_Embeddings_Layer'): word_ids = vocab_words.lookup(words) # 获取单词列表的id列表 # 是否加载外部的词向量 if params['if_load_word2vec']: word2vec = np.load( params['word2vec'])['embeddings'] # 加载词向量,可通过word_id查找获取 # 为padding_tag添加词向量,用全0向量表示,注意shape要保持一致 word2vec = np.vstack( [word2vec, [[0.] * params['word2vec_dim']]]) word2vec = tf.Variable(word2vec, dtype=tf.float32, trainable=False) # 词向量表转为tf.tensor,不可训练 # 获取单词列表中每个单词的词向量,由于是batch,故shape= (batch_size,time_len,input_size) word_embeddings = tf.nn.embedding_lookup(word2vec, word_ids) else: # 通过模型训练词向量 with Path(params['word_vocabulary']).open( 'r', encoding='utf-8') as fi: vocab = [ word for idx, word in enumerate(fi) if word.strip() != '' ] word2vec = tf.get_variable( 'word2vec', [len(vocab), params['word2vec_dim']]) # 为padding_tag添加词向量,用全0向量表示,注意shape要保持一致 padding_vec = tf.Variable([[0.] 
* params['word2vec_dim']], dtype=tf.float32) word2vec = tf.concat([word2vec, padding_vec], axis=0) word2vec = tf.Variable(word2vec, dtype=tf.float32, trainable=True) # 词向量表转为tf.tensor,可训练 # 这里需要注意,padding_tag的向量应该是全0,但是在训练词向量过程中,padding_tag难以保持为全0 # 因此需要特别处理一下,每次都需要将word2vec最后一个向量变为全0,我们用mask # 再构建一张lookup_table,形状与word2vec一致,其中除了最后一行元素全为0外,其余都是1 mask = [params['word2vec_dim'] ] * len(vocab) + [0] * params['word2vec_dim'] mask_lookup_table = tf.sequence_mask(mask, dtype=tf.float32) mask_vec = tf.nn.embedding_lookup(mask_lookup_table, word_ids) # 获取单词列表中每个单词的词向量,由于是batch,故shape= (batch_size,time_len,input_size) embeddings = tf.nn.embedding_lookup(word2vec, word_ids) # 将word_ids中的padding_tag的向量重置为0 word_embeddings = tf.multiply(embeddings, mask_vec) with tf.name_scope('Concatenate_CharEmbedding_WordEmbedding'): embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1) with tf.name_scope('Dropout_Layer'): embeddings = tf.layers.dropout(embeddings, rate=params['dropout'], training=training) with tf.name_scope('Word_Bi_LSTM'): # 将输入形状转为shape=(time_len,batch_size,input_size),方便LSTM计算 inputs = tf.transpose(embeddings, perm=[1, 0, 2]) # 正向LSTM lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell( params['word_lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell( params['word_lstm_size']) # 反向LSTM lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) # 正向每时刻隐藏层状态 output_fw, _ = lstm_cell_fw(inputs, dtype=tf.float32, sequence_length=nwords) # 反向每时刻隐藏层状态 output_bw, _ = lstm_cell_bw(inputs, dtype=tf.float32, sequence_length=nwords) # 将两个方向的状态,按时刻前后拼接在一起,沿最后一轴拼接 output = tf.concat([output_fw, output_bw], axis=-1) # 将output形状再变回来shape = (batch_size,time_len,input_size) output = tf.transpose(output, perm=[1, 0, 2]) with tf.name_scope('LSTM_dropout'): output = tf.layers.dropout(output, rate=params['dropout'], training=training) with tf.name_scope('Fully_connected_layer'): # 全连接层计算每一时刻的得分值 logits = tf.layers.dense(output, num_tags) with tf.name_scope('CRF'): # CRF转移矩阵 crf_params = tf.get_variable('crf', [num_tags, num_tags], dtype=tf.float32) # crf解码,pred_ids是预测的标记列表 pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) # 判断是训练,评估,还是预测 if mode == tf.estimator.ModeKeys.PREDICT: # 预测 # 获取标记tag与其索引的字典,格式为{id:tag,..} reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) # 将tag的id映射到tag上,获取预测的标记tag pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) # 此字典存储,需要预测的内容 predictions = {'pred_ids': pred_ids, 'tags': pred_strings} return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: # Loss # 获取标记与其索引映射表,{tag:id},注意包含了填充标记pad vocab_tags = tf.contrib.lookup.index_table_from_file( params['tags']) # 将真实tag转为id序列 tags = vocab_tags.lookup(labels) # 计算损失函数,负的对数似然 log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params) loss = tf.reduce_mean(-log_likelihood) # 评估指标 weights = tf.sequence_mask(nwords) metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, weights), 'precision': precision(tags, pred_ids, num_tags, indices, weights), 'recall': recall(tags, pred_ids, num_tags, indices, weights), 'f1': f1(tags, pred_ids, num_tags, indices, weights), } for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) # 评估 if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) # 训练 elif mode == tf.estimator.ModeKeys.TRAIN: # 优化器 train_op = tf.train.AdamOptimizer().minimize( loss, global_step=tf.train.get_or_create_global_step()) 
return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def model_fn(features, labels, mode, params): # For serving features are a bit different if isinstance(features, dict): features = ((features['words'], features['nwords']), (features['chars'], features['nchars'])) # Read vocabs and inputs dropout = params['dropout'] (words, nwords), (chars, nchars) = features training = (mode == tf.estimator.ModeKeys.TRAIN) vocab_words = tf.contrib.lookup.index_table_from_file( params['words'], num_oov_buckets=params['num_oov_buckets']) vocab_chars = tf.contrib.lookup.index_table_from_file( params['chars'], num_oov_buckets=params['num_oov_buckets']) with Path(params['tags']).open() as f: indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] num_tags = len(indices) + 1 with Path(params['chars']).open() as f: num_chars = sum(1 for _ in f) + params['num_oov_buckets'] # Char Embeddings char_ids = vocab_chars.lookup(chars) variable = tf.get_variable('chars_embeddings', [num_chars + 1, params['dim_chars']], tf.float32) char_embeddings = tf.nn.embedding_lookup(variable, char_ids) # char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout, # training=training) # Char 1d convolution weights = tf.sequence_mask(nchars) char_embeddings = masked_conv1d_and_max(char_embeddings, weights, params['char_filters'], params['char_kernel_size']) # Word Embeddings word_ids = vocab_words.lookup(words) glove = np.load(params['w2v'])['embeddings'] # np.array print("glove shape", glove.shape) variable = np.vstack([glove, [[0.] * params['dim']]]) # [vob_size, emb_size] variable = tf.Variable(variable, dtype=tf.float32, trainable=False) word_embeddings = tf.nn.embedding_lookup(variable, word_ids) # Concatenate Word and Char Embeddings embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # sess = tf.InteractiveSession() # emb_shape = sess.run(tf.shape(embeddings)) # print("-"*50,'emb_shape:',emb_shape) # block_unflat_scores shape: [batch_size, max_seq_len, class_num] block_unflat_scores, _, l2_loss = feature_layers(embeddings, reuse=False) pred_ids = tf.argmax(block_unflat_scores[-1], 2) if mode == tf.estimator.ModeKeys.PREDICT: # Predictions reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) predictions = {'pred_ids': pred_ids, 'tags': pred_strings} return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: # Loss # input_mask = tf.ones(shape=[words.get_shape().as_list()[0], params["max_seq_len"]], dtype=tf.int32) # input_mask = tf.ones_like(words,dtype=tf.int32) # for i, real_seq_len in enumerate(nwords): # input_mask[i, real_seq_len:] = 0 # input_mask = np.zeros((params["batch_size"], params["max_seq_len"])).astype("int") # for i, real_seq_len in enumerate(nwords.eval()): # input_mask[i, real_seq_len:] = 0 vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags']) tags = vocab_tags.lookup(labels) # CalculateMean cross-entropy loss with tf.name_scope("loss"): loss = tf.constant(0.0) # labels = tf.cast(labels, 'int32') # block_unflat_scores = tf.Print(block_unflat_scores,[block_unflat_scores[-1].shape]) # print(block_unflat_scores[-1].shape) # tags = tf.Print(tags,[tags.shape]) losses = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=block_unflat_scores[-1], labels=tags) # masked_losses = tf.multiply(losses, input_mask) # loss += tf.div(tf.reduce_sum(masked_losses), tf.reduce_sum(input_mask)) loss += tf.reduce_sum(losses) loss += 
params["l2_penalty"] * l2_loss # Metrics weights = tf.sequence_mask(nwords) # tags_min = tf.reduce_min(tags) # tags_min=tf.Print(tags_min,[tags_min], message="debug mertics tags_min") # tags = tf.Print(tags,[tags,tags_min], message="debug mertics tags") metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, weights), 'precision': precision(tags, pred_ids, num_tags, indices, weights), 'recall': recall(tags, pred_ids, num_tags, indices, weights), 'f1': f1(tags, pred_ids, num_tags, indices, weights), } for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: train_op = tf.train.AdamOptimizer().minimize( loss, global_step=tf.train.get_or_create_global_step()) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
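# The loss above sums the per-token cross-entropy without masking, so padded positions
# contribute to it (the masked variant is commented out). A common alternative, sketched
# here under the same tensor shapes rather than taken from the snippet, masks by the true
# sequence lengths and averages over real tokens only:
import tensorflow as tf

def masked_token_loss(logits, tags, nwords):
    """logits: [batch, max_len, num_tags]; tags: [batch, max_len]; nwords: [batch]."""
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=tags)
    mask = tf.sequence_mask(nwords, maxlen=tf.shape(tags)[1], dtype=losses.dtype)
    return tf.reduce_sum(losses * mask) / tf.maximum(tf.reduce_sum(mask), 1.0)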
def model_fn(self, features, labels, mode, params): # For serving, features are a bit different if isinstance(features, dict): features = features['words'], features['nwords'] # Read vocabs and inputs dropout = params['dropout'] words, nwords = features #nwords = tf.shape(words)[0] #print('###########tf.shape nwords:{}#######'.format(nwords)) training = (mode == tf.estimator.ModeKeys.TRAIN) vocab_words = tf.contrib.lookup.index_table_from_file( params['words'], num_oov_buckets=params['num_oov_buckets']) if mode == tf.estimator.ModeKeys.PREDICT: # Word Embeddings word_ids = vocab_words.lookup(words) if self.embeding == 'glove': glove = np.load(params['glove'])['embeddings'] # np.array variable = np.vstack([glove, [[0.]*params['dim']]]) variable = tf.Variable(variable, dtype=tf.float32, trainable=True) embeddings = tf.nn.embedding_lookup(variable, word_ids) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # add by panyc # with Path(params['words']).open() as f: # vocab_words = f.readlines() # vocab_length = len(vocab_words) # end else: embeddings = tf.Variable( # tf.random_uniform([vocab_length + 1, 300], -1.0, 1.0)) tf.random_normal([params['embeding_size'], 300], 0.0, 0.057735026918962574) ) embeddings = tf.nn.embedding_lookup(embeddings, word_ids) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # LSTM # t = tf.transpose(embed, perm=[1, 0, 2]) t = tf.transpose(embeddings, perm=[1, 0, 2]) lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) # CRF logits = tf.layers.dense(output, params['num_tags']) crf_params = tf.get_variable("crf", [params['num_tags'], params['num_tags']], dtype=tf.float32) pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) # Predictions reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) predictions = { 'pred_ids': pred_ids, 'tags': pred_strings } return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: print('##########nwords:{}###########'.format(nwords)) #words_shards = tf.split(words, self.num_gpus) #labels_shards = tf.split(labels, self.num_gpus) words = tf.cond(tf.less(tf.shape(words)[0], self.num_gpus), \ lambda:tf.concat([words]*self.num_gpus,0),lambda:words) nwords = tf.cond(tf.less(tf.shape(nwords)[0], self.num_gpus), \ lambda:tf.concat([nwords]*self.num_gpus,0),lambda:nwords) labels = tf.cond(tf.less(tf.shape(labels)[0], self.num_gpus), \ lambda:tf.concat([labels]*self.num_gpus,0),lambda:labels) n = (tf.shape(words)[0]//self.num_gpus ) * self.num_gpus words = words[:n] nwords = nwords[:n] labels = labels[:n] words_shards = tf.split(words, self.num_gpus) nwords_shards = tf.split(nwords, self.num_gpus) labels_shards = tf.split(labels, self.num_gpus) loss_shards = [] grad_shards = [] metric_accuracy = [] accuracy_op = None metric_precision = [] precision_op = None metric_recall = [] recall_op = None metric_f1 = [] f1_op = None #nwords = tf.div(nwords, self.num_gpus) #nwords=10 #nwords = tf.constant([nwords,], dtype=tf.int32) 
for i, device in enumerate(self.devices): with tf.variable_scope( tf.get_variable_scope(), reuse=True if i > 0 else None): with tf.device(device): words = words_shards[i] nwords = nwords_shards[i] labels = labels_shards[i] word_ids = vocab_words.lookup(words) if self.embeding == 'glove': glove = np.load(params['glove'])['embeddings'] # np.array variable = np.vstack([glove, [[0.]*params['dim']]]) variable = tf.Variable(variable, dtype=tf.float32, trainable=True) embeddings = tf.nn.embedding_lookup(variable, word_ids) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # add by panyc # with Path(params['words']).open() as f: # vocab_words = f.readlines() # vocab_length = len(vocab_words) # end else: embeddings = tf.Variable( # tf.random_uniform([vocab_length + 1, 300], -1.0, 1.0)) tf.random_normal([params['embeding_size'], 300], 0.0, 0.057735026918962574) ) embeddings = tf.nn.embedding_lookup(embeddings, word_ids) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # LSTM # t = tf.transpose(embed, perm=[1, 0, 2]) t = tf.transpose(embeddings, perm=[1, 0, 2]) lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) # CRF logits = tf.layers.dense(output, params['num_tags']) crf_params = tf.get_variable("crf", [params['num_tags'], params['num_tags']], dtype=tf.float32) pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) # Loss vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags']) # vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'], num_oov_buckets=params['num_oov_buckets']) tags = vocab_tags.lookup(labels) log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params) loss = tf.reduce_mean(-log_likelihood) loss_shards.append(loss) weights = tf.sequence_mask(nwords,tf.shape(tags)[1]) ## add by panyc #weights = tf.expand_dims(weights,axis=0) ## end val,accuracy_op = tf.metrics.accuracy(tags, pred_ids, weights) metric_accuracy.append([val]) val,precision_op = precision(tags, pred_ids, params['num_tags'], self.indices, weights) metric_precision.append([val]) val,recall_op = recall(tags, pred_ids, params['num_tags'], self.indices, weights) metric_recall.append([val]) val,f1_op = f1(tags, pred_ids, params['num_tags'], self.indices, weights) metric_f1.append([val]) loss = tf.reduce_mean(loss_shards) metric_accuracy = tf.reduce_mean(metric_accuracy) metric_precision = tf.reduce_mean(metric_precision) metric_recall = tf.reduce_mean(metric_recall) metric_f1 = tf.reduce_mean(metric_f1) metrics = { 'acc': (metric_accuracy,accuracy_op), 'precision': (metric_precision,precision_op), 'recall': (metric_recall, recall_op), 'f1': (metric_f1, f1_op), } # Metrics #weights = tf.sequence_mask(nwords) for metric_name, op in metrics.items(): print('############op##########') print(op) tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec( mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: # train_op = tf.train.AdamOptimizer().minimize( # loss, 
global_step=tf.train.get_or_create_global_step()) train_op = tf.train.AdamOptimizer(learning_rate=self.params['learnning_rate']).minimize( loss, global_step=tf.train.get_or_create_global_step()) return tf.estimator.EstimatorSpec( mode, loss=loss, train_op=train_op)
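# The multi-GPU branch above duplicates a too-small batch, truncates it to a multiple of
# num_gpus, and splits it into per-device shards. A compact restatement of that trick
# (a sketch mirroring the logic used above; `tensor` is any batch-major input tensor):
import tensorflow as tf

def shard_batch(tensor, num_gpus):
    # If the batch is smaller than num_gpus, tile it so every device gets data.
    tensor = tf.cond(tf.less(tf.shape(tensor)[0], num_gpus),
                     lambda: tf.concat([tensor] * num_gpus, axis=0),
                     lambda: tensor)
    # Truncate to the largest multiple of num_gpus, then split into equal shards.
    n = (tf.shape(tensor)[0] // num_gpus) * num_gpus
    return tf.split(tensor[:n], num_gpus)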
def build_metrics(self): with tf.name_scope('Metrics'): average = 'micro' with tf.name_scope('Train'): self.train_target_precision, self.train_target_precision_op = tf_metrics.precision( self.y_target, self.target_preds, self.C_tar, [i for i in range(1, self.C_tar)], average=average) self.train_target_recall, self.train_target_recall_op = tf_metrics.recall( self.y_target, self.target_preds, self.C_tar, [i for i in range(1, self.C_tar)], average=average) self.train_target_f1, self.train_target_f1_op = tf_metrics.f1( self.y_target, self.target_preds, self.C_tar, [i for i in range(1, self.C_tar)], average=average) self.train_sentiment_precision, self.train_sentiment_precision_op = tf_metrics.precision( self.y_sentiment, self.sentiment_preds, self.C_sent, [i for i in range(1, self.C_sent)], average=average) self.train_sentiment_recall, self.train_sentiment_recall_op = tf_metrics.recall( self.y_sentiment, self.sentiment_preds, self.C_sent, [i for i in range(1, self.C_sent)], average=average) self.train_sentiment_f1, self.train_sentiment_f1_op = tf_metrics.f1( self.y_sentiment, self.sentiment_preds, self.C_sent, [i for i in range(1, self.C_sent)], average=average) with tf.name_scope('Test'): self.test_target_precision, self.test_target_precision_op = tf_metrics.precision( self.y_target, self.target_preds, self.C_tar, [i for i in range(1, self.C_tar)], average=average) self.test_target_recall, self.test_target_recall_op = tf_metrics.recall( self.y_target, self.target_preds, self.C_tar, [i for i in range(1, self.C_tar)], average=average) self.test_target_f1, self.test_target_f1_op = tf_metrics.f1( self.y_target, self.target_preds, self.C_tar, [i for i in range(1, self.C_tar)], average=average) self.test_sentiment_precision, self.test_sentiment_precision_op = tf_metrics.precision( self.y_sentiment, self.sentiment_preds, self.C_sent, [i for i in range(1, self.C_sent)], average=average) self.test_sentiment_recall, self.test_sentiment_recall_op = tf_metrics.recall( self.y_sentiment, self.sentiment_preds, self.C_sent, [i for i in range(1, self.C_sent)], average=average) self.test_sentiment_f1, self.test_sentiment_f1_op = tf_metrics.f1( self.y_sentiment, self.sentiment_preds, self.C_sent, [i for i in range(1, self.C_sent)], average=average)
def model_fn(features, labels, mode, params): # For serving features are a bit different if isinstance(features, dict): features = ((features['words'], features['nwords']), (features['chars'], features['nchars'])) with Path(params['tags']).open() as f: indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] num_tags = len(indices) + 1 params['num_tags'] = num_tags # Graph (words, nwords), (chars, nchars) = features logits, crf_params = graph_fn(features, labels, mode, params) pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) # Moving Average variables = tf.get_collection('trainable_variables', 'graph') ema = tf.train.ExponentialMovingAverage(0.999) ema_op = ema.apply(variables) logits_ema, crf_params_ema = graph_fn(features, labels, mode, params, reuse=True, getter=ema_getter(ema)) pred_ids_ema, _ = tf.contrib.crf.crf_decode(logits_ema, crf_params_ema, nwords) if mode == tf.estimator.ModeKeys.PREDICT: # Predictions reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) pred_strings_ema = reverse_vocab_tags.lookup(tf.to_int64(pred_ids_ema)) predictions = { 'pred_ids': pred_ids, 'tags': pred_strings, 'pred_ids_ema': pred_ids_ema, 'tags_ema': pred_strings_ema, } return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: # Loss vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags']) tags = vocab_tags.lookup(labels) log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params) loss = tf.reduce_mean(-log_likelihood) # Metrics weights = tf.sequence_mask(nwords) metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, weights), 'acc_ema': tf.metrics.accuracy(tags, pred_ids_ema, weights), 'pr': precision(tags, pred_ids, num_tags, indices, weights), 'pr_ema': precision(tags, pred_ids_ema, num_tags, indices, weights), 'rc': recall(tags, pred_ids, num_tags, indices, weights), 'rc_ema': recall(tags, pred_ids_ema, num_tags, indices, weights), 'f1': f1(tags, pred_ids, num_tags, indices, weights), 'f1_ema': f1(tags, pred_ids_ema, num_tags, indices, weights), } for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: train_op = tf.train.AdamOptimizer().minimize( loss, global_step=tf.train.get_or_create_global_step(), var_list=variables) train_op = tf.group([train_op, ema_op]) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
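# The model_fn above calls graph_fn(..., getter=ema_getter(ema)), but ema_getter itself is
# not shown. A sketch of the usual custom-getter pattern it presumably follows (assuming
# `ema` is the tf.train.ExponentialMovingAverage whose apply() has already been called on
# the trainable variables):
import tensorflow as tf

def ema_getter(ema):
    """Custom getter that substitutes each variable with its exponential moving average."""
    def _getter(getter, name, *args, **kwargs):
        var = getter(name, *args, **kwargs)
        ema_var = ema.average(var)  # shadow variable, or None if var is not tracked
        return ema_var if ema_var is not None else var
    return _getter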
def model_fn(features, labels, mode, params): if isinstance(features, dict): features = features['words'], features['nwords'] dropout = params['dropout'] words, nwords = features training = (mode == tf.estimator.ModeKeys.TRAIN) vocab_words = tf.contrib.lookup.index_table_from_file( params['words'], num_oov_buckets=params['num_oov_buckets']) with Path(params['tags']).open() as f: indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] num_tags = len(indices) + 1 word_ids = vocab_words.lookup(words) glove = np.load(params['glove'])['embeddings'] # np.array variable = np.vstack([glove, [[0.] * params['dim']]]) variable = tf.Variable(variable, dtype=tf.float32, trainable=False) embeddings = tf.nn.embedding_lookup(variable, word_ids) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) t = tf.transpose(embeddings, perm=[1, 0, 2]) lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) dense_layer = tf.layers.Dense(num_tags) logits = dense_layer(output) crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32) pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) if mode == tf.estimator.ModeKeys.PREDICT: reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) predictions = {'pred_ids': pred_ids, 'tags': pred_strings} return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags']) tags = vocab_tags.lookup(labels) log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params) loss = tf.reduce_mean(-log_likelihood) ########## epsilon = 5 perturbed = _add_perturbation(embeddings, loss, epsilon) t = tf.transpose(perturbed, perm=[1, 0, 2]) output_fw1, _1 = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw1, _1 = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output1 = tf.concat([output_fw1, output_bw1], axis=-1) output1 = tf.transpose(output1, perm=[1, 0, 2]) output1 = tf.layers.dropout(output1, rate=dropout, training=training) logits1 = dense_layer(output1) log_likelihood1, _1 = tf.contrib.crf.crf_log_likelihood( logits1, tags, nwords, crf_params) adv_loss = tf.reduce_mean(-log_likelihood1) loss += adv_loss ########## weights = tf.sequence_mask(nwords) metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, weights), 'precision': precision(tags, pred_ids, num_tags, indices, weights), 'recall': recall(tags, pred_ids, num_tags, indices, weights), 'f1': f1(tags, pred_ids, num_tags, indices, weights), } for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: train_op = tf.train.AdamOptimizer().minimize( loss, global_step=tf.train.get_or_create_global_step()) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
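# The adversarial branch above relies on _add_perturbation, which is not defined in the
# snippet. A sketch of the adversarial-training formulation it appears to follow
# (Miyato-style: shift the embeddings along the normalized gradient of the loss, scaled
# by epsilon; this is an assumption, not the snippet's own definition):
import tensorflow as tf

def _add_perturbation(embeddings, loss, epsilon):
    """Return embeddings plus an epsilon-scaled, L2-normalized adversarial perturbation."""
    grad, = tf.gradients(loss, [embeddings])
    grad = tf.stop_gradient(grad)  # treat the gradient as a constant when backpropagating
    perturbation = epsilon * tf.nn.l2_normalize(grad, axis=[1, 2])
    return embeddings + perturbation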
def model_fn(mode, features, labels, params): model_params = ModelParams(**params['model_params']) training_params = TrainingParams.from_dict(params['training_params']) prediction_type = params['prediction_type'] classes_file = params['classes_file'] input_images = features['images'] if mode == tf.estimator.ModeKeys.PREDICT: margin = training_params.training_margin input_images = tf.pad( input_images, [[0, 0], [margin, margin], [margin, margin], [0, 0]], mode='SYMMETRIC', name='mirror_padding') if model_params.pretrained_model_name == 'vgg16': network_output = inference_vgg16( input_images, model_params, model_params.n_classes, use_batch_norm=model_params.batch_norm, weight_decay=model_params.weight_decay, is_training=(mode == tf.estimator.ModeKeys.TRAIN)) key_restore_model = 'vgg_16' elif model_params.pretrained_model_name == 'resnet50': # Modified by me: added 'selected_intermediate_layers' network_output, selected_intermediate_layers = inference_resnet_v1_50( input_images, model_params, model_params.n_classes, use_batch_norm=model_params.batch_norm, weight_decay=model_params.weight_decay, is_training=(mode == tf.estimator.ModeKeys.TRAIN)) key_restore_model = 'resnet_v1_50' #added for classification elif model_params.pretrained_model_name == 'resnet50_classification': network_output, selected_intermediate_layers = inference_resnet_v1_50_classification( input_images, model_params, model_params.n_classes, use_batch_norm=model_params.batch_norm, weight_decay=model_params.weight_decay, is_training=(mode == tf.estimator.ModeKeys.TRAIN)) key_restore_model = 'resnet_v1_50' elif model_params.pretrained_model_name == 'unet': network_output = inference_u_net( input_images, model_params, model_params.n_classes, use_batch_norm=model_params.batch_norm, weight_decay=model_params.weight_decay, is_training=(mode == tf.estimator.ModeKeys.TRAIN)) key_restore_model = None else: raise NotImplementedError if mode == tf.estimator.ModeKeys.TRAIN: if key_restore_model is not None: # Pretrained weights as initialization pretrained_restorer = tf.train.Saver(var_list=[ v for v in tf.global_variables() if key_restore_model in v.name ]) def init_fn(scaffold, session): pretrained_restorer.restore(session, model_params.pretrained_model_file) else: init_fn = None else: init_fn = None if mode == tf.estimator.ModeKeys.PREDICT: pass #margin = training_params.training_margin # Crop padding #if margin > 0: # network_output = network_output[:, margin:-margin, margin:-margin, :] # Prediction # ---------- # Added by me: second dictionary intermediate_layers_dict = {} if prediction_type == PredictionType.CLASSIFICATION: #squeezed for image classification #network_output = tf.Print(network_output, [tf.shape(tf.nn.softmax(tf.squeeze(network_output))), tf.shape(labels)]) prediction_probs = tf.nn.softmax(tf.squeeze(network_output), name='softmax') prediction_labels = tf.argmax(tf.squeeze(network_output), axis=-1, name='label_preds') predictions = {'probs': prediction_probs, 'labels': prediction_labels} # Added by me: second dictionary desired_endpoints = [ 'resnet_v1_50/conv1', 'resnet_v1_50/block1/unit_3/bottleneck_v1', 'resnet_v1_50/block2/unit_4/bottleneck_v1', 'resnet_v1_50/block3/unit_6/bottleneck_v1', 'resnet_v1_50/block4/unit_3/bottleneck_v1' ] #added by me, commented due to classification for index, selected_intermediate_layer in enumerate( selected_intermediate_layers): intermediate_layers_dict[ desired_endpoints[index]] = selected_intermediate_layer elif prediction_type == PredictionType.REGRESSION: predictions = 
{'output_values': network_output} prediction_labels = network_output elif prediction_type == PredictionType.MULTILABEL: with tf.name_scope('prediction_ops'): prediction_probs = tf.nn.sigmoid(network_output, name='sigmoid') # [B,H,W,C] prediction_labels = tf.cast( tf.greater_equal(prediction_probs, 0.5, name='labels'), tf.int32) # [B,H,W,C] predictions = { 'probs': prediction_probs, 'labels': prediction_labels } else: raise NotImplementedError # Loss # ---- if mode in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL]: regularized_loss = tf.losses.get_regularization_loss() if prediction_type == PredictionType.CLASSIFICATION: #onehot_labels = tf.one_hot(indices=labels, depth=model_params.n_classes) #network_output = tf.Print(network_output, [network_output]) with tf.name_scope("loss"): per_pixel_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=tf.squeeze(network_output), labels=labels, name='per_pixel_loss') #per_pixel_loss = tf.nn.softmax_cross_entropy_with_logits(logits=network_output, # labels=onehot_labels, name='per_pixel_loss') #if training_params.focal_loss_gamma > 0.0: # # Probability per pixel of getting the correct label # probs_correct_label = tf.reduce_max(tf.multiply(prediction_probs, onehot_labels)) # modulation = tf.pow((1. - probs_correct_label), training_params.focal_loss_gamma) # per_pixel_loss = tf.multiply(per_pixel_loss, modulation) if training_params.weights_labels is not None: weight_mask = tf.reduce_sum(tf.constant( np.array(training_params.weights_labels, dtype=np.float32)[None, None, None]) * onehot_labels, axis=-1) per_pixel_loss = per_pixel_loss * weight_mask if training_params.local_entropy_ratio > 0: assert 'weight_maps' in features r = training_params.local_entropy_ratio per_pixel_loss = per_pixel_loss * ( (1 - r) + r * features['weight_maps']) elif prediction_type == PredictionType.REGRESSION: per_pixel_loss = tf.squared_difference(labels, network_output, name='per_pixel_loss') elif prediction_type == PredictionType.MULTILABEL: with tf.name_scope('sigmoid_xentropy_loss'): labels_floats = tf.cast(labels, tf.float32) per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits( labels=labels_floats, logits=network_output, name='per_pixel_loss') if training_params.weights_labels is not None: weight_mask = tf.maximum( tf.reduce_max(tf.constant( np.array(training_params.weights_labels, dtype=np.float32)[None, None, None]) * labels_floats, axis=-1), 1.0) per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None] else: raise NotImplementedError margin = training_params.training_margin input_shapes = features['shapes'] with tf.name_scope('Loss'): def _fn(_in): output, shape = _in return tf.reduce_mean(output[margin:shape[0] - margin, margin:shape[1] - margin]) #per_img_loss = tf.map_fn(_fn, (per_pixel_loss, input_shapes), dtype=tf.float32) per_img_loss = per_pixel_loss loss = tf.reduce_mean(per_img_loss, name='loss') loss += regularized_loss else: loss, regularized_loss = None, None # Train # ----- if mode == tf.estimator.ModeKeys.TRAIN: # >> Stucks the training... Why ? 
# ema = tf.train.ExponentialMovingAverage(0.9) # tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, ema.apply([loss])) # ema_loss = ema.average(loss) if training_params.exponential_learning: global_step = tf.train.get_or_create_global_step() learning_rate = tf.train.exponential_decay( training_params.learning_rate, global_step, decay_steps=200, decay_rate=0.95, staircase=False) else: learning_rate = training_params.learning_rate tf.summary.scalar('learning_rate', learning_rate) optimizer = tf.train.AdamOptimizer(learning_rate) with tf.control_dependencies(tf.get_collection( tf.GraphKeys.UPDATE_OPS)): train_op = optimizer.minimize( loss, global_step=tf.train.get_or_create_global_step()) else: ema_loss, train_op = None, None # Summaries # --------- if mode == tf.estimator.ModeKeys.TRAIN: with tf.name_scope('summaries'): tf.summary.scalar('losses/loss', loss) tf.summary.scalar('losses/loss_per_batch', loss) tf.summary.scalar('losses/regularized_loss', regularized_loss) if prediction_type == PredictionType.CLASSIFICATION: pass #tf.summary.image('output/prediction', # tf.image.resize_images(class_to_label_image(prediction_labels, classes_file), # tf.cast(tf.shape(network_output)[1:3] / 3, tf.int32)), # max_outputs=1) #if model_params.n_classes == 3: # tf.summary.image('output/probs', # tf.image.resize_images(prediction_probs[:, :, :, :], # tf.cast(tf.shape(network_output)[1:3] / 3, tf.int32)), # max_outputs=1) #if model_params.n_classes == 2: # tf.summary.image('output/probs', # tf.image.resize_images(prediction_probs[:, :, :, 1:2], # tf.cast(tf.shape(network_output)[1:3] / 3, tf.int32)), # max_outputs=1) elif prediction_type == PredictionType.REGRESSION: summary_img = tf.nn.relu( network_output)[:, :, :, 0:1] # Put negative values to zero tf.summary.image('output/prediction', summary_img, max_outputs=1) elif prediction_type == PredictionType.MULTILABEL: labels_visualization = tf.cast(prediction_labels, tf.int32) labels_visualization = multiclass_to_label_image( labels_visualization, classes_file) tf.summary.image('output/prediction_image', tf.image.resize_images( labels_visualization, tf.cast( tf.shape(labels_visualization)[1:3] / 3, tf.int32)), max_outputs=1) class_dim = prediction_probs.get_shape().as_list()[-1] for c in range(0, class_dim): tf.summary.image('output/prediction_probs_{}'.format(c), tf.image.resize_images( prediction_probs[:, :, :, c:c + 1], tf.cast( tf.shape(network_output)[1:3] / 3, tf.int32)), max_outputs=1) # beta = tf.get_default_graph().get_tensor_by_name('upsampling/deconv_5/conv5/batch_norm/beta/read:0') # tf.summary.histogram('Beta', beta) # Evaluation # ---------- if mode == tf.estimator.ModeKeys.EVAL: if prediction_type == PredictionType.CLASSIFICATION: metrics = { 'eval/accuracy': tf.metrics.accuracy(labels, predictions=prediction_labels) } nr_classes = params['model_params']['n_classes'] for class_id in range(nr_classes): condition = tf.logical_or( tf.equal(class_id, tf.squeeze(labels)), tf.equal(class_id, tf.cast(tf.squeeze(prediction_labels), tf.int32))) weights = tf.cond(condition, lambda: 1, lambda: 0) precision_key = 'eval/precision_class_{}'.format(class_id) recall_key = 'eval/recall_class_{}'.format(class_id) pred = tf.reshape(tf.expand_dims(prediction_labels, axis=0), [1]) lab = tf.reshape(tf.expand_dims(labels, axis=0), [1]) weights = tf.reshape(tf.expand_dims(weights, axis=0), [1]) precision = tf_metrics.precision(labels=lab, predictions=pred, num_classes=nr_classes, pos_indices=[class_id], weights=weights, average='micro') recall = 
tf_metrics.recall(labels=lab, predictions=pred, num_classes=nr_classes, pos_indices=[class_id], weights=weights, average='micro') metrics[precision_key] = precision metrics[recall_key] = recall precision_key = 'eval/macro_mean_precision_per_class' recall_key = 'eval/macro_mean_recall_per_class' mean_precision = tf_metrics.precision(labels=lab, predictions=pred, num_classes=nr_classes, average='macro') mean_recall = tf_metrics.recall(labels=lab, predictions=pred, num_classes=nr_classes, average='macro') metrics[precision_key] = mean_precision metrics[recall_key] = mean_recall precision_key = 'eval/weighted_mean_precision_per_class' recall_key = 'eval/weighted_mean_recall_per_class' mean_precision = tf_metrics.precision(labels=lab, predictions=pred, num_classes=nr_classes, average='weighted') mean_recall = tf_metrics.recall(labels=lab, predictions=pred, num_classes=nr_classes, average='weighted') metrics[precision_key] = mean_precision metrics[recall_key] = mean_recall elif prediction_type == PredictionType.REGRESSION: metrics = { 'eval/accuracy': tf.metrics.mean_squared_error(labels, predictions=prediction_labels) } elif prediction_type == PredictionType.MULTILABEL: metrics = { 'eval/MSE': tf.metrics.mean_squared_error(tf.cast(labels, tf.float32), predictions=prediction_probs), 'eval/accuracy': tf.metrics.accuracy(tf.cast(labels, tf.bool), predictions=tf.cast( prediction_labels, tf.bool)) } else: metrics = None # Export # ------ if mode == tf.estimator.ModeKeys.PREDICT: export_outputs = dict() if 'original_shape' in features.keys(): with tf.name_scope('ResizeOutput'): # resized_predictions = dict() # Resize all the elements in predictions # for k, v in predictions.items(): # Labels is rank-3 so we need to be careful in using tf.image.resize_images # assert isinstance(v, tf.Tensor) # v2 = v if len(v.get_shape()) == 4 else v[:, :, :, None] # v2 = tf.image.resize_images(v2, features['original_shape'], # method=tf.image.ResizeMethod.BILINEAR if v.dtype == tf.float32 # else tf.image.ResizeMethod.NEAREST_NEIGHBOR) # v2 = v2 if len(v.get_shape()) == 4 else v2[:, :, :, 0] # resized_predictions[k] = v2 # export_outputs['resized_output'] = tf.estimator.export.PredictOutput(resized_predictions) #added by me: second dictionary intermediate_predictions = dict() for k, v in intermediate_layers_dict.items(): assert isinstance(v, tf.Tensor) intermediate_predictions[k] = v export_outputs[ 'intermediate_layers'] = tf.estimator.export.PredictOutput( intermediate_predictions) predictions['original_shape'] = features['original_shape'] #added by me: second dictionary predictions.update(intermediate_layers_dict) export_outputs['output'] = tf.estimator.export.PredictOutput( predictions) export_outputs[ tf.saved_model.signature_constants. DEFAULT_SERVING_SIGNATURE_DEF_KEY] = export_outputs['output'] else: export_outputs = None return tf.estimator.EstimatorSpec( mode, predictions=predictions, loss=loss, train_op=train_op, eval_metric_ops=metrics, export_outputs=export_outputs, scaffold=tf.train.Scaffold(init_fn=init_fn))
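# A minimal sketch (not from the original project) of wiring the segmentation model_fn
# above into a TF1 Estimator. The params keys mirror what the function reads
# ('model_params', 'training_params', 'prediction_type', 'classes_file'); the model_dir,
# input_fn and concrete parameter values below are placeholders/assumptions.
import tensorflow as tf

def make_segmentation_estimator(model_dir, params):
    run_config = tf.estimator.RunConfig(save_checkpoints_steps=500, keep_checkpoint_max=3)
    return tf.estimator.Estimator(model_fn=model_fn,   # the model_fn defined just above
                                  model_dir=model_dir,
                                  params=params,
                                  config=run_config)

# Hypothetical usage:
# estimator = make_segmentation_estimator('segmentation_model', {
#     'model_params': {...}, 'training_params': {...},
#     'prediction_type': PredictionType.MULTILABEL, 'classes_file': 'classes.txt'})
# estimator.train(input_fn=train_input_fn, steps=1000)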
def model_fn(features, labels, mode, params): # For serving features are a bit different if isinstance(features, dict): features = ((features['words'], features['nwords']), (features['chars'], features['nchars']), (features['jasos'], features['njasos'])) # Read vocabs and inputs (words, nwords), (chars, nchars), (jasos, njasos) = features dropout = params['dropout'] training = (mode == tf.estimator.ModeKeys.TRAIN) vocab_words = tf.contrib.lookup.index_table_from_file( params['words'], num_oov_buckets=params['num_oov_buckets']) vocab_chars = tf.contrib.lookup.index_table_from_file( params['chars'], num_oov_buckets=params['num_oov_buckets']) vocab_jasos = tf.contrib.lookup.index_table_from_file( params['jasos'], num_oov_buckets=params['num_oov_buckets']) with Path(params['tags']).open() as f: indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] num_tags = len(indices) + 1 with Path(params['chars']).open(encoding="utf8") as f: num_chars = sum(1 for _ in f) + params['num_oov_buckets'] with Path(params['jasos']).open(encoding="utf8") as f: num_jasos = sum(1 for _ in f) + params['num_oov_buckets'] # jaos embedding jaso_ids = vocab_jasos.lookup(jasos) variable = tf.get_variable('jasos_embeddings', [num_jasos, params['dim_chars']], tf.float32) jaso_embeddings = tf.nn.embedding_lookup(variable, jaso_ids) jaso_embeddings = tf.layers.dropout(jaso_embeddings, rate=dropout, training=training) # Char LSTM dim_words = tf.shape(jaso_embeddings)[1] dim_chars = tf.shape(jaso_embeddings)[2] flat = tf.reshape(jaso_embeddings, [-1, dim_chars, params['dim_chars']]) t = tf.transpose(flat, perm=[1, 0, 2]) lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) _, (_, output_fw) = lstm_cell_fw(t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1])) _, (_, output_bw) = lstm_cell_bw(t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1])) output = tf.concat([output_fw, output_bw], axis=-1) jaso_embeddings = tf.reshape(output, [-1, dim_words, 50]) # Char Embeddings char_ids = vocab_chars.lookup(chars) variable = tf.get_variable('chars_embeddings', [num_chars, params['dim_chars']], tf.float32) char_embeddings = tf.nn.embedding_lookup(variable, char_ids) char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout, training=training) # Char LSTM dim_words = tf.shape(char_embeddings)[1] dim_chars = tf.shape(char_embeddings)[2] flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']]) t = tf.transpose(flat, perm=[1, 0, 2]) lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) _, (_, output_fw) = lstm_cell_fw(t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1])) _, (_, output_bw) = lstm_cell_bw(t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1])) output = tf.concat([output_fw, output_bw], axis=-1) char_embeddings = tf.reshape(output, [-1, dim_words, 50]) # Word Embeddings word_ids = vocab_words.lookup(words) fasttext = np.load(params['fasttext'])['embeddings'] # np.array variable = np.vstack([fasttext, [[0.] 
* params['dim']]]) variable = tf.Variable(variable, dtype=tf.float32) #, trainable=False) word_embeddings = tf.nn.embedding_lookup(variable, word_ids) # Concatenate Word and Char Embeddings embeddings = tf.concat([word_embeddings, char_embeddings, jaso_embeddings], axis=-1) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # LSTM t = tf.transpose(embeddings, perm=[1, 0, 2]) # Need time-major lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) # CRF logits = tf.layers.dense(output, num_tags) crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32) pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) if mode == tf.estimator.ModeKeys.PREDICT: # Predictions reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) predictions = {'pred_ids': pred_ids, 'tags': pred_strings} return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: # Loss vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags']) tags = vocab_tags.lookup(labels) log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params) loss = tf.reduce_mean(-log_likelihood) # Metrics weights = tf.sequence_mask(nwords) metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, weights), 'precision': precision(tags, pred_ids, num_tags, indices, weights), 'recall': recall(tags, pred_ids, num_tags, indices, weights), 'f1': f1(tags, pred_ids, num_tags, indices, weights), } for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: train_op = tf.train.AdamOptimizer().minimize( loss, global_step=tf.train.get_or_create_global_step()) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
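# The char and jaso encoders above repeat the same fused bi-LSTM "last hidden state"
# pattern. A standalone sketch of that pattern follows; the function name, argument
# names and sizes are illustrative assumptions, not identifiers from the original code.
import tensorflow as tf

def fused_bilstm_last_state(inputs_time_major, sequence_lengths, lstm_size):
    """inputs_time_major: [time, batch, dim]; returns [batch, 2 * lstm_size]."""
    cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(lstm_size)
    cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(
        tf.contrib.rnn.LSTMBlockFusedCell(lstm_size))
    # Each fused cell returns (outputs, (cell_state, hidden_state)); keep only the
    # final hidden state of each direction, as the code above does.
    _, (_, last_fw) = cell_fw(inputs_time_major, dtype=tf.float32,
                              sequence_length=sequence_lengths)
    _, (_, last_bw) = cell_bw(inputs_time_major, dtype=tf.float32,
                              sequence_length=sequence_lengths)
    return tf.concat([last_fw, last_bw], axis=-1)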
def model_fn(features, labels, mode, params): # For serving, features are a bit different if isinstance(features, dict): features = ((features['words'], features['nwords']), (features['chars'], features['nchars'])) # Read vocabs and inputs (words, nwords), (chars, nchars) = features dropout = params['dropout'] training = (mode == tf.estimator.ModeKeys.TRAIN) vocab_words = tf.contrib.lookup.index_table_from_file( params['words'], num_oov_buckets=params['num_oov_buckets']) vocab_chars = tf.contrib.lookup.index_table_from_file( params['chars'], num_oov_buckets=params['num_oov_buckets']) with Path(params['tags']).open() as f: indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] num_tags = len(indices) + 1 with Path(params['chars']).open() as f: num_chars = sum(1 for _ in f) + params['num_oov_buckets'] # Char Embeddings char_ids = vocab_chars.lookup(chars) # [[a,b],[c,z]] => [[0,1],[2,25]] variable = tf.get_variable('chars_embeddings', [num_chars, params['dim_chars']], tf.float32) # char embedding matrix, e.g. [86, 100] char_embeddings = tf.nn.embedding_lookup( variable, char_ids ) # each char id picks its row: id 0 takes the first vector (variable[0, :]), so [[0,1],[2,25]] => [[variable[0,:],variable[1,:]],[variable[2,:],variable[25,:]]] char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout, training=training) # dropout on the input # Char LSTM dim_words = tf.shape( char_embeddings )[1] # max number of words per sentence; shape is [batch (sentences), max words, max chars (time len), char dim 100] dim_chars = tf.shape( char_embeddings )[2] # max number of chars per word (time len); char dim is 100 flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars'] ]) # [?, max word len (time len), 100] t = tf.transpose(flat, perm=[1, 0, 2]) # [max word len (time len), ?, 100], time-major lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) _, (_, output_fw) = lstm_cell_fw(t, dtype=tf.float32, sequence_length=tf.reshape( nchars, [-1])) # we take the last state _, (_, output_bw) = lstm_cell_bw(t, dtype=tf.float32, sequence_length=tf.reshape( nchars, [-1])) # we take the last state output = tf.concat( [output_fw, output_bw], axis=-1) # concat on the last dimension of the tensors, 25+25 char_embeddings_lstm = tf.reshape( output, [-1, params['char_lstm_size'] * 2]) # [b, t, D] char_embeddings_lstm = tf.expand_dims(char_embeddings_lstm, -2) # Char 1d convolution weights = tf.sequence_mask(nchars) char_embeddings_cnn = masked_conv1d_and_max(char_embeddings, weights, params['filters'], params['kernel_size']) char_embeddings_cnn = tf.reshape(char_embeddings_cnn, [-1, params['filters']]) char_embeddings_cnn = tf.expand_dims(char_embeddings_cnn, -2) # concat cnn and lstm char embeddings char_embeddings = tf.concat([char_embeddings_cnn, char_embeddings_lstm], axis=-2) # attention with tf.name_scope('Attention_layer'): attention_output, alphas = attention(char_embeddings, params['char_lstm_size'] * 2, time_major=False, return_alphas=True) tf.summary.histogram('alphas', alphas) char_embeddings = tf.reshape(attention_output, [-1, dim_words, params['char_lstm_size'] * 2]) # Word Embeddings word_ids = vocab_words.lookup( words ) # [[b'Peter', b'Blackburn'],[b'Yac', b'Amirat']] => [[0, 1],[2, 3]] glove = np.load(params['glove'])[ 'embeddings'] # np.array of GloVe vectors restricted to the vocab words variable =
np.vstack([glove, [[0.] * params['dim']] ]) # concatenate along axis 0: glove + one zero vector for OOV words variable = tf.Variable(variable, dtype=tf.float32, trainable=False) word_embeddings = tf.nn.embedding_lookup( variable, word_ids ) # [[0, 1],[2, 3]] => [[variable[0], variable[1]],[variable[2], variable[3]]], shape [2, 2, 300] # Concatenate Word and Char Embeddings embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1) # concat on the last dimension, 100+300 embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # dropout on the input # LSTM 1 t = tf.transpose( embeddings, perm=[1, 0, 2] ) # Need time-major: put the time (word) dimension first; cf. batch-major vs time-major lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) # ELMo elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=False) word_embeddings = elmo(inputs={ "tokens": words, "sequence_len": nwords }, signature="tokens", as_dict=True)["elmo"] # Concatenate output of LSTM 1 and ELMo embeddings, dropout embeddings = tf.concat([word_embeddings, output], axis=-1) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # LSTM 2 t = tf.transpose(embeddings, perm=[1, 0, 2]) # Need time-major lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm2_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm2_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) # CRF logits = tf.layers.dense( output, num_tags ) # dense layer on the bi-LSTM output; same shape except the last dim becomes num_tags crf_params = tf.get_variable( "crf", [num_tags, num_tags], dtype=tf.float32) # CRF transition-parameter matrix, num_tags x num_tags pred_ids, _ = tf.contrib.crf.crf_decode( logits, crf_params, nwords ) # decode_tags: a [batch_size, max_seq_len] int32 matrix containing the highest-scoring tag indices; potentials (logits): a [batch_size, max_seq_len, num_tags] tensor of unary potentials.
if mode == tf.estimator.ModeKeys.PREDICT: # Prediction # Predictions reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup( tf.to_int64(pred_ids) ) # e.g. indices = tf.constant([1, 5], tf.int64) => ["lake", "UNKNOWN"] predictions = {'pred_ids': pred_ids, 'tags': pred_strings} return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: # Loss vocab_tags = tf.contrib.lookup.index_table_from_file( params['tags']) # get tag indices from file tags = vocab_tags.lookup(labels) # replace labels by their indices log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params ) # log-likelihood given the gold tags; returns a [batch_size] tensor containing the log-likelihood of each example given its tag sequence loss = tf.reduce_mean( -log_likelihood ) # mean over the batch; e.g. x = tf.constant([[1., 1.], [2., 2.]]); tf.reduce_mean(x) # 1.5 # Metrics weights = tf.sequence_mask( nwords ) # converts a length vector of size n into a boolean matrix of size n * max value, e.g. v = [1, 2] ==> m = [[True, False], [True, True]] metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, weights), 'precision': precision( tags, pred_ids, num_tags, indices, weights ), # ground truth, predictions, number of tags, indices of the positive classes, mask 'recall': recall(tags, pred_ids, num_tags, indices, weights), 'f1': f1(tags, pred_ids, num_tags, indices, weights), } for metric_name, op in metrics.items(): tf.summary.scalar( metric_name, op[1] ) # for TensorBoard; each metric is a tuple (scalar value tensor, update_op), and op[1] is the update_op that increments the internal totals and whose value matches the metric if mode == tf.estimator.ModeKeys.EVAL: # Eval return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: # Training train_op = tf.train.AdamOptimizer().minimize( loss, global_step=tf.train.get_or_create_global_step() ) # Adam optimizer op that minimizes the loss; global_step is incremented by one after the variables are updated return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
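# The NER model_fns above all share the same CRF head. A minimal sketch of that head in
# isolation; the function and argument names are illustrative assumptions, the calls are
# the same tf.contrib.crf ops used above.
import tensorflow as tf

def crf_head(sequence_output, tag_ids, sequence_lengths, num_tags):
    """sequence_output: [batch, time, hidden]; tag_ids: [batch, time] gold tag indices."""
    logits = tf.layers.dense(sequence_output, num_tags)           # unary potentials
    crf_params = tf.get_variable('crf', [num_tags, num_tags], dtype=tf.float32)
    # Viterbi decode gives the best-scoring tag sequence per example.
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, sequence_lengths)
    # The negative mean log-likelihood of the gold tag sequences is the training loss.
    log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
        logits, tag_ids, sequence_lengths, crf_params)
    loss = tf.reduce_mean(-log_likelihood)
    return pred_ids, loss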
def model_fn(features, labels, mode, params): # Read vocabs and inputs dropout = params['dropout'] (words, nwords), (chars, nchars), add_features = features add_features = add_features training = (mode == tf.estimator.ModeKeys.TRAIN) vocab_words = tf.contrib.lookup.index_table_from_file( params['words'], num_oov_buckets=params['num_oov_buckets']) vocab_chars = tf.contrib.lookup.index_table_from_file( params['chars'], num_oov_buckets=params['num_oov_buckets']) with Path(params['tags']).open() as f: indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O'] num_tags = len(indices) + 1 with Path(params['chars']).open() as f: num_chars = sum(1 for _ in f) + params['num_oov_buckets'] # Char Embeddings char_ids = vocab_chars.lookup(chars) variable = tf.get_variable( 'chars', [num_chars + 1, params['dim_chars']], tf.float32) char_embeddings = tf.nn.embedding_lookup(variable, char_ids) char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout, training=training) # Char LSTM weights = tf.sequence_mask(nchars) char_embeddings = masked_conv1d_and_max( char_embeddings, weights, params['filters'], params['kernel_size']) # Word Embeddings word_ids = vocab_words.lookup(words) glove = np.load(params['glove'])['embeddings'] # np.array variable = np.vstack([glove, [[0.] * params['dim']]]) variable = tf.Variable(variable, dtype=tf.float32, trainable=False) word_embeddings = tf.nn.embedding_lookup(variable, word_ids) # Concatenate Word and Char Embeddings embeddings = tf.concat([word_embeddings, char_embeddings,tf.cast(add_features, tf.float32)], axis=-1) embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training) # LSTM t = tf.transpose(embeddings, perm=[1, 0, 2]) # Need time-major lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size']) lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw) output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords) output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords) output = tf.concat([output_fw, output_bw], axis=-1) output = tf.transpose(output, perm=[1, 0, 2]) output = tf.layers.dropout(output, rate=dropout, training=training) #Attention #attention_output, alphas = attention(output, ATTENTION_SIZE, return_alphas=True) num_units = 200 #W1 = tf.get_variable("W1", [num_units, num_units], dtype=tf.float32) W1 = tf.get_variable("W1", [num_units, num_units], initializer=tf.glorot_uniform_initializer(),dtype=tf.float32) b1 = tf.get_variable("b1", [num_units, ], dtype=tf.float32) q = tf.tensordot(output, W1, axes=[[2], [0]]) out_shape = tf.shape(output) #b1_shuffled = self.b1.dimshuffle('x', 'x', 0) b1_shuffled = tf.expand_dims(b1, 0) b1_shuffled = tf.expand_dims(b1_shuffled, 0) #print("b shape",tf.shape(b1_shuffled)) q += b1_shuffled q = tf.tanh(q) q_trans = tf.transpose(q, perm=[0, 2, 1]) #out = tf.batched_dot(q, q.dimshuffle(0, 2, 1)) out = tf.matmul(q, q_trans) #print("out dimension",out.shape) out *= (1 - tf.eye(out_shape[1], out_shape[1])) matrix = tf.nn.softmax(tf.reshape(out,(out_shape[0] * out_shape[1], out_shape[1]))) matrix = tf.reshape(matrix,(out_shape[0] , out_shape[1], out_shape[1])) #print("new dimension",matrix.shape) atten_out = tf.matmul(matrix,output) #print("atten dimension",atten_out.shape) #print("output dimension",output.shape) output = tf.concat([output, atten_out], axis=-1) output = tf.layers.dropout(output, rate=dropout, training=training) # CRF logits = tf.layers.dense(output, num_tags) 
crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32) pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords) if mode == tf.estimator.ModeKeys.PREDICT: # Predictions reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file( params['tags']) pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids)) predictions = { 'pred_ids': pred_ids, 'tags': pred_strings } return tf.estimator.EstimatorSpec(mode, predictions=predictions) else: # Loss vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags']) tags = vocab_tags.lookup(labels) log_likelihood, _ = tf.contrib.crf.crf_log_likelihood( logits, tags, nwords, crf_params) loss = tf.reduce_mean(-log_likelihood) # Metrics weights = tf.sequence_mask(nwords) metrics = { 'acc': tf.metrics.accuracy(tags, pred_ids, weights), 'precision': precision(tags, pred_ids, num_tags, indices, weights), 'recall': recall(tags, pred_ids, num_tags, indices, weights), 'f1': f1(tags, pred_ids, num_tags, indices, weights), } for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec( mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: train_op = tf.train.AdamOptimizer().minimize( loss, global_step=tf.train.get_or_create_global_step()) return tf.estimator.EstimatorSpec( mode, loss=loss, train_op=train_op)
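# A sketch of the token self-attention block used in the model_fn above, factored into a
# standalone function: scores are tanh(output . W1 + b1) dotted with itself, the diagonal
# is zeroed so a token does not attend to itself, and the softmax-normalised matrix
# re-weights the LSTM outputs. Function name, variable names and shapes are assumptions.
import tensorflow as tf

def token_self_attention(output, num_units):
    """output: [batch, time, num_units]; returns [batch, time, num_units]."""
    W1 = tf.get_variable('W1_sketch', [num_units, num_units],
                         initializer=tf.glorot_uniform_initializer(), dtype=tf.float32)
    b1 = tf.get_variable('b1_sketch', [num_units], dtype=tf.float32)
    q = tf.tanh(tf.tensordot(output, W1, axes=[[2], [0]]) + b1)   # [B, T, U]
    scores = tf.matmul(q, q, transpose_b=True)                    # [B, T, T] pairwise scores
    out_shape = tf.shape(output)
    scores *= (1.0 - tf.eye(out_shape[1]))                        # mask the diagonal
    weights = tf.nn.softmax(scores, axis=-1)                      # attention over tokens
    return tf.matmul(weights, output)                             # attention-weighted sum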
def model_fn(mode, features, labels): # Logging Path('results').mkdir(exist_ok=True) tf.logging.set_verbosity(logging.INFO) handlers = [ logging.FileHandler('./results/main.log'), logging.StreamHandler(sys.stdout) ] logging.getLogger('tensorflow').handlers = handlers word_inputs, char_inputs = features training = (mode == tf.estimator.ModeKeys.TRAIN) # Embeddings embeddings = tf.get_variable('embeddings', [cfg.num_chars + 2, cfg.char_embed_dim]) char_input_emb = tf.nn.embedding_lookup(embeddings, char_inputs) # Reshaping for CNN output = tf.reshape(char_input_emb, [-1, tf.shape(char_inputs)[2], cfg.char_embed_dim]) # CNN output = tf.layers.conv1d(output, filters=64, kernel_size=2, strides=1, padding="same", activation=tf.nn.relu) output = tf.layers.max_pooling1d(output, pool_size=2, strides=2) output = tf.layers.conv1d(output, filters=128, kernel_size=2, strides=1, padding="same", activation=tf.nn.relu) output = tf.layers.max_pooling1d(output, pool_size=2, strides=2) cnn_output = tf.layers.dropout(output, rate=.5, training=training) cnn_output = tf.layers.flatten(cnn_output) # Reshaping CNN and concatenating for LSTM cnn_output = tf.reshape( cnn_output, [-1, tf.shape(char_inputs)[1], 128 * int(cfg.word_max_len / 4)]) word_inputs = tf.layers.dropout(word_inputs, rate=.5, training=training) lstm_inputs = tf.concat([word_inputs, cnn_output], axis=-1) # LSTM fw_cell = tf.contrib.rnn.LSTMCell(num_units=cfg.lstm_units) bw_cell = tf.contrib.rnn.LSTMCell(num_units=cfg.lstm_units) (fw_outputs, bw_outputs), (fw_state, bw_state) = tf.nn.bidirectional_dynamic_rnn( fw_cell, bw_cell, lstm_inputs, dtype=tf.float32) # Attention W = tf.Variable(tf.random_normal([cfg.lstm_units], stddev=0.1)) H = fw_outputs + bw_outputs M = tf.tanh(H) alpha = tf.nn.softmax( tf.reshape( tf.matmul(tf.reshape(M, [-1, cfg.lstm_units]), tf.reshape(W, [-1, 1])), (-1, tf.shape(word_inputs)[1]))) r = tf.matmul(tf.transpose(H, [0, 2, 1]), tf.reshape(alpha, [-1, tf.shape(word_inputs)[1], 1])) r = tf.squeeze(r) h_star = tf.tanh(r) h_drop = tf.nn.dropout(h_star, .5) # Dense FC_W = tf.Variable(tf.truncated_normal([cfg.lstm_units, 2], stddev=0.1)) FC_b = tf.Variable(tf.constant(0., shape=[2])) logits = tf.nn.xw_plus_b(h_drop, FC_W, FC_b) # Loss loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)) # Gradient clipping # optimizer = tf.train.AdamOptimizer(1e-4) # gradients, variables = zip(*optimizer.compute_gradients(loss)) # gradients, _ = tf.clip_by_global_norm(gradients, .1) # train_op = optimizer.apply_gradients(zip(gradients, variables), tf.train.get_global_step()) # Metrics indices = [0, 1] labels = tf.argmax(labels, 1) pred_ids = tf.argmax(logits, 1) metrics = { 'acc': tf.metrics.accuracy(labels, pred_ids), 'precision': precision(labels, pred_ids, 2, indices, None, average='macro'), 'recall': recall(labels, pred_ids, 2, indices, None, average='macro'), 'f1': f1(labels, pred_ids, 2, indices, None, average='macro') } for metric_name, op in metrics.items(): tf.summary.scalar(metric_name, op[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) elif mode == tf.estimator.ModeKeys.TRAIN: train_op = tf.train.AdamOptimizer(cfg.learning_rate).minimize( loss, global_step=tf.train.get_or_create_global_step()) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
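# A sketch of the sentence-level attention pooling used just above, factored out: score
# each timestep of H = fw_outputs + bw_outputs with a learned vector W, softmax over
# time, and take the weighted sum to get a single sentence vector. Names and shapes are
# illustrative assumptions.
import tensorflow as tf

def attention_pooling(H, lstm_units):
    """H: [batch, time, lstm_units]; returns [batch, lstm_units]."""
    W = tf.Variable(tf.random_normal([lstm_units], stddev=0.1))
    M = tf.tanh(H)                                                # [B, T, U]
    scores = tf.tensordot(M, W, axes=[[2], [0]])                  # [B, T]
    alpha = tf.nn.softmax(scores, axis=-1)                        # attention weights over time
    r = tf.matmul(tf.transpose(H, [0, 2, 1]),                     # [B, U, T]
                  tf.expand_dims(alpha, -1))                      # [B, T, 1] -> [B, U, 1]
    return tf.tanh(tf.squeeze(r, axis=-1))                        # sentence representation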