def __wordchr_embedding_conv1d(self, inputs, keep_prob=0.5,
                               scope='wordchr-embedding-conv1d'):
    """Compute character embeddings by masked conv1d and max-pooling."""
    with tf.variable_scope(scope):
        with tf.device('/cpu:0'):
            chr_embeddings = tf.Variable(
                tf.random_uniform([self.chr_vocab_size, self.chr_dim], -1.0, 1.0),
                name='chr_embeddings')
            # (batch_size, sentence_length, word_length, chr_dim)
            wordchr_embeddings_t = tf.nn.embedding_lookup(chr_embeddings, inputs)
        wordchr_embeddings_t = tf.nn.dropout(wordchr_embeddings_t, keep_prob)
        # (batch_size*sentence_length, word_length, chr_dim)
        wordchr_embeddings_t = tf.reshape(
            wordchr_embeddings_t, [-1, self.word_length, self.chr_dim])

        # masking
        # (batch_size*sentence_length, word_length)
        t = tf.reshape(inputs, [-1, self.word_length])
        masks = self.__compute_word_masks(t)

        filters = self.__num_filters
        kernel_size = self.__filter_sizes[0]
        # (batch_size*sentence_length, filters)
        wordchr_embeddings = masked_conv1d_and_max(
            wordchr_embeddings_t, masks, filters, kernel_size, tf.nn.relu)
        # -> (batch_size, sentence_length, filters)
        wordchr_embeddings = tf.reshape(
            wordchr_embeddings, [-1, self.sentence_length, filters])
        return tf.nn.dropout(wordchr_embeddings, keep_prob)
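# Every snippet in this file calls masked_conv1d_and_max, but none defines it.
# The sketch below follows Guillaume Genthial's tf_ner reference implementation
# of a masked conv1d + max-pool over the character axis; the optional
# `activation` argument is an assumption added here so that both call
# signatures in this file (with and without tf.nn.relu) type-check.
from functools import reduce

import tensorflow as tf


def masked_conv1d_and_max(t, weights, filters, kernel_size, activation=None):
    """Applies a 1d convolution and a masked max-pooling over the time axis.

    t: tensor of shape [d1, ..., dn-2, time, dim]
    weights: boolean mask of shape [d1, ..., dn-2, time]
    """
    # Flatten all leading dimensions into a single batch dimension
    shape = tf.shape(t)
    ndims = t.shape.ndims
    dim1 = reduce(lambda x, y: x * y, [shape[i] for i in range(ndims - 2)])
    dim2 = shape[-2]
    dim3 = t.shape[-1]

    # Zero out padded timesteps before and after the convolution
    weights = tf.to_float(tf.reshape(weights, shape=[dim1, dim2, 1]))
    t = tf.reshape(t, shape=[dim1, dim2, dim3])
    t *= weights
    t_conv = tf.layers.conv1d(t, filters, kernel_size,
                              padding='same', activation=activation)
    t_conv *= weights

    # Push padded positions down to the row minimum so they never win the max
    t_conv += (1. - weights) * tf.reduce_min(t_conv, axis=-2, keepdims=True)
    t_max = tf.reduce_max(t_conv, axis=-2)

    # Restore the leading dimensions
    final_shape = [shape[i] for i in range(ndims - 2)] + [filters]
    return tf.reshape(t_max, shape=final_shape)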
def model_fn(features, labels, mode, params):
    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars), add_features = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable(
        'chars', [num_chars + 1, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char 1d convolution and max-pooling
    weights = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(
        char_embeddings, weights, params['filters'], params['kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings with the additional features
    embeddings = tf.concat(
        [word_embeddings, char_embeddings, tf.cast(add_features, tf.float32)],
        axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # Self-attention over the BiLSTM outputs
    # attention_output, alphas = attention(output, ATTENTION_SIZE, return_alphas=True)
    num_units = 200
    W1 = tf.get_variable("W1", [num_units, num_units],
                         initializer=tf.glorot_uniform_initializer(),
                         dtype=tf.float32)
    b1 = tf.get_variable("b1", [num_units], dtype=tf.float32)
    q = tf.tensordot(output, W1, axes=[[2], [0]])
    out_shape = tf.shape(output)
    # Broadcast the bias over batch and time: (num_units,) -> (1, 1, num_units)
    b1_shuffled = tf.expand_dims(tf.expand_dims(b1, 0), 0)
    q += b1_shuffled
    q = tf.tanh(q)
    # Pairwise scores between positions: (batch, time, time)
    q_trans = tf.transpose(q, perm=[0, 2, 1])
    out = tf.matmul(q, q_trans)
    # Zero the diagonal scores so a position does not favor itself
    out *= (1 - tf.eye(out_shape[1], out_shape[1]))
    matrix = tf.nn.softmax(
        tf.reshape(out, (out_shape[0] * out_shape[1], out_shape[1])))
    matrix = tf.reshape(matrix, (out_shape[0], out_shape[1], out_shape[1]))
    atten_out = tf.matmul(matrix, output)
    output = tf.concat([output, atten_out], axis=-1)
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op)
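# The self-attention block in the model_fn above only works if
# 2 * params['lstm_size'] == num_units (200), since W1 is contracted against
# the BiLSTM output's last axis. A minimal NumPy sketch of the same
# arithmetic, with made-up sizes, to sanity-check the shapes. Note one
# subtlety it makes visible: zeroing the diagonal *score* before the softmax
# still leaves exp(0) = 1 of unnormalized mass on the diagonal, so a position
# attends to itself with a neutral, not zero, weight.
import numpy as np

batch, time, units = 2, 5, 8          # hypothetical sizes
output = np.random.randn(batch, time, units)
W1 = np.random.randn(units, units)
b1 = np.random.randn(units)

q = np.tanh(np.tensordot(output, W1, axes=([2], [0])) + b1)  # (batch, time, units)
scores = q @ q.transpose(0, 2, 1)                            # (batch, time, time)
scores *= 1 - np.eye(time)                                   # zero diagonal scores
# Row-wise softmax over the last axis, as in the tf.reshape + softmax above
e = np.exp(scores - scores.max(-1, keepdims=True))
attn = e / e.sum(-1, keepdims=True)
atten_out = attn @ output                                    # (batch, time, units)
assert atten_out.shape == (batch, time, units)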
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars) = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars + 1, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, weights,
                                            params['filters'],
                                            params['kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              train_op=train_op)
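# How a model_fn like the one above is typically wired into tf.estimator.
# A minimal sketch: the param names mirror the snippets in this file, the
# file paths and hyperparameter values are hypothetical, and train_input_fn /
# eval_input_fn are assumed to yield ((words, nwords), (chars, nchars))
# feature tuples with string labels, padded per batch (see the input_fn
# sketch at the end of this file).
params = {
    'dim': 300, 'dim_chars': 100, 'dropout': 0.5,
    'num_oov_buckets': 1, 'epochs': 25, 'batch_size': 20, 'buffer': 15000,
    'filters': 50, 'kernel_size': 3, 'lstm_size': 100,
    'words': 'vocab.words.txt', 'chars': 'vocab.chars.txt',
    'tags': 'vocab.tags.txt', 'glove': 'glove.npz',
}
cfg = tf.estimator.RunConfig(save_checkpoints_secs=120)
estimator = tf.estimator.Estimator(model_fn, 'results/model', cfg, params)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn, throttle_secs=120)
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)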
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars) = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    # num_oov_buckets maps out-of-vocabulary words to the index range
    # [vocab_size, vocab_size + num_oov_buckets - 1]; with num_oov_buckets <= 0,
    # unknown words get the parameter default_value (-1 by default)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        # indices holds the positive-class tag indices; 'O' is the negative
        # class and is excluded, which the evaluation metrics rely on
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings, learned from scratch
    char_ids = vocab_chars.lookup(chars)
    # The paper initializes char embeddings uniformly in
    # [-sqrt(3/dim), sqrt(3/dim)]; with it f1 = 0.91270673 vs. f1 = 0.91264033
    # without -- an improvement, but within normal run-to-run noise.
    variable = tf.get_variable('chars_embeddings',
                               [num_chars, params['dim_chars']],
                               dtype=tf.float32)
    # initializer=tf.random_uniform_initializer(
    #     -tf.sqrt(3 / params['dim_chars']), tf.sqrt(3 / params['dim_chars']))
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char 1d convolution; sequence_mask turns the integer char counts into
    # a boolean mask
    mask = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, mask,
                                            params['filters'],
                                            params['kernel_size'])

    # Word Embeddings: pre-trained glove.840B.300d vectors, not fine-tuned
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # Bi-LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF: in a linear-chain CRF the maximal cliques over the output variables
    # are pairs of adjacent nodes, so feature functions involve at most two
    # neighboring labels. logits are the unary (state) potentials and
    # crf_params the pairwise transition potentials.
    # The dense layer is a (output.shape[-1], num_tags) projection: leading
    # dimensions are preserved, the last becomes num_tags.
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        mask = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, mask),
            'precision': precision(tags, pred_ids, num_tags, indices, mask),
            'recall': recall(tags, pred_ids, num_tags, indices, mask),
            'f1': f1(tags, pred_ids, num_tags, indices, mask),
        }
        # tf.metrics.accuracy returns (accuracy, update_op): the former is the
        # value as of the previous batch, while the latter updates total and
        # count from the current batch and returns the refreshed value. Log
        # op[1]; logging op[0] would leave total/count stale and the reported
        # accuracy would never change.
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            # Adam's default learning rate is 1e-3
            train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              train_op=train_op)
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    dropout = params['dropout']
    (words, nwords), (chars, nchars) = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars + 1, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    # char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
    #                                     training=training)

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, weights,
                                            params['char_filters'],
                                            params['char_kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['w2v'])['embeddings']  # np.array
    print("glove shape", glove.shape)
    variable = np.vstack([glove, [[0.] * params['dim']]])  # [vocab_size, emb_size]
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # block_unflat_scores shape: [batch_size, max_seq_len, class_num]
    block_unflat_scores, _, l2_loss = feature_layers(embeddings, reuse=False)
    pred_ids = tf.argmax(block_unflat_scores[-1], 2)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)

        # Mean cross-entropy loss.
        # NOTE: the masked variant below was never enabled, so padded
        # positions currently contribute to the loss:
        # input_mask = tf.sequence_mask(nwords, params["max_seq_len"],
        #                               dtype=tf.float32)
        # masked_losses = tf.multiply(losses, input_mask)
        # loss += tf.div(tf.reduce_sum(masked_losses),
        #                tf.reduce_sum(input_mask))
        with tf.name_scope("loss"):
            loss = tf.constant(0.0)
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=block_unflat_scores[-1], labels=tags)
            loss += tf.reduce_sum(losses)
            loss += params["l2_penalty"] * l2_loss

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              train_op=train_op)
def graph_fn(features, labels, mode, params, reuse=None, getter=None):
    with tf.variable_scope('graph', reuse=reuse, custom_getter=getter):
        # Read vocabs and inputs
        dropout = params['dropout']
        (words, nwords), (chars, nchars) = features
        training = (mode == tf.estimator.ModeKeys.TRAIN)
        vocab_words = tf.contrib.lookup.index_table_from_file(
            params['words'], num_oov_buckets=params['num_oov_buckets'])
        vocab_chars = tf.contrib.lookup.index_table_from_file(
            params['chars'], num_oov_buckets=params['num_oov_buckets'])
        with Path(params['tags']).open() as f:
            indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
            num_tags = len(indices) + 1
        with Path(params['chars']).open() as f:
            num_chars = sum(1 for _ in f) + params['num_oov_buckets']

        # Char Embeddings
        char_ids = vocab_chars.lookup(chars)
        variable = tf.get_variable('chars_embeddings',
                                   [num_chars + 1, params['dim_chars']],
                                   tf.float32)
        char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
        char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                            training=training)

        # Char 1d convolution and max-pooling
        weights = tf.sequence_mask(nchars)
        char_embeddings = masked_conv1d_and_max(char_embeddings, weights,
                                                params['filters'],
                                                params['kernel_size'])

        # Word Embeddings
        word_ids = vocab_words.lookup(words)
        glove = np.load(params['glove'])['embeddings']  # np.array
        variable = np.vstack([glove, [[0.] * params['dim']]])
        variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
        word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

        # Concatenate Word and Char Embeddings
        embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
        embeddings = tf.layers.dropout(embeddings, rate=dropout,
                                       training=training)

        # LSTM
        t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
        lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
        lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
        lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
        output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
        output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
        output = tf.concat([output_fw, output_bw], axis=-1)
        output = tf.transpose(output, perm=[1, 0, 2])
        output = tf.layers.dropout(output, rate=dropout, training=training)

        # CRF
        logits = tf.layers.dense(output, num_tags)
        crf_params = tf.get_variable("crf", [num_tags, num_tags],
                                     dtype=tf.float32)
        return logits, crf_params
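# graph_fn's reuse/getter arguments suggest it is meant to be built twice:
# once with the raw weights and once reading them through exponential-moving-
# average (EMA) shadows for evaluation. A hedged sketch of that pattern; the
# decay value, ema_getter helper, and wiring are assumptions, not taken from
# this file.
def ema_getter(ema):
    def _getter(getter, name, *args, **kwargs):
        var = getter(name, *args, **kwargs)
        ema_var = ema.average(var)
        return ema_var if ema_var is not None else var
    return _getter


def model_fn(features, labels, mode, params):
    # First pass creates the variables
    logits, crf_params = graph_fn(features, labels, mode, params)
    ema = tf.train.ExponentialMovingAverage(0.999)
    ema_op = ema.apply(tf.trainable_variables())  # creates the shadow variables
    # Second pass reuses the same variables, read through their EMA shadows
    logits_ema, crf_params_ema = graph_fn(
        features, labels, mode, params, reuse=True, getter=ema_getter(ema))
    (_, nwords), _ = features
    pred_ids, _ = tf.contrib.crf.crf_decode(logits_ema, crf_params_ema, nwords)
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode, predictions={'pred_ids': pred_ids})
    # Loss and metrics follow the other model_fns above; the train_op should
    # be grouped with ema_op, e.g. tf.group(train_op, ema_op), so the shadows
    # track the weights.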
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    (words, nwords), (chars, nchars) = features
    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings, e.g. [['a', 'b'], ['c', 'z']] => [[0, 1], [2, 25]]
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars, params['dim_chars']],
                               tf.float32)  # e.g. [86, 100]
    # Each id selects a row of the embedding matrix:
    # [[0, 1], [2, 25]] => [[variable[0], variable[1]],
    #                       [variable[2], variable[25]]]
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char LSTM
    # char_embeddings: (batch, max words, max word length, dim_chars)
    dim_words = tf.shape(char_embeddings)[1]  # max number of words
    dim_chars = tf.shape(char_embeddings)[2]  # max word length (time axis)
    # (batch*words, max word length, dim_chars)
    flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']])
    t = tf.transpose(flat, perm=[1, 0, 2])  # time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    # Keep only the final hidden state of each direction
    _, (_, output_fw) = lstm_cell_fw(t, dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    _, (_, output_bw) = lstm_cell_bw(t, dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    # Concatenate the two directions on the last dimension
    output = tf.concat([output_fw, output_bw], axis=-1)
    char_embeddings_lstm = tf.reshape(output,
                                      [-1, params['char_lstm_size'] * 2])
    char_embeddings_lstm = tf.expand_dims(char_embeddings_lstm, -2)

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings_cnn = masked_conv1d_and_max(char_embeddings, weights,
                                                params['filters'],
                                                params['kernel_size'])
    char_embeddings_cnn = tf.reshape(char_embeddings_cnn,
                                     [-1, params['filters']])
    char_embeddings_cnn = tf.expand_dims(char_embeddings_cnn, -2)

    # Concat CNN and LSTM char embeddings, then attend over the two views
    char_embeddings = tf.concat([char_embeddings_cnn, char_embeddings_lstm],
                                axis=-2)
    with tf.name_scope('Attention_layer'):
        attention_output, alphas = attention(char_embeddings,
                                             params['char_lstm_size'] * 2,
                                             time_major=False,
                                             return_alphas=True)
        tf.summary.histogram('alphas', alphas)
    char_embeddings = tf.reshape(attention_output,
                                 [-1, dim_words, params['char_lstm_size'] * 2])

    # Word Embeddings
    # [[b'Peter', b'Blackburn'], [b'Yac', b'Amirat']] => [[0, 1], [2, 3]]
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array of vocab vectors
    # Append a zero vector for out-of-vocabulary words
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    # [[0, 1], [2, 3]] => [[variable[0], variable[1]],
    #                      [variable[2], variable[3]]], e.g. (2, 2, 300)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings on the last dimension
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM 1
    # Put the time dimension first; see batch-major vs. time-major layouts
    t = tf.transpose(embeddings, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # ELMo
    elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=False)
    word_embeddings = elmo(inputs={"tokens": words, "sequence_len": nwords},
                           signature="tokens", as_dict=True)["elmo"]

    # Concatenate the first LSTM's output with the ELMo embeddings, dropout
    embeddings = tf.concat([word_embeddings, output], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM 2
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm2_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm2_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    # logits: [batch_size, max_seq_len, num_tags] unary potentials
    logits = tf.layers.dense(output, num_tags)
    # Transition-parameter matrix of shape num_tags * num_tags
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    # crf_decode returns a [batch_size, max_seq_len] int32 matrix with the
    # highest-scoring tag indices
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions, e.g. indices [1, 5] => ["lake", "UNKNOWN"]
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)  # replace labels by their indices
        # log-likelihood of each example given its gold tag sequence:
        # a [batch_size] tensor
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        # sequence_mask turns lengths into a boolean matrix,
        # e.g. [1, 2] => [[True, False], [True, True]]
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            # args: ground truth, predictions, number of tags,
            # indices of the positive classes, mask
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        # Each metric is a (value, update_op) tuple; log op[1] so that the
        # running totals are updated every batch (for TensorBoard)
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              train_op=train_op)
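# The attention(inputs, attention_size, time_major, return_alphas) call above
# matches the signature of the widely used ilivans/tf-rnn-attention helper
# (additive attention in the style of Yang et al., 2016). A sketch under that
# assumption; initializer choices are illustrative.
def attention(inputs, attention_size, time_major=False, return_alphas=False):
    """Additive attention pooling over the time axis."""
    if time_major:
        inputs = tf.transpose(inputs, [1, 0, 2])  # to (batch, time, dim)
    hidden_size = inputs.shape[2].value

    # Trainable projections
    w_omega = tf.Variable(
        tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega)  # (B, T, A)
    vu = tf.tensordot(v, u_omega, axes=1)                         # (B, T)
    alphas = tf.nn.softmax(vu)                                    # (B, T)

    # Weighted sum over time: (B, dim)
    output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 1)
    return (output, alphas) if return_alphas else output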
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = features['words'], features['nwords']

    # Read vocabs and inputs
    dropout = params['dropout']
    words, nwords = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1

    # Words 1d convolution for entity embeddings: the idea was to run a
    # conv1d over the word sequence, keeping single-word vectors while also
    # composing entity (multi-word) vectors.
    # NOTE: disabled -- it referenced an undefined `nchars`, ran on the raw
    # string tensor instead of embeddings, and its output was never used.
    # weights = tf.sequence_mask(nchars)
    # words_with_entity_embeddings = masked_conv1d_and_max(
    #     words, weights, params['char_filters'], params['char_kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['w2v'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    embeddings = tf.nn.embedding_lookup(variable, word_ids)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # Per-token softmax instead of CRF decoding
    logits = tf.layers.dense(output, num_tags)  # [batch_size, max_seq_len, num_tags]
    pred_ids = tf.argmax(logits, -1)
    # crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    # pred_ids, pred_scores = tf.contrib.crf.crf_decode(logits, crf_params, nwords)
    input_mask = tf.sequence_mask(nwords, params["max_seq_len"],
                                  dtype=tf.float32)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {
            'pred_ids': pred_ids,
            'tags': pred_strings,
            # "scores": pred_scores
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss: masked cross-entropy summed over real tokens
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        # log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
        #     logits, tags, nwords, crf_params)
        # loss = tf.reduce_mean(-log_likelihood)
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=tags)
        masked_losses = tf.multiply(losses, input_mask)
        # loss = tf.div(tf.reduce_sum(masked_losses), tf.reduce_sum(input_mask))
        loss = tf.reduce_sum(masked_losses)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op)
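# Several model_fns above special-case `isinstance(features, dict)` for
# serving. A hedged sketch of the matching serving input receiver for the
# words-only variant; the placeholder names mirror the dict keys the model_fn
# unpacks, everything else is an assumption.
def serving_input_receiver_fn():
    words = tf.placeholder(dtype=tf.string, shape=[None, None], name='words')
    nwords = tf.placeholder(dtype=tf.int32, shape=[None], name='nwords')
    receiver_tensors = {'words': words, 'nwords': nwords}
    features = {'words': words, 'nwords': nwords}
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

# Usage (TF 1.x API name):
# estimator.export_savedmodel('saved_model', serving_input_receiver_fn)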
def model_fn(features, labels, mode, params):
    # The CNN extractor and attention head are used in both branches below,
    # so build them unconditionally; the position encoder is only applied
    # when ELMo is off. (The original built all three only when
    # params['elmo'] was False, which raised a NameError with ELMo on.)
    extractor = Cnn_extractor(params['lstm_size'])
    attention = Attention(params['lstm_size'], 2)
    if params['elmo'] is False:
        pos = Position_Encoder(40)

    if params['elmo'] is True:
        (words, nwords), (chars, nchars), (elmo_tokens, _) = features
    else:
        (words, nwords), (chars, nchars) = features

    # Read vocabs and inputs
    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open(encoding='utf8') as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open(encoding='utf8') as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars + 1, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char 1d convolution
    weights = tf.sequence_mask(nchars)
    char_embeddings = masked_conv1d_and_max(char_embeddings, weights,
                                            params['filters'],
                                            params['kernel_size'])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    if params['elmo'] is True:
        # options_file and weight_file are module-level paths to the
        # pre-trained bilm options and weights
        bilm = BidirectionalLanguageModel(options_file, weight_file)
        context_embeddings_op = bilm(elmo_tokens)
        elmo_context_input = weight_layers('input', context_embeddings_op,
                                           l2_coef=0.0)
        elmo_embeddings = elmo_context_input['weighted_op']
        embeddings = tf.concat(
            [elmo_embeddings, word_embeddings, char_embeddings], axis=-1)
    else:
        embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # Additional CNN features with a supervised attention head
    if params['elmo'] is False:
        embeddings_ = pos(embeddings, nwords)
        cnn_features = extractor(embeddings_)
    else:
        cnn_features = extractor(embeddings)
    attn_weights, cnn_logits = attention(cnn_features, nwords)
    attn_weights_ = tf.nn.softmax(attn_weights, -1)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.concat([output, cnn_logits], -1)
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {
            'pred_ids': pred_ids,
            'tags': pred_strings,
            'attn_weights': attn_weights_,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss: CRF log-likelihood plus an auxiliary attention loss that
        # pushes the attention head to fire on non-'O' tokens
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        attn_tags = tf.cast((tags > 0), tf.int32)
        attn_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=attn_weights, labels=attn_tags)
        attn_ids = tf.argmax(attn_weights, -1)
        attn_mask = tf.sequence_mask(nwords)
        attn_loss = tf.reduce_mean(tf.boolean_mask(attn_loss, attn_mask))
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)
        loss = loss + args.lambda_value * attn_loss  # args is module-level

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
            'sac_acc': tf.metrics.accuracy(attn_tags, attn_ids, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              train_op=train_op)
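# All of the model_fns above expect features shaped
# ((words, nwords), (chars, nchars)) with string labels. A sketch of a
# matching tf.data input_fn in the style of the tf_ner reference code; it
# assumes one whitespace-tokenized sentence per line in the words file, with
# an aligned tags file.
import functools
from pathlib import Path

import tensorflow as tf


def parse_fn(line_words, line_tags):
    # Encode in bytes for TF
    words = [w.encode() for w in line_words.strip().split()]
    tags = [t.encode() for t in line_tags.strip().split()]
    assert len(words) == len(tags), "Words and tags lengths don't match"
    # Chars, padded to the longest word in the sentence
    chars = [[c.encode() for c in w] for w in line_words.strip().split()]
    lengths = [len(c) for c in chars]
    max_len = max(lengths)
    chars = [c + [b'<pad>'] * (max_len - l) for c, l in zip(chars, lengths)]
    return ((words, len(words)), (chars, lengths)), tags


def generator_fn(words_path, tags_path):
    with Path(words_path).open('r') as f_words, \
            Path(tags_path).open('r') as f_tags:
        for line_words, line_tags in zip(f_words, f_tags):
            yield parse_fn(line_words, line_tags)


def input_fn(words_path, tags_path, params=None, shuffle_and_repeat=False):
    params = params if params is not None else {}
    shapes = ((([None], ()),             # (words, nwords)
               ([None, None], [None])),  # (chars, nchars)
              [None])                    # tags
    types = (((tf.string, tf.int32), (tf.string, tf.int32)), tf.string)
    defaults = (((b'<pad>', 0), (b'<pad>', 0)), b'O')
    dataset = tf.data.Dataset.from_generator(
        functools.partial(generator_fn, words_path, tags_path),
        output_shapes=shapes, output_types=types)
    if shuffle_and_repeat:
        dataset = dataset.shuffle(params['buffer']).repeat(params['epochs'])
    # Pad each batch to its own maximum lengths
    return (dataset
            .padded_batch(params.get('batch_size', 20), shapes, defaults)
            .prefetch(1))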