def __init__(self, feature_type=None, elmo_type=None, pre_embed=None):
    if feature_type is None and elmo_type is None:
        exit(0)
    self.lr = Params.lr
    self.word_dim = Params.word_dim
    self.num_classes = Params.num_classes
    self.batch_size = Params.batch_size
    self.hidden_dim = Params.hidden_dim
    self.sen_len = Params.sen_max_len
    self.sen_num = Params.doc_max_sen
    self.batch_sen_num = self.batch_size * self.sen_num

    self.keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')
    self.input_word = tf.placeholder(dtype=tf.int32, shape=[None, self.sen_len],
                                     name='input_word')
    self.input_word_ELMO = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                          name='input_word_ELMO')
    self.input_label = tf.placeholder(dtype=tf.float32,
                                      shape=[None, self.num_classes],
                                      name='input_label')

    if feature_type in ['word', 'char']:
        self.word_embedding = tf.get_variable(initializer=pre_embed,
                                              name='word_embedding')
        all_input_words = tf.nn.embedding_lookup(self.word_embedding, self.input_word)
        all_input_words = tf.nn.dropout(all_input_words, self.keep_prob)
        layer1_forward = self.LSTM()
        layer1_backward = self.LSTM()
        with tf.variable_scope('LSTM'):
            all_output_words, _ = tf.nn.bidirectional_dynamic_rnn(
                layer1_forward, layer1_backward, all_input_words, dtype=tf.float32)
        all_output_words = tf.concat(axis=2, values=all_output_words)
        # Word-level attention: one score per word, softmaxed within each sentence.
        self.attention_w1 = tf.get_variable(name='attention_w1',
                                            shape=[2 * self.hidden_dim, 1])
        word_alpha = tf.reshape(
            tf.nn.softmax(
                tf.reshape(
                    tf.matmul(tf.reshape(all_output_words, [-1, 2 * self.hidden_dim]),
                              self.attention_w1),
                    [self.batch_sen_num, -1]),
                1),
            [self.batch_sen_num, 1, -1])
        all_output_sens = tf.reshape(tf.matmul(word_alpha, all_output_words),
                                     [-1, 2 * self.hidden_dim])
        all_output_sens = tf.reshape(all_output_sens,
                                     [-1, self.sen_num, 2 * self.hidden_dim])

    if elmo_type in ['word', 'char']:
        if elmo_type == 'word':
            options_file = Params.elmo_word_options_file
            weight_file = Params.elmo_word_weight_file
            embed_file = Params.elmo_word_embed_file
        else:
            options_file = Params.elmo_char_options_file
            weight_file = Params.elmo_char_weight_file
            embed_file = Params.elmo_char_embed_file
        bilm = BidirectionalLanguageModel(options_file, weight_file,
                                          use_character_inputs=False,
                                          embedding_weight_file=embed_file,
                                          max_batch_size=self.batch_sen_num)
        bilm_embedding_op = bilm(self.input_word_ELMO)
        bilm_embedding = weight_layers('output', bilm_embedding_op, l2_coef=0.0)
        bilm_representation = bilm_embedding['weighted_op']
        bilm_representation = tf.nn.dropout(bilm_representation, self.keep_prob)
        layer2_forward = self.LSTM()
        layer2_backward = self.LSTM()
        with tf.variable_scope('LSTM_ELMO'):
            elmo_output_words, _ = tf.nn.bidirectional_dynamic_rnn(
                layer2_forward, layer2_backward, bilm_representation, dtype=tf.float32)
        elmo_output_words = tf.concat(axis=2, values=elmo_output_words)
        self.attention_w2 = tf.get_variable(name='attention_w2',
                                            shape=[2 * self.hidden_dim, 1])
        elmo_word_alpha = tf.reshape(
            tf.nn.softmax(
                tf.reshape(
                    tf.matmul(tf.reshape(elmo_output_words, [-1, 2 * self.hidden_dim]),
                              self.attention_w2),
                    [self.batch_sen_num, -1]),
                1),
            [self.batch_sen_num, 1, -1])
        elmo_output_sens = tf.reshape(tf.matmul(elmo_word_alpha, elmo_output_words),
                                      [-1, 2 * self.hidden_dim])
        elmo_output_sens = tf.reshape(elmo_output_sens,
                                      [-1, self.sen_num, 2 * self.hidden_dim])

    # Combine the two sentence representations when both feature streams are active.
    if feature_type is not None and elmo_type is not None:
        all_output_sens = tf.concat(axis=2, values=[all_output_sens, elmo_output_sens])
    elif elmo_type is not None:
        all_output_sens = elmo_output_sens
    all_output_sens = tf.nn.dropout(all_output_sens, self.keep_prob)

    # Sentence-level BiLSTM plus attention pools the sentences into a document vector.
    layer3_forward = self.LSTM()
    layer3_backward = self.LSTM()
    with tf.variable_scope('LSTM-SEN'):
        final_output_sens, _ = tf.nn.bidirectional_dynamic_rnn(
            layer3_forward, layer3_backward, all_output_sens, dtype=tf.float32)
    final_output_sens = tf.concat(axis=2, values=final_output_sens)
    self.attention_w3 = tf.get_variable(name='attention_w3',
                                        shape=[2 * self.hidden_dim, 1])
    sen_alpha = tf.reshape(
        tf.nn.softmax(
            tf.reshape(
                tf.matmul(tf.reshape(final_output_sens, [-1, 2 * self.hidden_dim]),
                          self.attention_w3),
                [-1, self.sen_num]),
            1),
        [-1, 1, self.sen_num])
    self.doc_rep = tf.reshape(tf.matmul(sen_alpha, final_output_sens),
                              [-1, 2 * self.hidden_dim])
    self.doc_rep = tf.nn.dropout(self.doc_rep, self.keep_prob)

    out = tf.layers.dense(self.doc_rep, self.num_classes, use_bias=True,
                          activation=None)
    self.prob = tf.nn.softmax(out, 1)
    self.prediction = tf.argmax(self.prob, 1, name="prediction")
    # Per-example correctness; average it outside the graph for batch accuracy.
    self.accuracy = tf.cast(tf.equal(self.prediction,
                                     tf.argmax(self.input_label, 1)), "float")
    self.classifier_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=self.input_label))
    self._classifier_train_op = tf.train.AdamOptimizer(self.lr).minimize(
        self.classifier_loss)
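
# Usage sketch (an assumption, not part of the original file): one training
# step against the placeholders defined above. `sess`, `model`, and the three
# batch arrays are hypothetical names; the batches must be shaped to match
# the placeholders.
def train_step(sess, model, word_batch, elmo_batch, label_batch):
    feed = {model.input_word: word_batch,        # [batch_sen_num, sen_len]
            model.input_word_ELMO: elmo_batch,   # [batch_sen_num, T] token ids
            model.input_label: label_batch,      # [batch_size, num_classes]
            model.keep_prob: 0.5}
    _, loss = sess.run([model._classifier_train_op, model.classifier_loss],
                       feed_dict=feed)
    return loss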
def create_model(self, share_dense=True):
    self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, n_sent],
                                  name='input_y')
    self.input_y2 = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, 4],
                                   name='input_y2')
    self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')
    self.output_keep_prob = tf.placeholder(dtype=tf.float32, name='output_keep_prob')

    if self.main_feature.lower() in ['word', 'char']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len],
                                      name='input_x')
        self.word_embedding = tf.get_variable(initializer=self.embedding,
                                              name='word_embedding')
        self.word_encoding = tf.nn.embedding_lookup(self.word_embedding, self.input_x)
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    elif self.main_feature.lower() in ['elmo_word', 'elmo_char', 'elmo_qiuqiu']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len + 2],
                                      name='input_x')
        if self.main_feature == 'elmo_word':
            options_file = self.config.elmo_word_options_file
            weight_file = self.config.elmo_word_weight_file
            embed_file = self.config.elmo_word_embed_file
        elif self.main_feature == 'elmo_char':
            options_file = self.config.elmo_char_options_file
            weight_file = self.config.elmo_char_weight_file
            embed_file = self.config.elmo_char_embed_file
        elif self.main_feature == 'elmo_qiuqiu':
            options_file = self.config.elmo_qiuqiu_options_file
            weight_file = self.config.elmo_qiuqiu_weight_file
            embed_file = self.config.elmo_qiuqiu_embed_file
        self.bilm = BidirectionalLanguageModel(options_file, weight_file,
                                               use_character_inputs=False,
                                               embedding_weight_file=embed_file,
                                               max_batch_size=self.batch_size)
        bilm_embedding_op = self.bilm(self.input_x)
        bilm_embedding = weight_layers('output', bilm_embedding_op, l2_coef=0.0)
        self.word_encoding = bilm_embedding['weighted_op']
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    else:
        exit('wrong feature')

    all_input_expanded = tf.expand_dims(self.word_encoding, -1)
    c_outputs = []
    for c in range(n_sub):
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.variable_scope('conv-maxpool-{}-{}'.format(c, filter_size)):
                # Convolution layer
                filter_shape = [filter_size, self.embed_size, 1, n_filters]
                W = tf.get_variable('W', initializer=tf.truncated_normal(
                    filter_shape, stddev=0.1))
                b = tf.get_variable('b', initializer=tf.constant(0.1,
                                                                 shape=[n_filters]))
                conv = tf.nn.conv2d(all_input_expanded, W, strides=[1] * 4,
                                    padding='VALID', name='conv')
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, self.max_len - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID', name='pool')
                pooled_outputs.append(pooled)
        num_filters_total = n_filters * len(filter_sizes)
        h_pool = tf.concat(pooled_outputs, 3)
        h_pool_flatten = tf.reshape(h_pool, [-1, 1, num_filters_total])
        h_drop = tf.nn.dropout(h_pool_flatten, self.dropout_keep_prob)
        dense = tf.layers.dense(h_drop, 4, activation=None)
        c_outputs.append(dense)

    self.logits = tf.reshape(tf.concat(c_outputs, axis=1), [-1, n_sub, 4])
    y_ = tf.nn.softmax(self.logits)
    self.prob = tf.reshape(y_, [-1, n_sub, 4])
    self.prediction = tf.argmax(self.prob, 2, name="prediction")

    if not self.config.balance:
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=tf.reshape(self.input_y, [-1, 4])))
        # self.loss += tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        #     logits=self.logits, labels=tf.reshape(self.input_y2, [-1, 4])))
    else:
        # (Alternative from an earlier run: inverse-frequency weights,
        # 0.882 / 0.019 / 0.080 / 0.019 times self.n_classes.)
        class0_weight = 1  # weight coefficient for class 0
        class1_weight = 3  # weight coefficient for class 1
        class2_weight = 3  # weight coefficient for class 2
        class3_weight = 3  # weight coefficient for class 3
        y = tf.reshape(self.input_y, [-1, 4])
        y_flat = tf.reshape(y_, [-1, 4])  # flatten so class columns line up with y
        self.loss = tf.reduce_mean(-class0_weight * (y[:, 0] * tf.log(y_flat[:, 0]))
                                   - class1_weight * (y[:, 1] * tf.log(y_flat[:, 1]))
                                   - class2_weight * (y[:, 2] * tf.log(y_flat[:, 2]))
                                   - class3_weight * (y[:, 3] * tf.log(y_flat[:, 3])))
    return self
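
# Note (editorial sketch, not in the original): the weighted branch above
# applies tf.log directly to softmax outputs, which yields NaN once a
# probability underflows to 0. A common guard clips before the log.
# `weighted_ce` is a hypothetical helper equivalent to the branch above.
def weighted_ce(y_true, y_prob, weights=(1., 3., 3., 3.)):
    y_prob = tf.clip_by_value(y_prob, 1e-10, 1.0)  # guard against log(0)
    w = tf.constant(weights, dtype=tf.float32)     # per-class weights
    return tf.reduce_mean(-tf.reduce_sum(w * y_true * tf.log(y_prob), axis=-1))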
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']),
                    features['elmo_input'])

    # Read vocabs and inputs
    ((words, nwords), (chars, nchars), elmo_inputs) = features
    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open(encoding="utf8") as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # ELMo
    options_file = "./options.json"
    weight_file = "./weights.hdf5"
    bilm = BidirectionalLanguageModel(options_file=options_file,
                                      weight_file=weight_file,
                                      use_character_inputs=True)
    ops = bilm(elmo_inputs)
    weight_op = weight_layers("nerelmo", ops)['weighted_op']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char LSTM
    dim_words = tf.shape(char_embeddings)[1]
    dim_chars = tf.shape(char_embeddings)[2]
    flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']])
    t = tf.transpose(flat, perm=[1, 0, 2])  # fused cells need time-major input
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    _, (_, output_fw) = lstm_cell_fw(t, dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    _, (_, output_bw) = lstm_cell_bw(t, dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    output = tf.concat([output_fw, output_bw], axis=-1)
    # 50 == 2 * char_lstm_size (assumes char_lstm_size == 25)
    char_embeddings = tf.reshape(output, [-1, dim_words, 50])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    fasttext = np.load(params['fasttext'])['embeddings']  # np.array
    variable = np.vstack([fasttext, [[0.] * params['dim']]])  # extra zero row for OOV
    variable = tf.Variable(variable, dtype=tf.float32)  # , trainable=False
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word, Char and ELMo Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings, weight_op], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
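
# Wiring sketch (an assumption, not part of the original): model_fn above has
# the tf.estimator signature, so it would typically be driven like this. The
# params keys mirror the ones model_fn reads; the file names and sizes here
# are hypothetical.
def build_estimator(model_dir='results/model'):
    params = {
        'words': 'vocab.words.txt', 'chars': 'vocab.chars.txt',
        'tags': 'vocab.tags.txt', 'fasttext': 'fasttext.npz',
        'dim': 300, 'dim_chars': 100, 'char_lstm_size': 25,
        'lstm_size': 100, 'dropout': 0.5, 'num_oov_buckets': 1,
    }
    cfg = tf.estimator.RunConfig(save_checkpoints_secs=120)
    return tf.estimator.Estimator(model_fn, model_dir, cfg, params)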
def create_model(self, share_dense=True, concat_sub=True):
    self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, 4],
                                  name='input_y')
    self.input_y2 = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, 4],
                                   name='input_y2')
    self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')
    self.output_keep_prob = tf.placeholder(dtype=tf.float32, name='output_keep_prob')

    if self.main_feature.lower() in ['word', 'char']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len],
                                      name='input_x')
        self.word_embedding = tf.get_variable(initializer=self.embedding,
                                              name='word_embedding')
        self.word_encoding = tf.nn.embedding_lookup(self.word_embedding, self.input_x)
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    elif self.main_feature.lower() in ['elmo_word', 'elmo_char', 'elmo_qiuqiu']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len + 2],
                                      name='input_x')
        if self.main_feature == 'elmo_word':
            options_file = self.config.elmo_word_options_file
            weight_file = self.config.elmo_word_weight_file
            embed_file = self.config.elmo_word_embed_file
        elif self.main_feature == 'elmo_char':
            options_file = self.config.elmo_char_options_file
            weight_file = self.config.elmo_char_weight_file
            embed_file = self.config.elmo_char_embed_file
        elif self.main_feature == 'elmo_qiuqiu':
            options_file = self.config.elmo_qiuqiu_options_file
            weight_file = self.config.elmo_qiuqiu_weight_file
            embed_file = self.config.elmo_qiuqiu_embed_file
        self.bilm = BidirectionalLanguageModel(options_file, weight_file,
                                               use_character_inputs=False,
                                               embedding_weight_file=embed_file,
                                               max_batch_size=self.batch_size)
        bilm_embedding_op = self.bilm(self.input_x)
        bilm_embedding = weight_layers('output', bilm_embedding_op, l2_coef=0.0)
        self.word_encoding = bilm_embedding['weighted_op']
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    else:
        exit('wrong feature')

    c_outputs = []
    for c in range(n_sub):
        with tf.variable_scope('lstm-{}'.format(c)):
            # cuDNN BiLSTM followed by a BiGRU; both take time-major input.
            # (A tf.nn.bidirectional_dynamic_rnn variant was left commented
            # out in the original.)
            self.forward_lstm = cudnn_rnn.CudnnLSTM(
                num_layers=1, num_units=self.hidden_dim,
                direction=cudnn_rnn.CUDNN_RNN_BIDIRECTION, dtype=tf.float32)
            self.forward_gru = cudnn_rnn.CudnnGRU(
                num_layers=1, num_units=self.hidden_dim,
                direction=cudnn_rnn.CUDNN_RNN_BIDIRECTION, dtype=tf.float32)
            x, _ = self.forward_lstm(tf.transpose(self.word_encoding, [1, 0, 2]))
            x, _ = self.forward_gru(x)
            x = tf.transpose(x, [1, 0, 2])  # back to batch-major
        with tf.variable_scope('pooling-{}'.format(c)):
            max_pooled = tf.reshape(tf.reduce_max(x, 1), [-1, 2 * self.hidden_dim])
            avg_pooled = tf.reshape(tf.reduce_mean(x, 1), [-1, 2 * self.hidden_dim])
            att_w = tf.get_variable(shape=[2 * self.hidden_dim, self.hidden_dim],
                                    name='att_w')
            att_b = tf.get_variable(shape=[self.hidden_dim], name='att_b')
            att_v = tf.get_variable(shape=[self.hidden_dim, 1], name='att_v')
            x_reshape = tf.reshape(x, [-1, 2 * self.hidden_dim])
            score = tf.reshape(
                tf.matmul(tf.nn.tanh(tf.matmul(x_reshape, att_w) + att_b), att_v),
                [-1, 1, self.max_len])
            alpha = tf.nn.softmax(score, axis=-1)
            att_pooled = tf.reshape(tf.matmul(alpha, x), [-1, 2 * self.hidden_dim])
            concat_pooled = tf.concat((max_pooled, att_pooled, avg_pooled), -1)
            concat_pooled = tf.nn.dropout(concat_pooled, self.dropout_keep_prob)
            dense = tf.layers.dense(concat_pooled, 4, activation=None)
            c_outputs.append(dense)

    self.logits = tf.reshape(tf.concat(c_outputs, axis=1), [-1, n_sub, 4])
    y_ = tf.nn.softmax(self.logits)
    self.prob = tf.reshape(y_, [-1, n_sub, 4])
    self.prediction = tf.argmax(self.prob, 2, name="prediction")

    if not self.config.balance:
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=tf.reshape(self.input_y, [-1, 4])))
        # self.loss += tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        #     logits=self.logits, labels=tf.reshape(self.input_y2, [-1, 4])))
    else:
        # (Alternative from an earlier run: inverse-frequency weights,
        # 0.882 / 0.019 / 0.080 / 0.019 times self.n_classes.)
        class0_weight = 1  # weight coefficient for class 0
        class1_weight = 3  # weight coefficient for class 1
        class2_weight = 3  # weight coefficient for class 2
        class3_weight = 3  # weight coefficient for class 3
        y = tf.reshape(self.input_y, [-1, 4])
        y_flat = tf.reshape(y_, [-1, 4])  # flatten so class columns line up with y
        self.loss = tf.reduce_mean(-class0_weight * (y[:, 0] * tf.log(y_flat[:, 0]))
                                   - class1_weight * (y[:, 1] * tf.log(y_flat[:, 1]))
                                   - class2_weight * (y[:, 2] * tf.log(y_flat[:, 2]))
                                   - class3_weight * (y[:, 3] * tf.log(y_flat[:, 3])))
    return self
def _check_weighted_layer(self, l2_coef, do_layer_norm, use_top_only):
    # create the Batcher
    vocab_file = os.path.join(FIXTURES, 'vocab_test.txt')
    batcher = Batcher(vocab_file, 50)

    # load the model
    options_file = os.path.join(FIXTURES, 'options.json')
    weight_file = os.path.join(FIXTURES, 'lm_weights.hdf5')
    character_ids = tf.placeholder('int32', (None, None, 50))
    model = BidirectionalLanguageModel(options_file, weight_file, max_batch_size=4)
    bilm_ops = model(character_ids)

    weighted_ops = []
    for k in range(2):
        ops = weight_layers(str(k), bilm_ops, l2_coef=l2_coef,
                            do_layer_norm=do_layer_norm,
                            use_top_only=use_top_only)
        weighted_ops.append(ops)

    # initialize
    self.sess.run(tf.global_variables_initializer())

    n_expected_trainable_weights = 2 * (1 + int(not use_top_only))
    self.assertEqual(len(tf.trainable_variables()), n_expected_trainable_weights)
    # and one regularizer per weighted layer
    n_expected_reg_losses = 2 * int(not use_top_only)
    self.assertEqual(
        len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)),
        n_expected_reg_losses,
    )

    # Set the variables.
    weights = [[np.array([0.1, 0.3, 0.5]), np.array([1.1])],
               [np.array([0.2, 0.4, 0.6]), np.array([0.88])]]
    for k in range(2):
        with tf.variable_scope('', reuse=True):
            if not use_top_only:
                W = tf.get_variable('{}_ELMo_W'.format(k))
                _ = self.sess.run([W.assign(weights[k][0])])
            gamma = tf.get_variable('{}_ELMo_gamma'.format(k))
            _ = self.sess.run([gamma.assign(weights[k][1])])

    # make some data
    sentences = [
        ['The', 'first', 'sentence', '.'],
        ['The', 'second'],
        ['Third'],
    ]
    X_chars = batcher.batch_sentences(sentences)

    ops = model(character_ids)  # returns the cached ops for this placeholder
    lm_embeddings, mask, weighted0, weighted1 = self.sess.run(
        [ops['lm_embeddings'], ops['mask'],
         weighted_ops[0]['weighted_op'], weighted_ops[1]['weighted_op']],
        feed_dict={character_ids: X_chars}
    )
    actual_elmo = [weighted0, weighted1]

    # check the mask first
    expected_mask = [[True, True, True, True],
                     [True, True, False, False],
                     [True, False, False, False]]
    self.assertTrue((expected_mask == mask).all())

    # Now compute the actual weighted layers
    for k in range(2):
        normed_weights = np.exp(weights[k][0] + 1.0 / 3) / np.sum(
            np.exp(weights[k][0] + 1.0 / 3))
        # masked layer normalization
        expected_elmo = np.zeros((3, 4, lm_embeddings.shape[-1]))
        if not use_top_only:
            for j in range(3):  # number of LM layers
                if do_layer_norm:
                    mean = np.mean(lm_embeddings[:, j, :, :][mask])
                    std = np.std(lm_embeddings[:, j, :, :][mask])
                    normed_lm_embed = (lm_embeddings[:, j, :, :] - mean) / (
                        std + 1E-12)
                    expected_elmo += normed_weights[j] * normed_lm_embed
                else:
                    expected_elmo += normed_weights[j] * lm_embeddings[:, j, :, :]
        else:
            expected_elmo += lm_embeddings[:, -1, :, :]

        # the scale parameter
        expected_elmo *= weights[k][1]
        self.assertTrue(np.allclose(expected_elmo, actual_elmo[k], atol=1e-6))
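
# The expected value computed in the loop above is the ELMo mixing rule:
# a softmax over the scalar layer weights, applied to the (optionally
# layer-normed) LM layers, then scaled by gamma. A standalone numpy
# restatement (illustrative sketch, not part of the test; `lm_layers` is a
# hypothetical array of shape [n_layers, batch, time, dim]):
def mix_elmo_layers(lm_layers, w, gamma):
    s = np.exp(w) / np.sum(np.exp(w))  # softmax; the +1/3 shift above cancels here
    return gamma * np.tensordot(s, lm_layers, axes=1)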
def create_model(self, concat_sub=True):
    self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, 4],
                                  name='input_y')
    self.input_y2 = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, 4],
                                   name='input_y2')
    self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')
    self.output_keep_prob = tf.placeholder(dtype=tf.float32, name='output_keep_prob')

    if self.main_feature.lower() in ['word', 'char']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len],
                                      name='input_x')
        self.word_embedding = tf.get_variable(initializer=self.embedding,
                                              name='word_embedding')
        self.word_encoding = tf.nn.embedding_lookup(self.word_embedding, self.input_x)
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    elif self.main_feature.lower() in ['elmo_word', 'elmo_char', 'elmo_qiuqiu']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len + 2],
                                      name='input_x')
        if self.main_feature == 'elmo_word':
            options_file = self.config.elmo_word_options_file
            weight_file = self.config.elmo_word_weight_file
            embed_file = self.config.elmo_word_embed_file
        elif self.main_feature == 'elmo_char':
            options_file = self.config.elmo_char_options_file
            weight_file = self.config.elmo_char_weight_file
            embed_file = self.config.elmo_char_embed_file
        elif self.main_feature == 'elmo_qiuqiu':
            options_file = self.config.elmo_qiuqiu_options_file
            weight_file = self.config.elmo_qiuqiu_weight_file
            embed_file = self.config.elmo_qiuqiu_embed_file
        self.bilm = BidirectionalLanguageModel(options_file, weight_file,
                                               use_character_inputs=False,
                                               embedding_weight_file=embed_file,
                                               max_batch_size=self.batch_size)
        bilm_embedding_op = self.bilm(self.input_x)
        bilm_embedding = weight_layers('output', bilm_embedding_op, l2_coef=0.0)
        self.word_encoding = bilm_embedding['weighted_op']
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    else:
        exit('wrong feature')

    self.layer_embedding = tf.get_variable(shape=[n_sub, self.hidden_dim],
                                           name='layer_embedding')
    layer_reshape = tf.reshape(self.layer_embedding, [1, n_sub, 1, self.hidden_dim])
    layer_reshape_tile = tf.tile(layer_reshape,
                                 [self.batch_size, 1, self.max_len, 1])

    self.forward = self.LSTM()
    self.backward = self.LSTM()
    self.forward2 = self.LSTM()
    self.backward2 = self.LSTM()

    with tf.variable_scope('sentence_encode'):
        s1_out, _ = tf.nn.bidirectional_dynamic_rnn(self.forward, self.backward,
                                                    self.word_encoding,
                                                    dtype=tf.float32)
        s1_out = tf.concat(axis=2, values=s1_out)

    s1_reshape = tf.reshape(s1_out, [-1, 1, self.max_len, 2 * self.hidden_dim])
    # Replicate the first-layer LSTM output once per subject.
    s1_tile = tf.tile(s1_reshape, [1, n_sub, 1, 1])
    s2_input = tf.reshape(tf.concat((s1_tile, layer_reshape_tile), -1),
                          [-1, self.max_len, 3 * self.hidden_dim])

    with tf.variable_scope('sentence_encode2'):
        s2_out, _ = tf.nn.bidirectional_dynamic_rnn(self.forward2, self.backward2,
                                                    s2_input, dtype=tf.float32)
        s2_out = tf.reshape(tf.concat(axis=-1, values=s2_out),
                            [-1, n_sub, self.max_len, 2 * self.hidden_dim])

    res_out = s2_out + s1_tile  # residual connection
    res_dense = tf.layers.dense(res_out, self.hidden_dim, activation=tf.nn.relu)
    res_layer_concat = tf.reshape(tf.concat((res_dense, layer_reshape_tile), -1),
                                  [-1, 2 * self.hidden_dim])

    self.att_w = tf.get_variable(shape=[2 * self.hidden_dim, self.hidden_dim],
                                 name='att_w')
    self.att_b = tf.get_variable(shape=[self.hidden_dim], name='att_b')
    self.att_v = tf.get_variable(shape=[self.hidden_dim, 1], name='att_v')
    score = tf.reshape(
        tf.matmul(tf.nn.tanh(tf.matmul(res_layer_concat, self.att_w) + self.att_b),
                  self.att_v),
        [-1, 1, self.max_len])
    alpha = tf.nn.softmax(score)
    layer_sentence = tf.reshape(
        tf.matmul(alpha,
                  tf.reshape(res_out, [-1, self.max_len, 2 * self.hidden_dim])),
        [-1, n_sub, 2 * self.hidden_dim])

    if concat_sub:  # whether to concatenate the subject embedding
        layer_sub = tf.reshape(self.layer_embedding, [1, n_sub, self.hidden_dim])
        layer_sub_tile = tf.tile(layer_sub, [self.batch_size, 1, 1])
        layer_total = tf.concat((layer_sentence, layer_sub_tile), -1)
        outputs = tf.reshape(layer_total, [-1, 3 * self.hidden_dim])
    else:
        outputs = tf.reshape(layer_sentence, [-1, 2 * self.hidden_dim])

    self.logits = tf.layers.dense(outputs, 4, activation=None)
    y_ = tf.nn.softmax(self.logits)
    self.prob = tf.reshape(y_, [-1, n_sub, 4])
    self.prediction = tf.argmax(self.prob, 2, name="prediction")

    if not self.config.balance:
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=tf.reshape(self.input_y, [-1, 4])))
        self.loss += tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=tf.reshape(self.input_y2, [-1, 4])))
    else:
        # (Alternative from an earlier run: inverse-frequency weights,
        # 0.882 / 0.019 / 0.080 / 0.019 times self.n_classes.)
        class0_weight = 1  # weight coefficient for class 0
        class1_weight = 3  # weight coefficient for class 1
        class2_weight = 3  # weight coefficient for class 2
        class3_weight = 3  # weight coefficient for class 3
        y = tf.reshape(self.input_y, [-1, 4])
        self.loss = tf.reduce_mean(-class0_weight * (y[:, 0] * tf.log(y_[:, 0]))
                                   - class1_weight * (y[:, 1] * tf.log(y_[:, 1]))
                                   - class2_weight * (y[:, 2] * tf.log(y_[:, 2]))
                                   - class3_weight * (y[:, 3] * tf.log(y_[:, 3])))
    return self
def create_model(self):
    self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, 4],
                                  name='input_y')
    self.input_y2 = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, 4],
                                   name='input_y2')
    self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')
    self.output_keep_prob = tf.placeholder(dtype=tf.float32, name='output_keep_prob')
    self.input_ids = tf.placeholder(dtype=tf.int32, shape=[None, 190], name='input_ids')
    self.mask_ids = tf.placeholder(dtype=tf.int32, shape=[None, 190], name='mask_ids')
    self.type_ids = tf.placeholder(dtype=tf.int32, shape=[None, 190], name='type_ids')
    self.is_training = tf.placeholder(dtype=tf.bool, name='is_training')

    if self.main_feature.lower() in ['word', 'char']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len],
                                      name='input_x')
        self.word_embedding = tf.get_variable(initializer=self.embedding,
                                              name='word_embedding')
        self.word_encoding = tf.nn.embedding_lookup(self.word_embedding, self.input_x)
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    elif self.main_feature.lower() in ['elmo_word', 'elmo_char', 'elmo_qiuqiu']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len + 2],
                                      name='input_x')
        if self.main_feature == 'elmo_word':
            options_file = self.config.elmo_word_options_file
            weight_file = self.config.elmo_word_weight_file
            embed_file = self.config.elmo_word_embed_file
        elif self.main_feature == 'elmo_char':
            options_file = self.config.elmo_char_options_file
            weight_file = self.config.elmo_char_weight_file
            embed_file = self.config.elmo_char_embed_file
        elif self.main_feature == 'elmo_qiuqiu':
            options_file = self.config.elmo_qiuqiu_options_file
            weight_file = self.config.elmo_qiuqiu_weight_file
            embed_file = self.config.elmo_qiuqiu_embed_file
        self.bilm = BidirectionalLanguageModel(options_file, weight_file,
                                               use_character_inputs=False,
                                               embedding_weight_file=embed_file,
                                               max_batch_size=self.batch_size)
        bilm_embedding_op = self.bilm(self.input_x)
        bilm_embedding = weight_layers('output', bilm_embedding_op, l2_coef=0.0)
        self.word_encoding = bilm_embedding['weighted_op']
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    else:
        exit('wrong feature')

    self.layer_embedding = tf.get_variable(shape=[n_sub, self.hidden_dim],
                                           name='layer_embedding')
    self.forward = self.LSTM()
    self.backward = self.LSTM()
    # add point: the second recurrent layer was switched from LSTM to GRU
    self.forward2 = self.GRU()
    self.backward2 = self.GRU()

    # BERT encoder; note its output overrides the word/ELMo encoding chosen above.
    bert_config = modeling.BertConfig.from_json_file(self.config.BERT_CONFIG_FILES)
    bert_model = modeling.BertModel(config=bert_config,
                                    is_training=self.is_training,
                                    input_ids=self.input_ids,
                                    input_mask=self.mask_ids,
                                    token_type_ids=self.type_ids)
    print('bert config hidden dropout ---', bert_config.hidden_dropout_prob)
    print('bert config attention dropout ---', bert_config.attention_probs_dropout_prob)
    # self.word_encoding = bert_model.get_sequence_output()
    all_layer_output = bert_model.get_all_encoder_layers()
    # Average the first four encoder layers.
    self.word_encoding = (all_layer_output[0] + all_layer_output[1] +
                          all_layer_output[2] + all_layer_output[3]) / 4

    with tf.variable_scope('sentence_encode'):
        all_output_words, _ = tf.nn.bidirectional_dynamic_rnn(
            self.forward, self.backward, self.word_encoding, dtype=tf.float32)
        output_sentence = tf.concat(axis=2, values=all_output_words)
    with tf.variable_scope('sentence_encode2'):
        all_output_words, _ = tf.nn.bidirectional_dynamic_rnn(
            self.forward2, self.backward2, output_sentence, dtype=tf.float32)
        output_sentence = tf.concat(axis=2, values=all_output_words)

    output_sentence = tf.layers.dense(output_sentence, self.hidden_dim,
                                      activation=tf.nn.tanh)
    sentence_reshape = tf.reshape(output_sentence,
                                  [-1, 1, self.max_len, self.hidden_dim])
    # Replicate the sentence once per subject.
    sentence_reshape_tile = tf.tile(sentence_reshape, [1, n_sub, 1, 1])
    layer_reshape = tf.reshape(self.layer_embedding, [1, n_sub, 1, self.hidden_dim])
    layer_reshape_tile = tf.tile(layer_reshape,
                                 [self.batch_size, 1, self.max_len, 1])
    embed_concat = tf.reshape(
        tf.concat(axis=3, values=[sentence_reshape_tile, layer_reshape_tile]),
        [-1, 2 * self.hidden_dim])

    self.att_w = tf.get_variable(shape=[2 * self.hidden_dim, self.hidden_dim],
                                 name='att_w')
    self.att_b = tf.get_variable(shape=[self.hidden_dim], name='att_b')
    self.att_v = tf.get_variable(shape=[self.hidden_dim, 1], name='att_v')
    score = tf.reshape(
        tf.matmul(tf.nn.tanh(tf.matmul(embed_concat, self.att_w) + self.att_b),
                  self.att_v),
        [-1, n_sub, self.max_len])
    alpha = tf.nn.softmax(score, axis=2)
    layer_sentence = tf.matmul(alpha, output_sentence)

    layer_reshape2 = tf.reshape(self.layer_embedding, [1, n_sub, self.hidden_dim])
    layer_reshape2_tile = tf.tile(layer_reshape2, [self.batch_size, 1, 1])
    layer_sentence = tf.concat(axis=2, values=[layer_sentence, layer_reshape2_tile])
    layer_sentence = tf.reshape(layer_sentence, [-1, 2 * self.hidden_dim])
    layer_sentence = tf.layers.dense(layer_sentence, self.hidden_dim,
                                     activation=tf.nn.relu)
    # add point
    layer_sentence = tf.nn.dropout(layer_sentence, self.dropout_keep_prob)

    self.logits = tf.layers.dense(layer_sentence, 4, activation=None)
    y_ = tf.nn.softmax(self.logits, axis=1)
    self.prob = tf.reshape(y_, [-1, n_sub, 4])
    self.prediction = tf.argmax(self.prob, 2, name="prediction")

    if not self.config.balance:
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=tf.reshape(self.input_y, [-1, 4])))
        self.loss += tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=tf.reshape(self.input_y2, [-1, 4])))
    else:
        # (Alternative from an earlier run: inverse-frequency weights,
        # 0.882 / 0.019 / 0.080 / 0.019 times self.n_classes.)
        class0_weight = 1  # weight coefficient for class 0
        class1_weight = 3  # weight coefficient for class 1
        class2_weight = 3  # weight coefficient for class 2
        class3_weight = 3  # weight coefficient for class 3
        y = tf.reshape(self.input_y, [-1, 4])
        self.loss = tf.reduce_mean(-class0_weight * (y[:, 0] * tf.log(y_[:, 0]))
                                   - class1_weight * (y[:, 1] * tf.log(y_[:, 1]))
                                   - class2_weight * (y[:, 2] * tf.log(y_[:, 2]))
                                   - class3_weight * (y[:, 3] * tf.log(y_[:, 3])))
    return self
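
# Feeding sketch (an assumption, not part of the original): the three BERT
# placeholders above expect id sequences padded to length 190. With the
# tokenizer from the google-research/bert repo (the same repo `modeling`
# comes from), building them would look roughly as follows; `tokenization`
# must be imported, and `text`/`vocab_file` are hypothetical.
def bert_inputs(text, vocab_file, max_len=190):
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file)
    tokens = ['[CLS]'] + tokenizer.tokenize(text)[:max_len - 2] + ['[SEP]']
    ids = tokenizer.convert_tokens_to_ids(tokens)
    pad = [0] * (max_len - len(ids))
    # input_ids, mask_ids, type_ids (single segment, so all type ids are 0)
    return ids + pad, [1] * len(ids) + pad, [0] * max_len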
def create_model(self, share_dense=True, concat_sub=True):
    self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, 4],
                                  name='input_y')
    self.input_y2 = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, 4],
                                   name='input_y2')
    self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')
    self.output_keep_prob = tf.placeholder(dtype=tf.float32, name='output_keep_prob')

    if self.main_feature.lower() in ['word', 'char']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len],
                                      name='input_x')
        self.word_embedding = tf.get_variable(initializer=self.embedding,
                                              name='word_embedding')
        self.word_encoding = tf.nn.embedding_lookup(self.word_embedding, self.input_x)
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    elif self.main_feature.lower() in ['elmo_word', 'elmo_char', 'elmo_qiuqiu']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len + 2],
                                      name='input_x')
        if self.main_feature == 'elmo_word':
            options_file = self.config.elmo_word_options_file
            weight_file = self.config.elmo_word_weight_file
            embed_file = self.config.elmo_word_embed_file
        elif self.main_feature == 'elmo_char':
            options_file = self.config.elmo_char_options_file
            weight_file = self.config.elmo_char_weight_file
            embed_file = self.config.elmo_char_embed_file
        elif self.main_feature == 'elmo_qiuqiu':
            options_file = self.config.elmo_qiuqiu_options_file
            weight_file = self.config.elmo_qiuqiu_weight_file
            embed_file = self.config.elmo_qiuqiu_embed_file
        self.bilm = BidirectionalLanguageModel(options_file, weight_file,
                                               use_character_inputs=False,
                                               embedding_weight_file=embed_file,
                                               max_batch_size=self.batch_size)
        bilm_embedding_op = self.bilm(self.input_x)
        bilm_embedding = weight_layers('output', bilm_embedding_op, l2_coef=0.0)
        self.word_encoding = bilm_embedding['weighted_op']
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    else:
        exit('wrong feature')

    # Window-3 convolution over the embeddings, then a BiLSTM.
    inputs_expanded = tf.expand_dims(self.word_encoding, -1)
    n_filters = 128
    filter_shape = [3, self.embed_size, 1, n_filters]
    W = tf.get_variable(initializer=tf.truncated_normal(filter_shape, stddev=0.1),
                        name='W')
    b = tf.get_variable('b', initializer=tf.constant(0.1, shape=[n_filters]))
    conv = tf.nn.conv2d(inputs_expanded, W, strides=[1] * 4, padding='VALID',
                        name='conv2d')
    h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
    h = tf.reshape(h, [-1, self.max_len - 3 + 1, n_filters])

    self.forward = self.LSTM()
    self.backward = self.LSTM()
    x, _ = tf.nn.bidirectional_dynamic_rnn(self.forward, self.backward, h,
                                           dtype=tf.float32)
    x = tf.concat(x, -1)
    output_sentence = tf.layers.dense(x, self.hidden_dim, activation=tf.nn.relu)

    x_reshape = tf.reshape(output_sentence,
                           [-1, 1, self.max_len - 3 + 1, self.hidden_dim])
    # Replicate the sentence once per subject.
    x_tile = tf.tile(x_reshape, [1, n_sub, 1, 1])
    sub_embedding = tf.get_variable(shape=[n_sub, self.hidden_dim],
                                    name='sub_embedding')
    sub_reshape = tf.reshape(sub_embedding, [1, n_sub, 1, self.hidden_dim])
    sub_tile = tf.tile(sub_reshape, [self.batch_size, 1, self.max_len - 3 + 1, 1])
    embed_concat = tf.reshape(tf.concat((x_tile, sub_tile), -1),
                              [-1, 2 * self.hidden_dim])

    att_w = tf.get_variable(shape=[2 * self.hidden_dim, self.hidden_dim],
                            name='att_w')
    att_b = tf.get_variable(shape=[self.hidden_dim], name='att_b')
    att_v = tf.get_variable(shape=[self.hidden_dim, 1], name='att_v')
    score = tf.matmul(tf.nn.tanh(tf.matmul(embed_concat, att_w) + att_b), att_v)
    score_fit = tf.reshape(score, [-1, n_sub, self.max_len - 3 + 1])
    alpha = tf.nn.softmax(score_fit)
    layer_sentence = tf.matmul(alpha, output_sentence)

    if concat_sub:  # whether to concatenate the subject embedding
        layer_sub = tf.reshape(sub_embedding, [1, n_sub, self.hidden_dim])
        layer_sub_tile = tf.tile(layer_sub, [self.batch_size, 1, 1])
        layer_total = tf.concat((layer_sentence, layer_sub_tile), -1)
        outputs = tf.reshape(layer_total, [-1, 2 * self.hidden_dim])
    else:
        outputs = tf.reshape(layer_sentence, [-1, self.hidden_dim])

    self.logits = tf.layers.dense(outputs, 4, activation=None)
    y_ = tf.nn.softmax(self.logits)
    self.prob = tf.reshape(y_, [-1, n_sub, 4])
    self.prediction = tf.argmax(self.prob, 2, name="prediction")

    if not self.config.balance:
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=tf.reshape(self.input_y, [-1, 4])))
        # self.loss += tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        #     logits=self.logits, labels=tf.reshape(self.input_y2, [-1, 4])))
    else:
        # (Alternative from an earlier run: inverse-frequency weights,
        # 0.882 / 0.019 / 0.080 / 0.019 times self.n_classes.)
        class0_weight = 1  # weight coefficient for class 0
        class1_weight = 3  # weight coefficient for class 1
        class2_weight = 3  # weight coefficient for class 2
        class3_weight = 3  # weight coefficient for class 3
        y = tf.reshape(self.input_y, [-1, 4])
        self.loss = tf.reduce_mean(-class0_weight * (y[:, 0] * tf.log(y_[:, 0]))
                                   - class1_weight * (y[:, 1] * tf.log(y_[:, 1]))
                                   - class2_weight * (y[:, 2] * tf.log(y_[:, 2]))
                                   - class3_weight * (y[:, 3] * tf.log(y_[:, 3])))
    return self
def create_model(self, share_dense=True, concat_sub=True):
    self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, 4],
                                  name='input_y')
    self.input_y2 = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, 4],
                                   name='input_y2')
    self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')
    self.output_keep_prob = tf.placeholder(dtype=tf.float32, name='output_keep_prob')

    if self.main_feature.lower() in ['word', 'char']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len],
                                      name='input_x')
        self.word_embedding = tf.get_variable(initializer=self.embedding,
                                              name='word_embedding')
        self.word_encoding = tf.nn.embedding_lookup(self.word_embedding, self.input_x)
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    elif self.main_feature.lower() in ['elmo_word', 'elmo_char', 'elmo_qiuqiu']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len + 2],
                                      name='input_x')
        if self.main_feature == 'elmo_word':
            options_file = self.config.elmo_word_options_file
            weight_file = self.config.elmo_word_weight_file
            embed_file = self.config.elmo_word_embed_file
        elif self.main_feature == 'elmo_char':
            options_file = self.config.elmo_char_options_file
            weight_file = self.config.elmo_char_weight_file
            embed_file = self.config.elmo_char_embed_file
        elif self.main_feature == 'elmo_qiuqiu':
            options_file = self.config.elmo_qiuqiu_options_file
            weight_file = self.config.elmo_qiuqiu_weight_file
            embed_file = self.config.elmo_qiuqiu_embed_file
        self.bilm = BidirectionalLanguageModel(options_file, weight_file,
                                               use_character_inputs=False,
                                               embedding_weight_file=embed_file,
                                               max_batch_size=self.batch_size)
        bilm_embedding_op = self.bilm(self.input_x)
        bilm_embedding = weight_layers('output', bilm_embedding_op, l2_coef=0.0)
        self.word_encoding = bilm_embedding['weighted_op']
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    else:
        exit('wrong feature')

    c_outputs = []
    for c in range(n_sub):
        with tf.variable_scope('lstm-{}'.format(c)):
            # cuDNN BiLSTM (time-major input). A tf.nn.bidirectional_dynamic_rnn
            # variant was left commented out in the original.
            self.forward = cudnn_rnn.CudnnLSTM(
                num_layers=1, num_units=self.hidden_dim,
                direction=cudnn_rnn.CUDNN_RNN_BIDIRECTION, dtype=tf.float32)
            x, _ = self.forward(tf.transpose(self.word_encoding, [1, 0, 2]))
            x = tf.transpose(x, [1, 0, 2])  # back to batch-major
        with tf.variable_scope('conv-{}'.format(c)):
            inputs_expanded = tf.expand_dims(x, -1)
            filter_shape = [3, 2 * self.hidden_dim, 1, n_filters]
            W = tf.get_variable(initializer=tf.truncated_normal(filter_shape,
                                                                stddev=0.1),
                                name='W')
            b = tf.get_variable('b', initializer=tf.constant(0.1,
                                                             shape=[n_filters]))
            conv = tf.nn.conv2d(inputs_expanded, W, strides=[1] * 4,
                                padding='VALID', name='conv2d')
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
            max_pooled = tf.nn.max_pool(h, ksize=[1, self.max_len - 3 + 1, 1, 1],
                                        strides=[1, 1, 1, 1], padding='VALID',
                                        name='max_pool')
            avg_pooled = tf.nn.avg_pool(h, ksize=[1, self.max_len - 3 + 1, 1, 1],
                                        strides=[1, 1, 1, 1], padding='VALID',
                                        name='avg_pool')
            concat_pooled = tf.reshape(tf.concat((max_pooled, avg_pooled), -1),
                                       [-1, 2 * n_filters])
            concat_pooled = tf.nn.dropout(concat_pooled, self.dropout_keep_prob)
            dense = tf.layers.dense(concat_pooled, 4, activation=None)
            c_outputs.append(dense)

    self.logits = tf.reshape(tf.concat(c_outputs, axis=1), [-1, n_sub, 4])
    y_ = tf.nn.softmax(self.logits)
    self.prob = tf.reshape(y_, [-1, n_sub, 4])
    self.prediction = tf.argmax(self.prob, 2, name="prediction")

    if not self.config.balance:
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=tf.reshape(self.input_y, [-1, 4])))
        self.loss += tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=tf.reshape(self.input_y2, [-1, 4])))
    else:
        # (Alternative from an earlier run: inverse-frequency weights,
        # 0.882 / 0.019 / 0.080 / 0.019 times self.n_classes.)
        class0_weight = 1  # weight coefficient for class 0
        class1_weight = 3  # weight coefficient for class 1
        class2_weight = 3  # weight coefficient for class 2
        class3_weight = 3  # weight coefficient for class 3
        y = tf.reshape(self.input_y, [-1, 4])
        y_flat = tf.reshape(y_, [-1, 4])  # flatten so class columns line up with y
        self.loss = tf.reduce_mean(-class0_weight * (y[:, 0] * tf.log(y_flat[:, 0]))
                                   - class1_weight * (y[:, 1] * tf.log(y_flat[:, 1]))
                                   - class2_weight * (y[:, 2] * tf.log(y_flat[:, 2]))
                                   - class3_weight * (y[:, 3] * tf.log(y_flat[:, 3])))
    return self
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    features['elmo_input'])

    # Read vocabs and inputs
    dropout = params['dropout']
    ((words, nwords), elmo_inputs) = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open(encoding="utf8") as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1

    # ELMo is the only input representation here; only the top LM layer is used.
    options_file = r"C:\Users\NLP-Ho\Downloads\bilm-tf-master\output_path\to\checkpoint\options.json"
    weight_file = r"C:\Users\NLP-Ho\Downloads\bilm-tf-master\output_path\to\weights.hdf5"
    bilm = BidirectionalLanguageModel(options_file=options_file,
                                      weight_file=weight_file,
                                      use_character_inputs=True)
    ops = bilm(elmo_inputs)
    weight_op = weight_layers("nerelmo", ops, use_top_only=True)['weighted_op']

    # (Earlier experiments left commented out in the original: a tf-hub ELMo
    # module, allennlp's Elmo with batch_to_ids, and GloVe lookup embeddings
    # loaded from params['W2V'].)

    # LSTM
    t = tf.transpose(weight_op, perm=[1, 0, 2])  # need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
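
# Export sketch (an assumption, not part of the original): the serving branch
# at the top of model_fn expects a features dict with 'words', 'nwords' and
# 'elmo_input'. A matching receiver might look like this; the 50 in the
# character-id shape follows the bilm convention of 50 chars per token.
def serving_input_receiver_fn():
    words = tf.placeholder(tf.string, [None, None], name='words')
    nwords = tf.placeholder(tf.int32, [None], name='nwords')
    elmo_input = tf.placeholder(tf.int32, [None, None, 50], name='elmo_input')
    features = {'words': words, 'nwords': nwords, 'elmo_input': elmo_input}
    return tf.estimator.export.ServingInputReceiver(features, features)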
def create_model(self, share_dense=True):
    self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, n_sub, n_sent],
                                  name='input_y')
    self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')
    self.output_keep_prob = tf.placeholder(dtype=tf.float32, name='output_keep_prob')

    if self.main_feature.lower() in ['word', 'char']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len],
                                      name='input_x')
        self.word_embedding = tf.get_variable(initializer=self.embedding,
                                              name='word_embedding')
        self.word_encoding = tf.nn.embedding_lookup(self.word_embedding, self.input_x)
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    elif self.main_feature.lower() in ['elmo_word', 'elmo_char', 'elmo_qiuqiu']:
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, self.max_len + 2],
                                      name='input_x')
        if self.main_feature == 'elmo_word':
            options_file = self.config.elmo_word_options_file
            weight_file = self.config.elmo_word_weight_file
            embed_file = self.config.elmo_word_embed_file
        elif self.main_feature == 'elmo_char':
            options_file = self.config.elmo_char_options_file
            weight_file = self.config.elmo_char_weight_file
            embed_file = self.config.elmo_char_embed_file
        elif self.main_feature == 'elmo_qiuqiu':
            options_file = self.config.elmo_qiuqiu_options_file
            weight_file = self.config.elmo_qiuqiu_weight_file
            embed_file = self.config.elmo_qiuqiu_embed_file
        self.bilm = BidirectionalLanguageModel(options_file, weight_file,
                                               use_character_inputs=False,
                                               embedding_weight_file=embed_file,
                                               max_batch_size=self.batch_size)
        bilm_embedding_op = self.bilm(self.input_x)
        bilm_embedding = weight_layers('output', bilm_embedding_op, l2_coef=0.0)
        self.word_encoding = bilm_embedding['weighted_op']
        self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob)
    else:
        exit('wrong feature')

    rcnn_outputs = []
    for i in range(n_sub):
        with tf.variable_scope('rcnn_output_%d' % i):
            output_bigru = self.bi_gru(self.word_encoding, hidden_size)
            output = self.textcnn(output_bigru, self.max_len)
            rcnn_outputs.append(output)
    n_filter_total = n_filter * len(filter_sizes)
    outputs = tf.reshape(tf.concat(rcnn_outputs, 1), (-1, n_sub, n_filter_total))

    if share_dense:
        # One projection shared across all subjects.
        cnn_outputs = tf.reshape(outputs, (-1, n_filter_total))
        W = tf.get_variable('W', shape=[n_filter_total, self.n_classes])
        b = tf.get_variable('b', initializer=tf.constant(0.1,
                                                         shape=[self.n_classes]))
        self.logits = tf.nn.xw_plus_b(cnn_outputs, W, b, name='scores')
    else:
        # Per-batch weights; tf.nn.xw_plus_b only accepts 2-D inputs, so a
        # batched matmul is used for the 3-D case.
        cnn_outputs = tf.reshape(tf.concat(outputs, 1), (-1, n_sub, n_filter_total))
        W = tf.get_variable('W', shape=[self.batch_size, n_filter_total,
                                        self.n_classes])
        b = tf.get_variable('b', initializer=tf.constant(0.1,
                                                         shape=[self.n_classes]))
        self.logits = tf.add(tf.matmul(cnn_outputs, W), b, name='scores')

    y_ = tf.nn.softmax(self.logits)
    self.prob = tf.reshape(y_, [-1, n_sub, 4])
    self.prediction = tf.argmax(self.prob, 2, name="prediction")

    if not self.config.balance:
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=tf.reshape(self.input_y, [-1, 4])))
    else:
        # (Alternative from an earlier run: inverse-frequency weights,
        # 0.882 / 0.019 / 0.080 / 0.019 times self.n_classes.)
        class0_weight = 1  # weight coefficient for class 0
        class1_weight = 3  # weight coefficient for class 1
        class2_weight = 3  # weight coefficient for class 2
        class3_weight = 3  # weight coefficient for class 3
        y = tf.reshape(self.input_y, [-1, 4])
        y_flat = tf.reshape(y_, [-1, 4])  # flatten so class columns line up with y
        self.loss = tf.reduce_mean(-class0_weight * (y[:, 0] * tf.log(y_flat[:, 0]))
                                   - class1_weight * (y[:, 1] * tf.log(y_flat[:, 1]))
                                   - class2_weight * (y[:, 2] * tf.log(y_flat[:, 2]))
                                   - class3_weight * (y[:, 3] * tf.log(y_flat[:, 3])))
    return self
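
# Inference sketch (an assumption, not part of the original): running the
# `prediction` op of any of the create_model variants above. `sess`, `model`,
# and `x_batch` are hypothetical; dropout is disabled at test time by feeding
# keep probabilities of 1.0.
def predict(sess, model, x_batch):
    feed = {model.input_x: x_batch,
            model.dropout_keep_prob: 1.0,
            model.output_keep_prob: 1.0}
    return sess.run(model.prediction, feed_dict=feed)  # shape [batch, n_sub]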