class CQABertModel(nn.Module):
    """
    The BERT encoder adapted for our implementation to include the
    'history_answer_marker' input.
    """

    def __init__(self, args, config):
        super(CQABertModel, self).__init__()
        self.args = args
        self.model = BertModel(config, self.args)
        if self.args.n_gpu > 1 and not self.args.no_cuda:
            self.model = nn.DataParallel(self.model)
        self.model.to(args.device)

    def forward(self, input_ids, input_mask, segment_ids, history_answer_marker,
                use_one_hot_embeddings):
        inputs = {
            "input_ids": input_ids.to(self.args.device),
            "attention_mask": input_mask.to(self.args.device),
            "token_type_ids": segment_ids.to(self.args.device),
            "history_answer_marker": history_answer_marker.to(self.args.device),
        }
        outputs = self.model(**inputs)
        # Final hidden layer, with dimensions [batch_size, max_seq_len, hidden_size]
        sequence_output = outputs[0]
        # Representation of the entire sequence, i.e. the embedding of the '[CLS]' token
        pooled_output = outputs[1]
        return sequence_output, pooled_output
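# Hedged usage sketch for CQABertModel: the args/config objects, the modified
# BertModel signature, and the tensor shapes below are assumptions made for
# illustration only.
import torch

batch_size, max_seq_len = 2, 16
dummy_input_ids = torch.randint(0, 100, (batch_size, max_seq_len))
dummy_input_mask = torch.ones(batch_size, max_seq_len, dtype=torch.long)
dummy_segment_ids = torch.zeros(batch_size, max_seq_len, dtype=torch.long)
dummy_history_marker = torch.zeros(batch_size, max_seq_len, dtype=torch.long)

# model = CQABertModel(args, config)  # args and config come from the training setup
# sequence_output, pooled_output = model(dummy_input_ids, dummy_input_mask,
#                                        dummy_segment_ids, dummy_history_marker,
#                                        use_one_hot_embeddings=False)
# sequence_output: [batch_size, max_seq_len, hidden_size]
# pooled_output:   [batch_size, hidden_size]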
class Prediction:

    def __init__(self):
        self.model_class = BertModel()
        self.estimator, self.tokenizer = self.model_class.get_estmator()

    def getPrediction(self, in_sentences):
        labels = ["Negative", "Positive"]
        label_list = [0, 1]
        MAX_SEQ_LENGTH = 130
        input_examples = [
            run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0)
            for x in in_sentences
        ]
        input_features = run_classifier.convert_examples_to_features(
            input_examples, label_list, MAX_SEQ_LENGTH, self.tokenizer)
        predict_input_fn = run_classifier.input_fn_builder(
            features=input_features,
            seq_length=MAX_SEQ_LENGTH,
            is_training=False,
            drop_remainder=False)
        predictions = self.estimator.predict(predict_input_fn)
        return [(sentence, labels[prediction['labels']])
                for sentence, prediction in zip(in_sentences, predictions)]

    def get_prediction(self, sentence_list):
        # pred_sentences = [
        #     "Anyway, thanks for the kind reply. Btw, I am still a SingTel subscriber",
        #     "Why is internet so thrash today @Singtel"
        # ]
        predictions = self.getPrediction(sentence_list)
        return predictions
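# Hedged usage sketch for the Prediction wrapper above: it assumes the
# project's BertModel/run_classifier modules are importable and that a trained
# estimator checkpoint is available; the sentences are illustrative only.
if __name__ == "__main__":
    predictor = Prediction()
    results = predictor.get_prediction([
        "The support team resolved my issue quickly.",
        "The connection keeps dropping every few minutes.",
    ])
    for sentence, label in results:
        print("{}: {}".format(label, sentence))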
def get_network(self):
    # If the network is already loaded, simply return it
    if self._network is not None:
        return self._network

    # If a checkpoint file is available, load the weights from it;
    # otherwise the network keeps the pretrained BERT weights
    state_dict = self.get_trainer().try_load_statedict_from_checkpoint()

    self._network = BertModel(self._bert_model_name,
                              self.get_label_mapper().num_classes,
                              fine_tune=self.fine_tune)

    if state_dict is not None:
        self._logger.info("Checkpoint model found")
        self._network.load_state_dict(state_dict)

    return self._network
def __init__(self, config: BertConfig):
    super(Seq2SeqModel, self).__init__()
    # Read the model dimensions from the config
    self.hidden_dim = config.hidden_size
    self.vocab_size = config.vocab_size

    # Encoder and decoder: BERT encodes the input, and the LM prediction head
    # (tied to the word-embedding matrix) decodes to vocabulary logits
    self.bert = BertModel(config)
    self.decoder = BertLMPredictionHead(
        config, self.bert.embeddings.word_embeddings.weight)

    # Load the vocabulary and tokenizer
    self.word2ix = load_bert_vocab()
    self.tokenizer = Tokenizer(self.word2ix)
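# Hedged shape sketch for the encoder/decoder pairing above: BERT returns
# hidden states of size hidden_size, and BertLMPredictionHead (tied to the
# word-embedding matrix) maps them to vocab_size logits. The calls below are
# assumptions for illustration; the real Seq2SeqModel defines its own forward
# pass and loss.
# hidden_states, _ = self.bert(input_ids, token_type_ids=token_type_ids)
# logits = self.decoder(hidden_states)              # [batch, seq_len, vocab_size]
# shifted language-model objective over the target segment (assumed):
# loss = F.cross_entropy(logits[:, :-1].reshape(-1, self.vocab_size),
#                        input_ids[:, 1:].reshape(-1))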
def test_forward(self):
    # Bert Config
    vocab_size = 10
    sequence_len = 20
    batch = 32
    num_classes = 3
    expected_shape = (batch, num_classes)
    input_batch = torch.randint(low=0, high=vocab_size - 1,
                                size=(batch, sequence_len))
    config = transformers.BertConfig(vocab_size=vocab_size,
                                     hidden_size=10,
                                     num_hidden_layers=1,
                                     num_attention_heads=1,
                                     num_labels=num_classes)
    sut = BertModel(None, None, bert_config=config)

    # Act
    actual = sut.forward(input_batch)[0]

    # Assert
    self.assertEqual(expected_shape, actual.shape)
def get_network(self):
    # If the network is already loaded, simply return it
    if self._network is not None:
        return self._network

    # If a checkpoint file is available, load the full model from it
    self._network = self.get_trainer().try_load_model_from_checkpoint()

    # Only load from pretrained BERT when no checkpoint is available
    if self._network is None:
        self._logger.info(
            "No checkpoint models found.. Loading pretrained BERT {}".format(
                self._bert_model_name))
        self._network = BertModel(self._bert_model_name,
                                  self.get_label_mapper().num_classes,
                                  fine_tune=self.fine_tune)

    return self._network
import argparse

import numpy as np
import tensorflow as tf
from flyai.dataset import Dataset

import config
from model import Model
from bert_model import BertModel

parser = argparse.ArgumentParser()
parser.add_argument("-b", "--BATCH", default=32, type=int, help="batch size")
parser.add_argument("-e", "--EPOCHS", default=8, type=int, help="train epochs")
args = parser.parse_args()

dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
modelpp = Model(dataset)
model = BertModel()


def learning_rate_decay(learning_rate):
    return learning_rate * 0.5


def evaluate(sess):
    """Evaluate accuracy and loss on the validation data."""
    x_val_all, y_val_all = dataset.get_all_validation_data()
    data_len = len(y_val_all)
    index = np.random.permutation(len(y_val_all))
    n_batches = len(y_val_all) // args.BATCH + 1
    total_loss = 0.0
    total_acc = 0.0
    x_input_ids_val = x_val_all[0]
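# Hedged sketch of how a batched evaluation loop like evaluate() is commonly
# finished: slice the shuffled index into batches and average loss/accuracy
# weighted by batch size. The feed-dict keys and the loss/accuracy ops are
# placeholders, not the real graph nodes from bert_model.BertModel.
# for i in range(n_batches):
#     batch_idx = index[i * args.BATCH:(i + 1) * args.BATCH]
#     if len(batch_idx) == 0:
#         continue
#     loss, acc = sess.run([model.loss, model.accuracy],
#                          feed_dict={model.input_ids: x_input_ids_val[batch_idx],
#                                     model.labels: y_val_all[batch_idx]})
#     total_loss += loss * len(batch_idx)
#     total_acc += acc * len(batch_idx)
# return total_loss / data_len, total_acc / data_len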
class Builder:

    def __init__(self, train_data, val_data, labels_file, model_dir,
                 num_workers=None, checkpoint_dir=None, epochs=10,
                 early_stopping_patience=10, checkpoint_frequency=1,
                 grad_accumulation_steps=8, batch_size=8, max_seq_len=512,
                 learning_rate=0.00001, fine_tune=True):
        self.model_dir = model_dir
        self.fine_tune = fine_tune
        self.learning_rate = learning_rate
        self.checkpoint_frequency = checkpoint_frequency
        self.grad_accumulation_steps = grad_accumulation_steps
        self.early_stopping_patience = early_stopping_patience
        self.epochs = epochs
        self.checkpoint_dir = checkpoint_dir
        self.train_data = train_data
        self.val_data = val_data
        self.labels_file = labels_file
        self.batch_size = batch_size
        # Note: since the maximum sequence length of the position embeddings in
        # pretrained BERT is 512, max_seq_len must be less than or equal to 512.
        # Increasing it also risks GPU out-of-memory errors.
        self._max_seq_len = max_seq_len
        self.num_workers = num_workers or os.cpu_count() - 1
        if self.num_workers <= 0:
            self.num_workers = 0

        self._network = None
        self._train_dataloader = None
        self._train_dataset = None
        self._val_dataset = None
        self._val_dataloader = None
        self._trainer = None
        self._lossfunc = None
        self._optimiser = None
        self._label_mapper = None

        self._bert_model_name = "bert-base-cased"
        self._token_lower_case = False

    def get_preprocessor(self):
        self._logger.info("Retrieving Tokeniser")
        tokeniser = BertTokenizer.from_pretrained(self._bert_model_name,
                                                  do_lower_case=self._token_lower_case)
        preprocessor = PreprocessorBertTokeniser(max_feature_len=self._max_seq_len,
                                                 tokeniser=tokeniser)
        self._logger.info("Completed retrieving Tokeniser")
        return preprocessor

    def get_network(self):
        # If the network is already loaded, simply return it
        if self._network is not None:
            return self._network

        self._logger.info("Retrieving model")

        # If a checkpoint file is available, load the weights from it;
        # otherwise the network keeps the pretrained BERT weights
        state_dict = self.get_trainer().try_load_statedict_from_checkpoint()

        self._network = BertModel(self._bert_model_name,
                                  self.get_label_mapper().num_classes,
                                  fine_tune=self.fine_tune)

        if state_dict is not None:
            self._logger.info("Checkpoint model found")
            self._network.load_state_dict(state_dict)

        self._logger.info("Retrieving model complete")

        return self._network

    def get_train_dataset(self):
        if self._train_dataset is None:
            self._train_dataset = DbpediaDataset(self.train_data,
                                                 preprocessor=self.get_preprocessor())
        return self._train_dataset

    def get_val_dataset(self):
        if self._val_dataset is None:
            self._val_dataset = DbpediaDataset(self.val_data,
                                               preprocessor=self.get_preprocessor())
        return self._val_dataset

    def get_label_mapper(self):
        if self._label_mapper is None:
            self._label_mapper = DbpediaLabelMapper(self.labels_file)
        return self._label_mapper

    def get_pos_label_index(self):
        return self.get_label_mapper().positive_label_index

    def get_train_val_dataloader(self):
        if self._train_dataloader is None:
            self._train_dataloader = DataLoader(dataset=self.get_train_dataset(),
                                                num_workers=self.num_workers,
                                                batch_size=self.batch_size,
                                                shuffle=True)
        if self._val_dataloader is None:
            self._val_dataloader = DataLoader(dataset=self.get_val_dataset(),
                                              num_workers=self.num_workers,
                                              batch_size=self.batch_size,
                                              shuffle=False)
        return self._train_dataloader, self._val_dataloader

    def get_loss_function(self):
        if self._lossfunc is None:
            self._lossfunc = nn.CrossEntropyLoss()
        return self._lossfunc

    def get_optimiser(self):
        if self._optimiser is None:
            self._optimiser = Adam(params=self.get_network().parameters(),
                                   lr=self.learning_rate)
        return self._optimiser

    def get_trainer(self):
        if self._trainer is None:
            self._trainer = Train(model_dir=self.model_dir,
                                  epochs=self.epochs,
                                  early_stopping_patience=self.early_stopping_patience,
                                  checkpoint_frequency=self.checkpoint_frequency,
                                  checkpoint_dir=self.checkpoint_dir,
                                  accumulation_steps=self.grad_accumulation_steps)
        return self._trainer

    @property
    def _logger(self):
        return logging.getLogger(__name__)
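# Hedged usage sketch for the Builder above. The file paths and keyword values
# are placeholders; only the constructor and getter calls come from the class
# itself, and the trainer's run/fit signature is not shown in this snippet.
builder = Builder(train_data="data/dbpedia_train.csv",      # placeholder path
                  val_data="data/dbpedia_val.csv",          # placeholder path
                  labels_file="data/dbpedia_labels.csv",    # placeholder path
                  model_dir="output",
                  checkpoint_dir="output/checkpoints",
                  batch_size=8,
                  epochs=10)

train_dataloader, val_dataloader = builder.get_train_val_dataloader()
network = builder.get_network()
loss_function = builder.get_loss_function()
optimiser = builder.get_optimiser()
trainer = builder.get_trainer()
# How these objects are passed into the trainer depends on Train's API, which
# is not part of this snippet.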
def build_model(self):
    """
    Tried Models :
        Try 1 : (bi-LSTM (Char) + Word Embeddings) + bi-LSTM + CRF
        Try 2 : (bi-LSTM (Char) + Word Embeddings) + Highway Network x 2 + bi-LSTM + CRF
        Try 3 : (CharCNN (Char) + Word Embeddings) + Highway Network x 2 + bi-LSTM + multi-head-attention + CRF
        Try 4 : (CharCNN (Char) + Word Embeddings) + Highway Network x 2 + Densely-Connected-bi-LSTM + CRF
        Try 5 : (bi-LSTM (Char) + Word Embeddings) + Highway Network x 2 + Densely-Connected-bi-LSTM + CRF : F1 66.8577
        Try 6 : (bi-LSTM (Char) + Word Embeddings) + Highway Network x 2 + Densely-Connected-bi-LSTM + Residual + CRF : F1 68.4807
        Try 7 : (bi-LSTM (Char) + Word Embeddings) + Densely-Connected-bi-LSTM x 8 + Residual + CRF : F1 71.5
        Try 8 : (bi-LSTM (Char) + BERT) + Densely-Connected-bi-LSTM x 8 + Residual + CRF : 78.8
        Try 9 : (bi-LSTM (Char) + BERT + WordEmbedding) + Densely-Connected-bi-LSTM x 8 + Residual + CRF :
    """
    self._build_placeholder()

    word_bert_config = BertConfig(
        vocab_size=self.parameter["embedding"][0][1],
        hidden_size=self.parameter["word_embedding_size"],
        num_hidden_layers=self.parameter["num_hidden_layers"],
        num_attention_heads=self.parameter["num_attention_heads"],
        intermediate_size=self.parameter["intermediate_size"],
        # hidden_dropout_prob=1. - self.dropout_rate,
        # attention_probs_dropout_prob=1. - self.dropout_rate,
        max_position_embeddings=self.parameter["sentence_length"],
        type_vocab_size=self.parameter["n_class"])

    word_bert_model = BertModel(
        config=word_bert_config,
        is_training=self.parameter["mode"] == "train",
        input_ids=self.morph,
        input_mask=None,  # tf.sign(tf.abs(self.morph)), # None
        token_type_ids=self.label if self.parameter["mode"] == "train" else None,  # None
        use_one_hot_embeddings=False,
        scope="WordBertModel")
    word_bert_output = word_bert_model.get_sequence_output()
    print("[*] Word BERT output : ", word_bert_output.get_shape().as_list())

    # { "morph": 0, "morph_tag": 1, "tag" : 2, "character": 3, .. }
    # for item in self.parameter["embedding"]:
    #     self._embedding_matrix.append(self._build_embedding(item[1], item[2], name="embedding_" + item[0]))
    # self._embeddings.append(tf.nn.embedding_lookup(self._embedding_matrix[0], self.morph))
    # self._embeddings.append(tf.nn.embedding_lookup(self._embedding_matrix[1], self.character))
    # self._embeddings[0] = tf.nn.dropout(self._embeddings[0], self.dropout_rate)

    char_embeddings = self._build_embedding(
        self.parameter["embedding"][1][1],
        self.parameter["embedding"][1][2],
        name="embedding_" + self.parameter["embedding"][1][0])
    char_embeddings = tf.nn.embedding_lookup(char_embeddings, self.character)
    character_embedding = tf.reshape(char_embeddings, [
        -1, self.parameter["word_length"], self.parameter["char_embedding_size"]
    ])

    char_len = tf.reshape(self.character_len, [-1])
    char_embs_rnn = self._build_birnn_model(
        character_embedding,
        seq_len=char_len,
        lstm_units=self.parameter["char_lstm_units"],
        keep_prob=self.dropout_rate,
        last=True,
        scope="char_layer")
    char_embs_rnn_size = char_embs_rnn.get_shape().as_list()
    print("[*] Character Embedding RNN size : ", char_embs_rnn_size)

    """
    with tf.name_scope("CharCNN"):
        ks = [2, 3, 4, 5]
        fs = 16
        pooled_out = []
        for idx, k in enumerate(ks):
            x = tf.layers.conv1d(character_embedding, filters=fs, kernel_size=k,
                                 dilation_rate=2 ** idx,
                                 kernel_initializer=self.he_uni,
                                 kernel_regularizer=self.l2_reg,
                                 padding='same', name="dilatedCNN-%d" % idx)
            x = tf.nn.relu(x)
            x = tf.reduce_max(x, axis=1)
            pooled_out.append(x)
        char_embs_cnn = tf.concat(pooled_out, axis=1)
        char_embs_cnn = tf.reshape(char_embs_cnn,
                                   (-1, self.parameter["sentence_length"], fs * len(ks)))
        char_embs_cnn_size = char_embs_cnn.get_shape().as_list()
        print("[*] Character Embedding CNN size : ", char_embs_cnn_size)
    """

    all_data_emb = tf.concat(
        [self.ne_dict, word_bert_output, char_embs_rnn], axis=2)
    print("[*] Embeddings : ", all_data_emb.get_shape().as_list())

    dense_bi_lstm = DenselyConnectedBiRNN(
        num_layers=self.parameter["num_dc_layer"],
        num_units=self.parameter["lstm_units"],
        num_last_units=self.parameter["lstm_units"],
        keep_prob=self.dropout_rate)(all_data_emb, seq_len=self.sequence)
    print("[*] DC-bi-LSTM : ", dense_bi_lstm.get_shape().as_list())

    dense_bi_lstm = tf.reshape(
        dense_bi_lstm, (-1, dense_bi_lstm.get_shape().as_list()[-1]))
    print("[*] DC-bi-LSTM-reshape : ", dense_bi_lstm.get_shape().as_list())

    residual = tf.layers.dense(
        tf.reshape(all_data_emb, (-1, all_data_emb.get_shape().as_list()[-1])),
        units=2 * self.parameter["lstm_units"],
        kernel_initializer=self.he_uni,
        kernel_regularizer=self.l2_reg)
    dense_bi_lstm += residual  # tf.concat([context, p_context], axis=-1)

    outputs = tf.nn.dropout(dense_bi_lstm, self.dropout_rate)
    outputs = tf.reshape(outputs, (-1, dense_bi_lstm.get_shape().as_list()[-1]))
    # outputs = tf.reshape(outputs, (-1, self.parameter["sentence_length"], outputs.get_shape().as_list()[-1]))
    print("[*] outputs size : ", outputs.get_shape().as_list())

    """
    outputs = self._build_birnn_model(outputs, self.sequence,
                                      self.parameter["lstm_units"],
                                      self.dropout_rate, scope="bi-LSTM")
    print("[*] outputs size : ", outputs.get_shape().as_list())
    """
    """
    # outputs = tf.nn.dropout(outputs, self.dropout_rate)
    # outputs = layer_norm_and_dropout(outputs, self.dropout_rate)
    outputs = self._build_birnn_model(all_data_emb, self.sequence,
                                      self.parameter["lstm_units"],
                                      self.dropout_rate, scope="bi-LSTM")
    print("[*] outputs size : ", outputs.get_shape().as_list())
    """
    """
    with tf.variable_scope("stacked-bi-LSTM"):
        fw_cell = self._build_multi_cell(self.parameter["lstm_units"],
                                         self.dropout_rate,
                                         self.parameter["num_lstm_depth"])
        bw_cell = self._build_multi_cell(self.parameter["lstm_units"],
                                         self.dropout_rate,
                                         self.parameter["num_lstm_depth"])
        outputs = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell,
                                                  dtype=tf.float32,
                                                  inputs=all_data_emb,
                                                  sequence_length=self.sequence,
                                                  scope="birnn")
        (output_fw, output_bw), _ = outputs
        outputs = tf.concat([output_fw, output_bw], axis=2)
        outputs = tf.reshape(outputs, shape=[-1, outputs.get_shape().as_list()[-1]])
        print("[*] outputs size : ", outputs.get_shape().as_list())
    """

    sentence_output = tf.layers.dense(outputs,
                                      units=self.parameter["n_class"],
                                      kernel_initializer=self.he_uni,
                                      kernel_regularizer=self.l2_reg)
    print("[*] sentence_output size : ", sentence_output.get_shape().as_list())

    crf_cost, crf_weight, crf_bias = self._build_crf_layer(sentence_output)

    costs = crf_cost
    self.train_op = self._build_output_layer(costs)
    self.cost = costs

    print("[+] Model loaded!")
                    type=int,
                    help="train epochs")
parser.add_argument("-b", "--BATCH", default=16, type=int, help="batch size")
args = parser.parse_args()

# Helper class for data access
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
# Helper class for model operations
modelpp = Model(dataset)

# with tf.name_scope("summary"):
#     tf.summary.scalar("loss", loss)
#     tf.summary.scalar("acc", accuracy)
#     merged_summary = tf.summary.merge_all()

learning_rate = 5e-5
model = BertModel(modelpp)


def learning_rate_decay(learning_rate):
    return learning_rate * 0.5


def evaluate(sess):
    """Evaluate accuracy and loss on the validation data."""
    x_val_all, y_val_all = dataset.get_all_validation_data()
    data_len = len(y_val_all)
    index = np.random.permutation(len(y_val_all))
    batch_len = 32
    n_batches = len(y_val_all) // batch_len + 1
    total_loss = 0.0
    total_acc = 0.0
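# Hedged sketch of how learning_rate_decay() above is typically applied: halve
# the rate once the validation loss stops improving. The patience counter and
# the val_loss variable are assumptions, not part of the original script.
# best_val_loss, rounds_without_improvement = float("inf"), 0
# val_loss, val_acc = evaluate(sess)
# if val_loss < best_val_loss:
#     best_val_loss, rounds_without_improvement = val_loss, 0
# else:
#     rounds_without_improvement += 1
#     if rounds_without_improvement >= 3:
#         learning_rate = learning_rate_decay(learning_rate)
#         rounds_without_improvement = 0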
def main():
    """ MAIN OHOHOHOHOHO """
    sentiment_lexicon_dict = get_sentiment_lexicon(SENTIMENT_LEXICON_DIR)
    evaluation = Evaluation(DATASET_DIR)

    # RandomModel
    evaluation.evaluate(RandomModel())

    # MajorityModel
    evaluation.evaluate(MajorityModel())

    # LexiconFeaturesModel
    evaluation.evaluate(LexiconFeaturesModel(sentiment_lexicon_dict))
    evaluation.evaluate(LexiconFeaturesModel(sentiment_lexicon_dict, positive_around_num=[3, 5, 10], negative_around_num=[3, 5, 10], normalize_data=True))
    evaluation.evaluate(LexiconFeaturesModel(sentiment_lexicon_dict, positive_around_num=[3, 5, 10], negative_around_num=[3], normalize_data=True))
    evaluation.evaluate(LexiconFeaturesModel(sentiment_lexicon_dict, positive_around_num=[5], negative_around_num=[3, 5, 10], normalize_data=True))
    evaluation.evaluate(LexiconFeaturesModel(sentiment_lexicon_dict, positive_around_num=[3, 5, 10, 15], negative_around_num=[3, 5, 10], normalize_data=False))
    evaluation.evaluate(LexiconFeaturesModel(sentiment_lexicon_dict, positive_around_num=[3, 15], negative_around_num=[10, 15], normalize_data=False))

    # BertModel
    evaluation.evaluate(BertModel(n_words_left_right=1, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=2, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=3, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=4, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=5, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=7, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=50, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=150, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=64, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=128, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=256, dropout_rate=0.1, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=256, dropout_rate=0.3, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=32, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=64, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=5))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=10))
    evaluation.evaluate(BertModel(n_words_left_right=6, conv_filters=100, dense_units=256, dropout_rate=0.2, batch_size=128, epochs=15))

    # DependencyModel
    evaluation.evaluate(DependencyModel(sentiment_lexicon_dict, positive_around_num=[1, 2, 3, 4, 5], negative_around_num=[1, 2, 3, 4, 5], normalize_data=True))
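# Hedged alternative sketch: the repeated BertModel evaluations above amount to
# a one-factor-at-a-time sweep around the (6, 100, 256, 0.2, 128, 5) baseline,
# so they could also be driven from a small parameter grid. The grid values are
# copied from the calls above; nothing new is introduced.
# base = dict(n_words_left_right=6, conv_filters=100, dense_units=256,
#             dropout_rate=0.2, batch_size=128, epochs=5)
# sweeps = {"n_words_left_right": [1, 2, 3, 4, 5, 6, 7],
#           "conv_filters": [50, 100, 150],
#           "dense_units": [64, 128, 256],
#           "dropout_rate": [0.1, 0.2, 0.3],
#           "batch_size": [32, 64, 128],
#           "epochs": [5, 10, 15]}
# for name, values in sweeps.items():
#     for value in values:
#         evaluation.evaluate(BertModel(**{**base, name: value}))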
def build_model(self):
    """
    Tried Models :
        Try 1 : (bi-LSTM (Char) + Word Embeddings) + bi-LSTM + CRF
        Try 2 : (bi-LSTM (Char) + Word Embeddings) + Highway Network x 2 + bi-LSTM + CRF
        Try 3 : (CharCNN (Char) + Word Embeddings) + Highway Network x 2 + bi-LSTM + multi-head-attention + CRF
        Try 4 : (CharCNN (Char) + Word Embeddings) + Highway Network x 2 + Densely-Connected-bi-LSTM + CRF
        Try 5 : (bi-LSTM (Char) + Word Embeddings) + Highway Network x 2 + Densely-Connected-bi-LSTM + CRF : F1 66.8577
        Try 6 : (bi-LSTM (Char) + Word Embeddings) + Highway Network x 2 + Densely-Connected-bi-LSTM + Residual + CRF : F1 68.4807
        Try 7 : (bi-LSTM (Char) + Word Embeddings) + Densely-Connected-bi-LSTM x 8 + Residual + CRF : F1 71.5
        Try 8 : BERT + bi-LSTM + Residual + CRF :
    """
    self._build_placeholder()

    bert_config = BertConfig(vocab_size=self.parameter["embedding"][0][1],
                             hidden_size=self.parameter["word_embedding_size"],
                             max_position_embeddings=self.parameter["sentence_length"],
                             type_vocab_size=self.parameter["n_class"])

    bert_model = BertModel(config=bert_config,
                           is_training=self.parameter["mode"] == "train",
                           input_ids=self.morph,
                           input_mask=None,
                           token_type_ids=None,
                           use_one_hot_embeddings=True,
                           scope="BertModel")
    bert_output = bert_model.get_sequence_output()
    # max_seq_length = bert_output.get_shape().as_list()[1]
    print("[*] BERT output : ", bert_output.get_shape().as_list())

    # used = tf.sign(tf.abs(self.morph))
    # lengths = tf.reduce_sum(used, reduction_indices=1)

    """
    # { "morph": 0, "morph_tag": 1, "tag" : 2, "character": 3, .. }
    for item in self.parameter["embedding"]:
        self._embedding_matrix.append(self._build_embedding(item[1], item[2], name="embedding_" + item[0]))

    self._embeddings.append(tf.nn.embedding_lookup(self._embedding_matrix[0], self.morph))
    self._embeddings.append(tf.nn.embedding_lookup(self._embedding_matrix[1], self.character))

    self._embeddings[0] = tf.nn.dropout(self._embeddings[0], self.dropout_rate)

    all_data_emb = self.ne_dict
    for i in range(0, len(self._embeddings) - 1):
        all_data_emb = tf.concat([all_data_emb, self._embeddings[i]], axis=2)
    all_data_emb = tf.concat([all_data_emb, char_embs_rnn], axis=2)
    print("[*] Embeddings : ", all_data_emb.get_shape().as_list())
    """

    item = self.parameter["embedding"][1]
    self._embedding_matrix = self._build_embedding(item[1], item[2], name="embedding_char")
    char_embedding = tf.nn.embedding_lookup(self._embedding_matrix, self.character)
    character_embedding = tf.reshape(char_embedding,
                                     [-1, self.parameter["word_length"], item[2]])

    char_len = tf.reshape(self.character_len, [-1])
    char_embs_rnn = self._build_birnn_model(character_embedding,
                                            seq_len=char_len,
                                            lstm_units=self.parameter["char_lstm_units"],
                                            keep_prob=self.dropout_rate,
                                            last=True,
                                            scope="char_layer")
    char_embs_rnn_size = char_embs_rnn.get_shape().as_list()
    print("[*] Character Embedding RNN size : ", char_embs_rnn_size)

    all_data_emb = self.ne_dict
    all_data_emb = tf.concat([all_data_emb, bert_output, char_embs_rnn], axis=2)
    print("[*] Embeddings : ", all_data_emb.get_shape().as_list())

    dense_bi_lstm = DenselyConnectedBiRNN(num_layers=self.parameter["num_dc_layer"],
                                          num_units=self.parameter["lstm_units"],
                                          num_last_units=self.parameter["lstm_units"],
                                          keep_prob=self.dropout_rate)(all_data_emb,
                                                                       seq_len=self.sequence)
    print("[*] DC-bi-LSTM : ", dense_bi_lstm.get_shape().as_list())

    outputs = tf.reshape(dense_bi_lstm, (-1, 2 * self.parameter["lstm_units"]))
    print("[*] DC-bi-LSTM-reshape : ", dense_bi_lstm.get_shape().as_list())

    residual_output = tf.layers.dense(
        tf.reshape(all_data_emb, (-1, all_data_emb.get_shape().as_list()[-1])),
        units=2 * self.parameter["lstm_units"],
        kernel_initializer=self.he_uni,
        kernel_regularizer=self.l2_reg)
    outputs += residual_output

    outputs = tf.nn.dropout(outputs, self.dropout_rate)

    """
    outputs = self._build_birnn_model(bert_output,
                                      seq_len=max_seq_length,
                                      lstm_units=self.parameter["lstm_units"],
                                      keep_prob=self.dropout_rate,
                                      scope="bi-LSTM_layer")
    print("[*] outputs size : ", outputs.get_shape().as_list())
    """

    sentence_output = tf.layers.dense(outputs,
                                      units=self.parameter["n_class"],
                                      kernel_initializer=self.he_uni,
                                      kernel_regularizer=self.l2_reg)
    sentence_output = tf.nn.tanh(sentence_output)
    print("[*] sentence_output size : ", sentence_output.get_shape().as_list())

    crf_cost, crf_weight, crf_bias = self._build_crf_layer(sentence_output)

    costs = crf_cost
    self.train_op = self._build_output_layer(costs)
    self.cost = costs

    print("[+] Model loaded!")