Example #1
class CQABertModel(nn.Module):
    """
    The BERT encoder adapted for our implementation to include 'history_answer_marker' input
    """
    def __init__(self, args, config):
        super(CQABertModel, self).__init__()
        self.args = args
        self.model = BertModel(config, self.args)

        if self.args.n_gpu > 1 and not self.args.no_cuda:
            self.model = nn.DataParallel(self.model)

        self.model.to(args.device)

    def forward(self, input_ids, input_mask, segment_ids,
                history_answer_marker, use_one_hot_embeddings):
        inputs = {
            "input_ids": input_ids.to(self.args.device),
            "attention_mask": input_mask.to(self.args.device),
            "token_type_ids": segment_ids.to(self.args.device),
            "history_answer_marker":
            history_answer_marker.to(self.args.device),
        }

        outputs = self.model(**inputs)
        # Final hidden layer, with dimensions [batch_size, max_seq_len, hidden_size]
        sequence_output = outputs[0]
        # Representation of the whole sequence (the pooled '[CLS]' token embedding)
        pooled_output = outputs[1]
        return sequence_output, pooled_output
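The extra history_answer_marker input suggests that this project's modified BertModel folds a learned marker embedding into the usual token/position/segment embedding sum. A conceptual sketch of that idea (an assumption for illustration, not the repo's actual embedding code; class name and sizes are made up):

# Conceptual sketch only: a marker embedding added to BERT-style input embeddings.
import torch
import torch.nn as nn

class MarkerAugmentedEmbeddings(nn.Module):
    def __init__(self, vocab_size=30522, hidden_size=768, max_len=512, type_vocab=2):
        super().__init__()
        self.word = nn.Embedding(vocab_size, hidden_size)
        self.position = nn.Embedding(max_len, hidden_size)
        self.segment = nn.Embedding(type_vocab, hidden_size)
        self.marker = nn.Embedding(2, hidden_size)  # 0 = ordinary token, 1 = history answer
        self.norm = nn.LayerNorm(hidden_size)

    def forward(self, input_ids, segment_ids, history_answer_marker):
        positions = torch.arange(input_ids.size(1), device=input_ids.device)
        summed = (self.word(input_ids) + self.position(positions)
                  + self.segment(segment_ids) + self.marker(history_answer_marker))
        return self.norm(summed)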
Example #2
    def __init__(self, args, config):
        super(CQABertModel, self).__init__()
        self.args = args
        self.model = BertModel(config, self.args)

        if self.args.n_gpu > 1 and not self.args.no_cuda:
            self.model = nn.DataParallel(self.model)

        self.model.to(args.device)
Example #3
class Prediction:
    def __init__(self):
        self.model_class = BertModel()
        self.estimator, self.tokenizer = self.model_class.get_estmator()

    def getPrediction(self, in_sentences):
        labels = ["Negative", "Positive"]
        label_list = [0, 1]
        MAX_SEQ_LENGTH = 130
        input_examples = [
            run_classifier.InputExample(guid="",
                                        text_a=x,
                                        text_b=None,
                                        label=0) for x in in_sentences
        ]
        input_features = run_classifier.convert_examples_to_features(
            input_examples, label_list, MAX_SEQ_LENGTH, self.tokenizer)
        predict_input_fn = run_classifier.input_fn_builder(
            features=input_features,
            seq_length=MAX_SEQ_LENGTH,
            is_training=False,
            drop_remainder=False)
        predictions = self.estimator.predict(predict_input_fn)
        return [(sentence, labels[prediction['labels']])
                for sentence, prediction in zip(in_sentences, predictions)]

    def get_prediction(self, sentence_list):
        # Example input:
        # pred_sentences = [
        #     "Anyway, thanks for the kind reply. Btw, I am still a SingTel subscriber",
        #     "Why is internet so thrash today @Singtel"
        # ]

        predictions = self.getPrediction(sentence_list)
        return predictions
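A minimal usage sketch for the Prediction wrapper above, reusing the sample sentences from its commented-out example (it assumes the project's BertModel and run_classifier modules are importable):

# Hypothetical driver for the Prediction class above.
if __name__ == "__main__":
    predictor = Prediction()
    results = predictor.get_prediction([
        "Anyway, thanks for the kind reply. Btw, I am still a SingTel subscriber",
        "Why is internet so thrash today @Singtel",
    ])
    for sentence, label in results:
        print(f"{label}: {sentence}")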
Example #4
    def get_network(self):
        # If network already loaded simply return
        if self._network is not None: return self._network

        # If checkpoint file is available, load from checkpoint
        state_dict = self.get_trainer().try_load_statedict_from_checkpoint()

        self._network = BertModel(self._bert_model_name,
                                  self.get_label_mapper().num_classes,
                                  fine_tune=self.fine_tune)

        if state_dict is not None:
            # A checkpoint was found; restore its weights instead of starting from plain pretrained BERT
            self._logger.info("checkpoint models found")
            self._network.load_state_dict(state_dict)

        return self._network
Example #5
    def __init__(self, config: BertConfig):
        super(Seq2SeqModel, self).__init__()
        # Read the configuration values
        self.hidden_dim = config.hidden_size
        self.vocab_size = config.vocab_size

        # encoder and decoder
        self.bert = BertModel(config)
        self.decoder = BertLMPredictionHead(
            config, self.bert.embeddings.word_embeddings.weight)

        # Load the vocabulary and tokenizer
        self.word2ix = load_bert_vocab()
        self.tokenizer = Tokenizer(self.word2ix)
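Note that the decoder head above is constructed from the encoder's word-embedding matrix, i.e. the input and output embeddings are tied. A minimal, self-contained illustration of that tying trick (not this repo's BertLMPredictionHead; sizes are illustrative):

# Weight tying in miniature: the output projection shares the embedding matrix.
import torch
import torch.nn as nn

hidden_size, vocab_size = 768, 21128   # illustrative sizes
embeddings = nn.Embedding(vocab_size, hidden_size)

decoder = nn.Linear(hidden_size, vocab_size, bias=True)
decoder.weight = embeddings.weight     # share one parameter tensor for both roles

hidden_states = torch.randn(2, 10, hidden_size)
logits = decoder(hidden_states)        # [2, 10, vocab_size]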
Example #6
    def test_forward(self):
        # Bert Config
        vocab_size = 10
        sequence_len = 20
        batch = 32
        num_classes = 3

        expected_shape = (batch, num_classes)

        input_batch = torch.randint(low=0,
                                    high=vocab_size - 1,
                                    size=(batch, sequence_len))
        config = transformers.BertConfig(vocab_size=vocab_size,
                                         hidden_size=10,
                                         num_hidden_layers=1,
                                         num_attention_heads=1,
                                         num_labels=num_classes)
        sut = BertModel(None, None, bert_config=config)

        # Act
        actual = sut.forward(input_batch)[0]

        # Assert
        self.assertEqual(expected_shape, actual.shape)
Example #7
    def get_network(self):
        # If network already loaded simply return
        if self._network is not None: return self._network

        # If checkpoint file is available, load from checkpoint
        self._network = self.get_trainer().try_load_model_from_checkpoint()

        # Only load from BERT pretrained when no checkpoint is available
        if self._network is None:
            self._logger.info(
                "No checkpoint models found.. Loading pretrained BERT {}".
                format(self._bert_model_name))
            self._network = BertModel(self._bert_model_name,
                                      self.get_label_mapper().num_classes,
                                      fine_tune=self.fine_tune)

        return self._network
Example #8
File: main.py  Project: yphacker/flyai_nlp
import argparse
import numpy as np
import tensorflow as tf
from flyai.dataset import Dataset
import config
from model import Model
from bert_model import BertModel

parser = argparse.ArgumentParser()
parser.add_argument("-b", "--BATCH", default=32, type=int, help="batch size")
parser.add_argument("-e", "--EPOCHS", default=8, type=int, help="train epochs")
args = parser.parse_args()

dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
modelpp = Model(dataset)
model = BertModel()


def learning_rate_decay(learning_rate):
    return learning_rate * 0.5


def evaluate(sess):
    """评估在某一数据上的准确率和损失"""
    x_val_all, y_val_all = dataset.get_all_validation_data()
    data_len = len(y_val_all)
    index = np.random.permutation(len(y_val_all))
    n_batches = len(y_val_all) // args.BATCH + 1
    total_loss = 0.0
    total_acc = 0.0
    x_input_ids_val = x_val_all[0]
Example #9
class Builder:

    def __init__(self, train_data, val_data, labels_file, model_dir, num_workers=None, checkpoint_dir=None, epochs=10,
                 early_stopping_patience=10, checkpoint_frequency=1, grad_accumulation_steps=8, batch_size=8,
                 max_seq_len=512, learning_rate=0.00001, fine_tune=True):
        self.model_dir = model_dir
        self.fine_tune = fine_tune
        self.learning_rate = learning_rate
        self.checkpoint_frequency = checkpoint_frequency
        self.grad_accumulation_steps = grad_accumulation_steps
        self.early_stopping_patience = early_stopping_patience
        self.epochs = epochs
        self.checkpoint_dir = checkpoint_dir
        self.train_data = train_data
        self.val_data = val_data
        self.labels_file = labels_file
        self.batch_size = batch_size
        # Note: the pretrained BERT position embeddings cover 512 positions, so max_seq_len must be <= 512.
        # Larger sequence lengths also increase the risk of GPU out-of-memory errors.
        self._max_seq_len = max_seq_len
        self.num_workers = num_workers or os.cpu_count() - 1
        if self.num_workers <= 0:
            self.num_workers = 0

        self._network = None
        self._train_dataloader = None
        self._train_dataset = None
        self._val_dataset = None
        self._val_dataloader = None
        self._trainer = None
        self._lossfunc = None
        self._optimiser = None
        self._label_mapper = None

        self._bert_model_name = "bert-base-cased"
        self._token_lower_case = False

    def get_preprocessor(self):
        self._logger.info("Retrieving Tokeniser")
        tokeniser = BertTokenizer.from_pretrained(self._bert_model_name, do_lower_case=self._token_lower_case)
        preprocessor = PreprocessorBertTokeniser(max_feature_len=self._max_seq_len, tokeniser=tokeniser)
        self._logger.info("Completed retrieving Tokeniser")

        return preprocessor

    def get_network(self):
        # If network already loaded simply return
        if self._network is not None: return self._network

        self._logger.info("Retrieving model")

        # If checkpoint file is available, load from checkpoint
        state_dict = self.get_trainer().try_load_statedict_from_checkpoint()

        self._network = BertModel(self._bert_model_name, self.get_label_mapper().num_classes,
                                  fine_tune=self.fine_tune)

        if state_dict is not None:
            # A checkpoint was found; restore its weights instead of starting from plain pretrained BERT
            self._logger.info("checkpoint models found")
            self._network.load_state_dict(state_dict)

        self._logger.info("Retrieving model complete")


        return self._network

    def get_train_dataset(self):
        if self._train_dataset is None:
            self._train_dataset = DbpediaDataset(self.train_data, preprocessor=self.get_preprocessor())

        return self._train_dataset

    def get_val_dataset(self):
        if self._val_dataset is None:
            self._val_dataset = DbpediaDataset(self.val_data, preprocessor=self.get_preprocessor())

        return self._val_dataset

    def get_label_mapper(self):
        if self._label_mapper is None:
            self._label_mapper = DbpediaLabelMapper(self.labels_file)

        return self._label_mapper

    def get_pos_label_index(self):
        return self.get_label_mapper().positive_label_index

    def get_train_val_dataloader(self):
        if self._train_dataloader is None:
            self._train_dataloader = DataLoader(dataset=self.get_train_dataset(), num_workers=self.num_workers,
                                                batch_size=self.batch_size, shuffle=True)

        if self._val_dataloader is None:
            self._val_dataloader = DataLoader(dataset=self.get_val_dataset(), num_workers=self.num_workers,
                                              batch_size=self.batch_size, shuffle=False)

        return self._train_dataloader, self._val_dataloader

    def get_loss_function(self):
        if self._lossfunc is None:
            self._lossfunc = nn.CrossEntropyLoss()
        return self._lossfunc

    def get_optimiser(self):
        if self._optimiser is None:
            self._optimiser = Adam(params=self.get_network().parameters(), lr=self.learning_rate)
        return self._optimiser

    def get_trainer(self):
        if self._trainer is None:
            self._trainer = Train(model_dir=self.model_dir, epochs=self.epochs,
                                  early_stopping_patience=self.early_stopping_patience,
                                  checkpoint_frequency=self.checkpoint_frequency,
                                  checkpoint_dir=self.checkpoint_dir,
                                  accumulation_steps=self.grad_accumulation_steps)

        return self._trainer

    @property
    def _logger(self):
        return logging.getLogger(__name__)
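A hypothetical wiring example for the Builder above (the file paths are placeholders, and it assumes the surrounding Train, BertModel and Dbpedia* modules from the project):

# Hypothetical usage of Builder; paths below are placeholders.
builder = Builder(train_data="train.csv",
                  val_data="val.csv",
                  labels_file="labels.txt",
                  model_dir="output/",
                  checkpoint_dir="checkpoints/",
                  epochs=5,
                  batch_size=8)

train_dl, val_dl = builder.get_train_val_dataloader()
network = builder.get_network()        # restores from a checkpoint if one exists
loss_fn = builder.get_loss_function()
optimiser = builder.get_optimiser()
trainer = builder.get_trainer()
# How the trainer is launched (e.g. its run/train method) is project-specific
# and not shown in this snippet.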
Example #10
    def build_model(self):
        """
        Tried Models :
        Try 1 : (bi-LSTM (Char) + Word Embeddings) + bi-LSTM + CRF
        Try 2 : (bi-LSTM (Char) + Word Embeddings) + Highway Network x 2 + bi-LSTM + CRF
        Try 3 : (CharCNN (Char) + Word Embeddings) + Highway Network x 2 + bi-LSTM + multi-head-attention + CRF
        Try 4 : (CharCNN (Char) + Word Embeddings) + Highway Network x 2 + Densely-Connected-bi-LSTM + CRF
        Try 5 : (bi-LSTM (Char) + Word Embeddings) + Highway Network x 2 + Densely-Connected-bi-LSTM + CRF : F1 66.8577
        Try 6 : (bi-LSTM (Char) + Word Embeddings) + Highway Network x 2 + Densely-Connected-bi-LSTM + Residual
        + CRF : F1 68.4807
        Try 7 : (bi-LSTM (Char) + Word Embeddings) + Densely-Connected-bi-LSTM x 8 + Residual + CRF : F1 71.5
        Try 8 : (bi-LSTM (Char) + BERT) + Densely-Connected-bi-LSTM x 8 + Residual + CRF : 78.8
        Try 9 : (bi-LSTM (Char) + BERT + WordEmbedding) + Densely-Connected-bi-LSTM x 8 + Residual + CRF :
        """

        self._build_placeholder()

        word_bert_config = BertConfig(
            vocab_size=self.parameter["embedding"][0][1],
            hidden_size=self.parameter["word_embedding_size"],
            num_hidden_layers=self.parameter["num_hidden_layers"],
            num_attention_heads=self.parameter["num_attention_heads"],
            intermediate_size=self.parameter["intermediate_size"],
            # hidden_dropout_prob=1. - self.dropout_rate,
            # attention_probs_dropout_prob=1. - self.dropout_rate,
            max_position_embeddings=self.parameter["sentence_length"],
            type_vocab_size=self.parameter["n_class"])

        word_bert_model = BertModel(
            config=word_bert_config,
            is_training=self.parameter["mode"] == "train",
            input_ids=self.morph,
            input_mask=None,  # tf.sign(tf.abs(self.morph)),  # None,
            token_type_ids=self.label
            if self.parameter["mode"] == "train" else None,  # None,
            use_one_hot_embeddings=False,
            scope="WordBertModel")

        word_bert_output = word_bert_model.get_sequence_output()
        print("[*] Word BERT output : ",
              word_bert_output.get_shape().as_list())

        # { "morph": 0, "morph_tag": 1, "tag" : 2, "character": 3, .. }
        # for item in self.parameter["embedding"]:
        #     self._embedding_matrix.append(self._build_embedding(item[1], item[2], name="embedding_" + item[0]))

        # self._embeddings.append(tf.nn.embedding_lookup(self._embedding_matrix[0], self.morph))
        # self._embeddings.append(tf.nn.embedding_lookup(self._embedding_matrix[1], self.character))

        # self._embeddings[0] = tf.nn.dropout(self._embeddings[0], self.dropout_rate)

        char_embeddings = self._build_embedding(
            self.parameter["embedding"][1][1],
            self.parameter["embedding"][1][2],
            name="embedding_" + self.parameter["embedding"][1][0])
        char_embeddings = tf.nn.embedding_lookup(char_embeddings,
                                                 self.character)
        character_embedding = tf.reshape(char_embeddings, [
            -1, self.parameter["word_length"],
            self.parameter["char_embedding_size"]
        ])
        char_len = tf.reshape(self.character_len, [-1])
        char_embs_rnn = self._build_birnn_model(
            character_embedding,
            seq_len=char_len,
            lstm_units=self.parameter["char_lstm_units"],
            keep_prob=self.dropout_rate,
            last=True,
            scope="char_layer")
        char_embs_rnn_size = char_embs_rnn.get_shape().as_list()
        print("[*] Character Embedding RNN size : ", char_embs_rnn_size)
        """
        with tf.name_scope("CharCNN"):
            ks = [2, 3, 4, 5]
            fs = 16

            pooled_out = []
            for idx, k in enumerate(ks):
                x = tf.layers.conv1d(character_embedding, filters=fs, kernel_size=k, dilation_rate=2 ** idx,
                                     kernel_initializer=self.he_uni, kernel_regularizer=self.l2_reg,
                                     padding='same',
                                     name="dilatedCNN-%d" % idx)
                x = tf.nn.relu(x)
                x = tf.reduce_max(x, axis=1)
                pooled_out.append(x)

            char_embs_cnn = tf.concat(pooled_out, axis=1)
            char_embs_cnn = tf.reshape(char_embs_cnn, (-1, self.parameter["sentence_length"], fs * len(ks)))
            char_embs_cnn_size = char_embs_cnn.get_shape().as_list()
            print("[*] Character Embedding CNN size : ", char_embs_cnn_size)
        """

        all_data_emb = tf.concat(
            [self.ne_dict, word_bert_output, char_embs_rnn], axis=2)
        print("[*] Embeddings : ", all_data_emb.get_shape().as_list())

        dense_bi_lstm = DenselyConnectedBiRNN(
            num_layers=self.parameter["num_dc_layer"],
            num_units=self.parameter["lstm_units"],
            num_last_units=self.parameter["lstm_units"],
            keep_prob=self.dropout_rate)(all_data_emb, seq_len=self.sequence)
        print("[*] DC-bi-LSTM : ", dense_bi_lstm.get_shape().as_list())

        dense_bi_lstm = tf.reshape(
            dense_bi_lstm, (-1, dense_bi_lstm.get_shape().as_list()[-1]))
        print("[*] DC-bi-LSTM-reshape : ", dense_bi_lstm.get_shape().as_list())

        residual = tf.layers.dense(tf.reshape(
            all_data_emb, (-1, all_data_emb.get_shape().as_list()[-1])),
                                   units=2 * self.parameter["lstm_units"],
                                   kernel_initializer=self.he_uni,
                                   kernel_regularizer=self.l2_reg)

        dense_bi_lstm += residual  # tf.concat([context, p_context], axis=-1)
        outputs = tf.nn.dropout(dense_bi_lstm, self.dropout_rate)
        outputs = tf.reshape(outputs,
                             (-1, dense_bi_lstm.get_shape().as_list()[-1]))
        # outputs = tf.reshape(outputs, (-1, self.parameter["sentence_length"], outputs.get_shape().as_list()[-1]))
        print("[*] outputs size : ", outputs.get_shape().as_list())
        """
        outputs = self._build_birnn_model(outputs,
                                          self.sequence,
                                          self.parameter["lstm_units"],
                                          self.dropout_rate,
                                          scope="bi-LSTM")
        print("[*] outputs size : ", outputs.get_shape().as_list())
        """
        """
        # outputs = tf.nn.dropout(outputs, self.dropout_rate)
        # outputs = layer_norm_and_dropout(outputs, self.dropout_rate)


        outputs = self._build_birnn_model(all_data_emb,
                                          self.sequence,
                                          self.parameter["lstm_units"],
                                          self.dropout_rate,
                                          scope="bi-LSTM")
        print("[*] outputs size : ", outputs.get_shape().as_list())
        """
        """
        with tf.variable_scope("stacked-bi-LSTM"):
            fw_cell = self._build_multi_cell(self.parameter["lstm_units"], self.dropout_rate,
                                             self.parameter["num_lstm_depth"])

            bw_cell = self._build_multi_cell(self.parameter["lstm_units"], self.dropout_rate,
                                             self.parameter["num_lstm_depth"])

            outputs = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell,
                                                      dtype=tf.float32,
                                                      inputs=all_data_emb,
                                                      sequence_length=self.sequence,
                                                      scope="birnn")
            (output_fw, output_bw), _ = outputs
            outputs = tf.concat([output_fw, output_bw], axis=2)
            outputs = tf.reshape(outputs, shape=[-1, outputs.get_shape().as_list()[-1]])
            print("[*] outputs size : ", outputs.get_shape().as_list())
        """

        sentence_output = tf.layers.dense(outputs,
                                          units=self.parameter["n_class"],
                                          kernel_initializer=self.he_uni,
                                          kernel_regularizer=self.l2_reg)
        print("[*] sentence_output size : ",
              sentence_output.get_shape().as_list())

        crf_cost, crf_weight, crf_bias = self._build_crf_layer(sentence_output)

        costs = crf_cost

        self.train_op = self._build_output_layer(costs)
        self.cost = costs

        print("[+] Model loaded!")
Example #11
                    type=int,
                    help="train epochs")
parser.add_argument("-b", "--BATCH", default=16, type=int, help="batch size")
args = parser.parse_args()
# Helper class for fetching the data
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
# Helper class for model operations
modelpp = Model(dataset)

# with tf.name_scope("summary"):
#     tf.summary.scalar("loss", loss)
#     tf.summary.scalar("acc", accuracy)
#     merged_summary = tf.summary.merge_all()

learning_rate = 5e-5
model = BertModel(modelpp)


def learning_rate_decay(learning_rate):
    return learning_rate * 0.5


def evaluate(sess):
    """评估在某一数据上的准确率和损失"""
    x_val_all, y_val_all = dataset.get_all_validation_data()
    data_len = len(y_val_all)
    index = np.random.permutation(len(y_val_all))
    batch_len = 32
    n_batches = len(y_val_all) // batch_len + 1
    total_loss = 0.0
    total_acc = 0.0
Example #12
def main():
    """
        MAIN OHOHOHOHOHO
    """
    sentiment_lexicon_dict = get_sentiment_lexicon(SENTIMENT_LEXICON_DIR)

    evaluation = Evaluation(DATASET_DIR)

    # RandomModel
    evaluation.evaluate(RandomModel())

    # MajorityModel
    evaluation.evaluate(MajorityModel())

    # LexiconFeaturesModel
    evaluation.evaluate(LexiconFeaturesModel(sentiment_lexicon_dict))
    evaluation.evaluate(
        LexiconFeaturesModel(sentiment_lexicon_dict,
                             positive_around_num=[3, 5, 10],
                             negative_around_num=[3, 5, 10],
                             normalize_data=True))
    evaluation.evaluate(
        LexiconFeaturesModel(sentiment_lexicon_dict,
                             positive_around_num=[3, 5, 10],
                             negative_around_num=[3],
                             normalize_data=True))
    evaluation.evaluate(
        LexiconFeaturesModel(sentiment_lexicon_dict,
                             positive_around_num=[5],
                             negative_around_num=[3, 5, 10],
                             normalize_data=True))
    evaluation.evaluate(
        LexiconFeaturesModel(sentiment_lexicon_dict,
                             positive_around_num=[3, 5, 10, 15],
                             negative_around_num=[3, 5, 10],
                             normalize_data=False))
    evaluation.evaluate(
        LexiconFeaturesModel(sentiment_lexicon_dict,
                             positive_around_num=[3, 15],
                             negative_around_num=[10, 15],
                             normalize_data=False))

    # BertModel
    evaluation.evaluate(
        BertModel(n_words_left_right=1,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=2,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=3,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=4,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=5,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=7,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))

    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=50,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=150,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))

    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=64,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=128,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))

    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.1,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.3,
                  batch_size=128,
                  epochs=5))

    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=32,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=64,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))

    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=5))
    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=10))
    evaluation.evaluate(
        BertModel(n_words_left_right=6,
                  conv_filters=100,
                  dense_units=256,
                  dropout_rate=0.2,
                  batch_size=128,
                  epochs=15))

    evaluation.evaluate(
        DependencyModel(sentiment_lexicon_dict,
                        positive_around_num=[1, 2, 3, 4, 5],
                        negative_around_num=[1, 2, 3, 4, 5],
                        normalize_data=True))
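The long run of BertModel evaluations above is effectively a hand-unrolled hyperparameter sweep. A more compact sketch of the first group using itertools.product (it assumes the same Evaluation instance and BertModel constructor as above):

# Compact form of the n_words_left_right sweep; values mirror the calls above.
from itertools import product

for n_words, filters, units, dropout, batch, epochs in product(
        [1, 2, 3, 4, 5, 6, 7],  # n_words_left_right
        [100],                  # conv_filters
        [256],                  # dense_units
        [0.2],                  # dropout_rate
        [128],                  # batch_size
        [5]):                   # epochs
    evaluation.evaluate(BertModel(n_words_left_right=n_words,
                                  conv_filters=filters,
                                  dense_units=units,
                                  dropout_rate=dropout,
                                  batch_size=batch,
                                  epochs=epochs))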
Example #13
    def build_model(self):
        """
        Tried Models :
        Try 1 : (bi-LSTM (Char) + Word Embeddings) + bi-LSTM + CRF
        Try 2 : (bi-LSTM (Char) + Word Embeddings) + Highway Network x 2 + bi-LSTM + CRF
        Try 3 : (CharCNN (Char) + Word Embeddings) + Highway Network x 2 + bi-LSTM + multi-head-attention + CRF
        Try 4 : (CharCNN (Char) + Word Embeddings) + Highway Network x 2 + Densely-Connected-bi-LSTM + CRF
        Try 5 : (bi-LSTM (Char) + Word Embeddings) + Highway Network x 2 + Densely-Connected-bi-LSTM + CRF : F1 66.8577
        Try 6 : (bi-LSTM (Char) + Word Embeddings) + Highway Network x 2 + Densely-Connected-bi-LSTM + Residual
        + CRF : F1 68.4807
        Try 7 : (bi-LSTM (Char) + Word Embeddings) + Densely-Connected-bi-LSTM x 8 + Residual + CRF : F1 71.5
        Try 8 : BERT + bi-LSTM + Residual + CRF :
        """
        self._build_placeholder()

        bert_config = BertConfig(vocab_size=self.parameter["embedding"][0][1],
                                 hidden_size=self.parameter["word_embedding_size"],
                                 max_position_embeddings=self.parameter["sentence_length"],
                                 type_vocab_size=self.parameter["n_class"])
        bert_model = BertModel(config=bert_config,
                               is_training=self.parameter["mode"] == "train",
                               input_ids=self.morph,
                               input_mask=None,
                               token_type_ids=None,
                               use_one_hot_embeddings=True,
                               scope="BertModel")

        bert_output = bert_model.get_sequence_output()
        # max_seq_length = bert_output.get_shape().as_list()[1]

        print("[*] BERT output : ", bert_output.get_shape().as_list())

        # used = tf.sign(tf.abs(self.morph))
        # lengths = tf.reduce_sum(used, reduction_indices=1)

        """
        # { "morph": 0, "morph_tag": 1, "tag" : 2, "character": 3, .. }
        for item in self.parameter["embedding"]:
            self._embedding_matrix.append(self._build_embedding(item[1], item[2], name="embedding_" + item[0]))

        self._embeddings.append(tf.nn.embedding_lookup(self._embedding_matrix[0], self.morph))
        self._embeddings.append(tf.nn.embedding_lookup(self._embedding_matrix[1], self.character))

        self._embeddings[0] = tf.nn.dropout(self._embeddings[0], self.dropout_rate)

        all_data_emb = self.ne_dict
        for i in range(0, len(self._embeddings) - 1):
            all_data_emb = tf.concat([all_data_emb, self._embeddings[i]], axis=2)
        all_data_emb = tf.concat([all_data_emb, char_embs_rnn], axis=2)
        print("[*] Embeddings : ", all_data_emb.get_shape().as_list())
        """

        item = self.parameter["embedding"][1]
        self._embedding_matrix = self._build_embedding(item[1], item[2], name="embedding_char")
        char_embedding = tf.nn.embedding_lookup(self._embedding_matrix, self.character)

        character_embedding = tf.reshape(char_embedding, [-1, self.parameter["word_length"], item[2]])
        char_len = tf.reshape(self.character_len, [-1])
        char_embs_rnn = self._build_birnn_model(character_embedding, seq_len=char_len,
                                                lstm_units=self.parameter["char_lstm_units"],
                                                keep_prob=self.dropout_rate,
                                                last=True, scope="char_layer")

        char_embs_rnn_size = char_embs_rnn.get_shape().as_list()
        print("[*] Character Embedding RNN size : ", char_embs_rnn_size)

        all_data_emb = self.ne_dict
        all_data_emb = tf.concat([all_data_emb, bert_output, char_embs_rnn], axis=2)
        print("[*] Embeddings : ", all_data_emb.get_shape().as_list())

        dense_bi_lstm = DenselyConnectedBiRNN(num_layers=self.parameter["num_dc_layer"],
                                              num_units=self.parameter["lstm_units"],
                                              num_last_units=self.parameter["lstm_units"],
                                              keep_prob=self.dropout_rate)(all_data_emb, seq_len=self.sequence)
        print("[*] DC-bi-LSTM : ", dense_bi_lstm.get_shape().as_list())
        outputs = tf.reshape(dense_bi_lstm, (-1, 2 * self.parameter["lstm_units"]))
        print("[*] DC-bi-LSTM-reshape : ", dense_bi_lstm.get_shape().as_list())

        residual_output = tf.layers.dense(tf.reshape(all_data_emb, (-1, all_data_emb.get_shape().as_list()[-1])),
                                          units=2 * self.parameter["lstm_units"],
                                          kernel_initializer=self.he_uni,
                                          kernel_regularizer=self.l2_reg)
        outputs += residual_output
        outputs = tf.nn.dropout(outputs, self.dropout_rate)

        """
        outputs = self._build_birnn_model(bert_output,
                                          seq_len=max_seq_length,
                                          lstm_units=self.parameter["lstm_units"],
                                          keep_prob=self.dropout_rate,
                                          scope="bi-LSTM_layer")
        print("[*] outputs size : ", outputs.get_shape().as_list())
        """

        sentence_output = tf.layers.dense(outputs,
                                          units=self.parameter["n_class"],
                                          kernel_initializer=self.he_uni,
                                          kernel_regularizer=self.l2_reg)
        sentence_output = tf.nn.tanh(sentence_output)
        print("[*] sentence_output size : ", sentence_output.get_shape().as_list())

        crf_cost, crf_weight, crf_bias = self._build_crf_layer(sentence_output)

        costs = crf_cost

        self.train_op = self._build_output_layer(costs)
        self.cost = costs

        print("[+] Model loaded!")
Example #14
    def __init__(self):
        self.model_class = BertModel()
        self.estimator, self.tokenizer = self.model_class.get_estmator()