    def __init__(self,
                 config,
                 cnn_filters=50,
                 dnn_units=512,
                 model_output_classes=2,
                 dropout_rate=0.1,
                 training=False,
                 name="text_model",
                 *inputs,
                 **kwargs):
        super().__init__(config, *inputs, **kwargs)
        self.bert = TFBertMainLayer(config, name="bert", trainable=False)

        self.cnn_layer1 = layers.Conv1D(filters=cnn_filters,
                                        kernel_size=2,
                                        padding="valid",
                                        activation="relu")
        self.cnn_layer2 = layers.Conv1D(filters=cnn_filters,
                                        kernel_size=3,
                                        padding="valid",
                                        activation="relu")
        self.cnn_layer3 = layers.Conv1D(filters=cnn_filters,
                                        kernel_size=4,
                                        padding="valid",
                                        activation="relu")
        self.pool = layers.GlobalMaxPool1D()

        self.dense_1 = layers.Dense(units=dnn_units, activation="relu")
        self.dropout = layers.Dropout(rate=dropout_rate)
        if model_output_classes == 2:
            self.last_dense = layers.Dense(units=1, activation="sigmoid")
        else:
            self.last_dense = layers.Dense(units=model_output_classes,
                                           activation="softmax")
Example #2
    def __init__(self,
                 config,
                 conv_filters=100,
                 dense_units=256,
                 dropout_rate=0.2,
                 *inputs,
                 **kwargs):
        super().__init__(config, *inputs, **kwargs)
        self.bert = TFBertMainLayer(config, name='bert', trainable=False)
        self.conv_1 = layers.Conv1D(filters=conv_filters,
                                    kernel_size=2,
                                    padding='valid',
                                    activation='relu')
        self.conv_2 = layers.Conv1D(filters=conv_filters,
                                    kernel_size=3,
                                    padding='valid',
                                    activation='relu')
        self.conv_3 = layers.Conv1D(filters=conv_filters,
                                    kernel_size=4,
                                    padding='valid',
                                    activation='relu')
        self.pool = layers.GlobalMaxPool1D()
        self.dense_1 = layers.Dense(units=dense_units, activation='relu')
        self.dropout = layers.Dropout(rate=dropout_rate)
        self.dense_2 = layers.Dense(units=5, activation='softmax')
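
As a usage note, classes like the two above typically subclass TFBertPreTrainedModel, so they can be built with from_pretrained, which loads the BERT weights and forwards unrecognized keyword arguments to __init__. The class name ReviewClassifier below is hypothetical, and a call method along the lines of the sketch after the first snippet is assumed.

from transformers import BertTokenizer

# hypothetical subclass name; extra kwargs reach __init__ via from_pretrained
model = ReviewClassifier.from_pretrained('bert-base-uncased',
                                         conv_filters=100,
                                         dense_units=256)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
encoded = tokenizer(['a short example sentence'],
                    return_tensors='tf', padding=True)
probs = model(encoded['input_ids'])  # softmax scores, shape (1, 5)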
Example #3
    def __init__(self, config, *inputs, **kwargs):

        TFBertPreTrainedModel.__init__(
            self, config, *inputs, **kwargs)  # explicit calls without super
        TFNQModel.__init__(self, config)

        self.bert = TFBertMainLayer(config, name='bert')
Example #4
    def __init__(self, config, *inputs, **kwargs):
        super(TFBertForMultiClassification, self).__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels
        self.bert = TFBertMainLayer(config, name='bert')
        self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
        self.classifier = tf.keras.layers.Dense(config.num_labels,
                                                kernel_initializer=get_initializer(config.initializer_range),
                                                name='classifier',
                                                activation='softmax')
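
Example #4 again shows only the constructor. Below is a minimal sketch of the forward pass under the usual sequence-classification pattern (pooled [CLS] output, dropout, classifier); the call method itself is an assumption, not part of the original snippet.

    def call(self, inputs, training=False):
        outputs = self.bert(inputs, training=training)
        pooled_output = outputs[1]  # pooled [CLS] representation
        pooled_output = self.dropout(pooled_output, training=training)
        return self.classifier(pooled_output)  # softmax over num_labels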
Example #5
    def __init__(self, config, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        self.bert = TFBertMainLayer(config, name='bert')
        self.initializer = get_initializer(config.initializer_range)
        self.qa_outputs = tf.keras.layers.Dense(config.num_labels,
                                  kernel_initializer=self.initializer, name='qa_outputs')
        self.long_outputs = tf.keras.layers.Dense(1, kernel_initializer=self.initializer,
                                    name='long_outputs')
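
The qa_outputs / long_outputs pair above suggests a Natural Questions style head: per-token start and end logits for the short answer plus a per-token score for the long answer. The following is a sketch of the corresponding forward pass, assuming config.num_labels == 2 and that the short-answer logits are split along the last axis; none of this is taken from the original snippet.

    def call(self, inputs, training=False):
        sequence_output = self.bert(inputs, training=training)[0]
        logits = self.qa_outputs(sequence_output)  # (batch, seq_len, 2)
        start_logits, end_logits = tf.split(logits, 2, axis=-1)
        start_logits = tf.squeeze(start_logits, axis=-1)
        end_logits = tf.squeeze(end_logits, axis=-1)
        long_logits = tf.squeeze(self.long_outputs(sequence_output), axis=-1)
        return start_logits, end_logits, long_logits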
Example #6
    def __init__(self, config, *inputs, **kwargs):
        super(CustomBertForSequenceClassification,
              self).__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels
        self.noise_amount = config.noise_amount
        self.noise_distribution = config.noise_distribution
        self.add_dense = config.add_dense
        self.add_dense_2 = config.add_dense_2

        self.bert = TFBertMainLayer(config, name='bert')
        self.dropout = tf.keras.layers.Dropout(config.dense_dropout_prob)

        self.noise_layer = Noise(amount=self.noise_amount,
                                 activation="relu",
                                 distribution=self.noise_distribution,
                                 relative=True)

        if self.add_dense:

            self.dense_layer_1 = ScaledLinear(
                config.dense_units,
                activation="relu",
                scf_min=config.scf_min,
                scf_max=config.scf_max,
                scale=config.scale_dense,
                dropconnect_prob=config.dropconnect_prob,
                kernel_initializer=get_initializer(config.initializer_range),
                name='dense')

        if self.add_dense_2:

            self.dense_pipe = DensePipe(
                units=20,
                num_layers=10,
                # save every n-th layer and return them concatenated
                extract_every_n_layers=3,
                activation="relu",
                use_bias=True,
                scale=True,
                scf_min=0.2,
                scf_max=2.0,
                dropconnect_prob=0.00,
                dropout_prob=0.1,
                kernel_initializer='glorot_uniform',
                bias_initializer='zeros')

        self.classifier = ScaledLinear(
            config.num_labels,
            scf_min=config.scf_min,
            scf_max=config.scf_max,
            scale=config.scale_logits,
            dropconnect_prob=0.0,  # config.dropconnect_prob,
            kernel_initializer=get_initializer(config.initializer_range),
            name='classifier')
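
Noise, ScaledLinear and DensePipe are project-specific layers, so the exact forward pass cannot be recovered from the constructor alone. The following is only a plausible sketch, assuming each of these layers behaves like a standard Keras layer applied to the pooled BERT output.

    def call(self, inputs, training=False):
        pooled_output = self.bert(inputs, training=training)[1]
        pooled_output = self.noise_layer(pooled_output, training=training)
        if self.add_dense:
            pooled_output = self.dense_layer_1(pooled_output)
        if self.add_dense_2:
            pooled_output = self.dense_pipe(pooled_output)
        pooled_output = self.dropout(pooled_output, training=training)
        return self.classifier(pooled_output)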
Example #7
    def __init__(self, config, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)

        self.bert = TFBertMainLayer(config, name="bert")
        self.concat = L.Concatenate()
        self.dropout = L.Dropout(self.DROPOUT_RATE)
        self.qa_outputs = L.Dense(units=config.num_labels,
                                  activation='relu',
                                  kernel_initializer=TruncatedNormal(
                                      stddev=config.initializer_range),
                                  dtype='float32',
                                  name="qa_outputs")
Example #8
    def __init__(self, config, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)

        self.num_labels = config.num_labels
        self.config = config
        self.bert = TFBertMainLayer(self.config, name="bert")
        self.bilstm = Bidirectional(
            LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))
        self.dropout = Dropout(0.2)
        self.time_distributed = TimeDistributed(
            Dense(self.num_labels, activation="relu"))
        self.crf = CRF(self.num_labels + 1)
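
The BiLSTM/CRF variant above also omits its forward pass. Here is a minimal sketch, assuming a Keras-style CRF layer (for example from keras-contrib or tf2crf) that maps per-token scores to a decoded tag sequence; the method is an assumption, not part of the original snippet.

    def call(self, inputs, training=False):
        sequence_output = self.bert(inputs, training=training)[0]
        x = self.bilstm(sequence_output)
        x = self.dropout(x, training=training)
        scores = self.time_distributed(x)  # per-token scores over the tag set
        return self.crf(scores)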
Example #9
    def __init__(
            self,
            model_path=None,
            model_name=None,
            to_reload_model=False,
            tag_scheme='iobes',
            clean_tag=True,
            id_to_word=None,
            id_to_char=None,
            id_to_tag=None,
            classifier_type='lstm',
            bert_path=None,
            bert_first_trainable_layer=0,
            word_dim=100,
            word_lstm_dim=100,
            word_bidirect=True,
            word_unroll=False,
            word_rnn_wrapper=False,
            char_dim=25,
            char_lstm_dim=25,
            char_bidirect=True,
            char_combine_method='concat',
            char_unroll=False,
            char_rnn_wrapper=False,
            # cap_dim,
            ele_num=True,
            only_CHO=True,
            tar_tag=False,
            pre_tag=False,
            # topic_dim,
            # keyword_dim,
            rnn_type='gru',
            lower=False,
            zeros=False,
            use_ori_text_char=False,
            crf=True,
            crf_begin_end=True,
            dropout=0.5,
            pre_embedding=None,
            lr_method='sgd-lr_.005',
            loss_per_token=False,
            batch_size=1,
            num_epochs=100,
            steps_per_epoch=3500,
            **kwargs):
        """
        Initialize the model. We can either initialize an empty model with
        model_name, or reload a pre-trained model from model_path.

        :param model_path: File path to reload the model from. If specified, the
            model is reloaded from model_path and model_name is discarded.
        :param model_name: Name of the model. If specified, the model is saved
            in a folder called model_name.
        """
        super().__init__(**kwargs)
        # Model location
        parent_folder = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '..'))
        if not model_path:
            if model_name:
                self.model_path = os.path.join(parent_folder, 'generated',
                                               model_name)
            else:
                self.model_path = os.path.join(parent_folder, 'generated',
                                               'model_1')
            if not os.path.exists(self.model_path):
                os.makedirs(self.model_path)
            self.parameters_path = os.path.join(self.model_path,
                                                'parameters.pkl')
        else:
            # reload model parameters
            self.model_path = model_path
            self.parameters_path = os.path.join(self.model_path,
                                                'parameters.pkl')

        self.last_cp_dir = os.path.join(self.model_path, 'last_cp')
        if not os.path.exists(self.last_cp_dir):
            os.makedirs(self.last_cp_dir)
        self.last_cp_path = os.path.join(self.last_cp_dir, 'cp.ckpt')
        self.opt_cp_dir = os.path.join(self.model_path, 'opt_cp')
        if not os.path.exists(self.opt_cp_dir):
            os.makedirs(self.opt_cp_dir)
        self.opt_cp_path = os.path.join(self.opt_cp_dir, 'cp.ckpt')

        self.tag_scheme = tag_scheme
        self.clean_tag = clean_tag
        self.classifier_type = classifier_type
        self.bert_path = bert_path
        self.bert_first_trainable_layer = bert_first_trainable_layer
        self.word_dim = word_dim
        self.word_lstm_dim = word_lstm_dim
        self.word_bidirect = word_bidirect
        self.word_unroll = word_unroll
        self.word_rnn_wrapper = word_rnn_wrapper
        self.char_dim = char_dim
        self.char_lstm_dim = char_lstm_dim
        self.char_bidirect = char_bidirect
        self.char_combine_method = char_combine_method
        self.char_unroll = char_unroll
        self.char_rnn_wrapper = char_rnn_wrapper
        self.ele_num = ele_num
        self.only_CHO = only_CHO
        self.tar_tag = tar_tag
        self.pre_tag = pre_tag
        self.rnn_type = rnn_type
        self.lower = lower
        self.zeros = zeros
        self.use_ori_text_char = use_ori_text_char
        self.crf = crf
        self.crf_begin_end = crf_begin_end
        self.dropout = dropout
        self.pre_embedding = pre_embedding
        self.lr_method = lr_method
        self.loss_per_token = loss_per_token
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.steps_per_epoch = steps_per_epoch
        assert id_to_tag
        if not self.bert_path:
            assert id_to_word and id_to_char and id_to_tag
        self.create_mappings(
            id_to_word=id_to_word,
            id_to_char=id_to_char,
            id_to_tag=id_to_tag,
        )
        if not to_reload_model:
            # TODO: need to think about how to save bert model
            self.save_model()

        # masking is used by default, so the ids of words and tags should
        # start from 1; add an extra embedding row for the mask index
        if self.pre_embedding is not None:
            self.word_dim = np.shape(self.pre_embedding)[1]
            self.pre_embedding_matrix = np.concatenate((np.zeros(
                (1, self.word_dim)), self.pre_embedding))

        lr_method_name, lr_method_parameters = parse_lr_method(self.lr_method)
        print('lr_method_name, lr_method_parameters', lr_method_name,
              lr_method_parameters)
        if lr_method_name == 'sgd':
            self.optimizer = keras.optimizers.SGD(
                learning_rate=lr_method_parameters.get('lr', 0.005), )
        elif lr_method_name == 'adam':
            self.optimizer = keras.optimizers.Adam(
                learning_rate=lr_method_parameters.get('lr', 5e-5), )
        elif lr_method_name == 'adamdecay':
            num_train_steps = self.steps_per_epoch * self.num_epochs
            num_warmup_steps = int(num_train_steps *
                                   lr_method_parameters.get('warmup', 0.05))
            print('num_train_steps, num_warmup_steps', num_train_steps,
                  num_warmup_steps)
            self.optimizer = bert_optimization.create_optimizer(
                init_lr=lr_method_parameters.get('lr', 5e-5),
                num_train_steps=num_train_steps,
                num_warmup_steps=num_warmup_steps,
                epsilon=lr_method_parameters.get('epsilon', 1e-6),
                weight_decay_rate=lr_method_parameters.get(
                    'weight_decay_rate', 0.01),
            )
        else:
            raise ValueError(
                "Not implemented learning method: {}".format(lr_method_name))

        # TODO: to be removed after Huggingface is updated and TF>=2.4 is used.
        #  This is added due to some incompatibility between keras and huggingface.
        #  See: https://github.com/huggingface/transformers/issues/6560
        self.optimizer._HAS_AGGREGATE_GRAD = False

        self.loss = PlainLoss(name='loss_layer')

        self.create_constants()
        # add layers
        if self.bert_path:
            if to_reload_model:
                bert_config = BertConfig.from_pretrained(self.bert_path)
                self.bert_layer = TFBertMainLayer(bert_config, name="bert")
            else:
                self.bert_layer = transformers.TFBertModel.from_pretrained(
                    self.bert_path,
                    from_pt=True,
                ).bert
            self.set_bert_trainable_layers(self.bert_layer,
                                           self.bert_first_trainable_layer)

            assert self.word_dim == 0
            assert self.char_dim == 0
            assert pre_embedding is None

        if self.pre_embedding is not None:
            # init embedding layer
            # https://stackoverflow.com/questions/47057361/keras-using-tensorflow-backend-masking-on-loss-function
            # mapping ids always start from 1
            # To use the mapping directly, mask_zero should always be True
            self.word_layer = keras.layers.Embedding(
                input_dim=self.n_words + 1,
                output_dim=self.word_dim,
                embeddings_initializer=keras.initializers.Constant(
                    self.pre_embedding_matrix),
                mask_zero=True,
                trainable=True,
                name='word_layer',
            )
        elif self.word_dim:
            self.word_layer = keras.layers.Embedding(
                input_dim=self.n_words + 1,
                output_dim=self.word_dim,
                embeddings_initializer='glorot_uniform',
                mask_zero=True,
                trainable=True,
                name='word_layer',
            )

        if self.dropout:
            self.dropout_layer = keras.layers.Dropout(self.dropout)

        # TODO: it is better to assign a name to each layer in the future
        if self.char_dim:
            self.char_layer = CharacterFeatures(
                n_chars=self.n_chars,
                char_dim=self.char_dim,
                char_lstm_dim=self.char_lstm_dim,
                char_bidirect=self.char_bidirect,
                char_combine_method=self.char_combine_method,
                batch_size=self.batch_size,
                rnn_type=self.rnn_type,
                unroll=self.char_unroll,
                rnn_wrapper=self.char_rnn_wrapper,
            )

        if self.classifier_type == 'simple':
            print('classifier_type', classifier_type)
            # activation function here is always None
            self.unary_score_layer = SimpleSequenceScores(n_tags=self.n_tags, )
        elif self.classifier_type == 'lstm':
            print('classifier_type', classifier_type)
            self.unary_score_layer = RNNSequenceScores(
                word_bidirect=self.word_bidirect,
                word_lstm_dim=self.word_lstm_dim,
                n_tags=self.n_tags,
                rnn_type=self.rnn_type,
                unroll=self.word_unroll,
                rnn_wrapper=self.word_rnn_wrapper,
            )
        else:
            raise NotImplementedError('Classifier {} not recognized!'.format(
                self.classifier_type))

        self.compile(
            optimizer=self.optimizer,
            loss=self.loss,
        )
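
Given how many constructor arguments this model takes, a short usage sketch may help. The class name MatNERModel, the checkpoint path and the tag mapping below are placeholders, and the lr_method string just follows the same pattern as the default 'sgd-lr_.005'; note that when bert_path is set, word_dim and char_dim must be 0 and pre_embedding must be None, per the assertions above.

# hypothetical instantiation; the class name, paths and mappings are placeholders
model = MatNERModel(
    model_name='bert_ner_run1',
    bert_path='path/to/bert_checkpoint',  # placeholder checkpoint path
    word_dim=0,
    char_dim=0,
    id_to_tag={1: 'O', 2: 'B-MAT', 3: 'I-MAT'},
    classifier_type='lstm',
    lr_method='adamdecay-lr_.00002',
    batch_size=16,
    num_epochs=5,
)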
Example #10
    def __init__(self, config, *inputs, **kwargs):
        super(BertForChatBotEncoder, self).__init__(config, *inputs, **kwargs)
        self.bert = TFBertMainLayer(config, name='bert')