def __init__(self,
             config,
             cnn_filters=50,
             dnn_units=512,
             model_output_classes=2,
             dropout_rate=0.1,
             training=False,
             name="text_model",
             *inputs,
             **kwargs):
    super().__init__(config, *inputs, **kwargs)
    self.bert = TFBertMainLayer(config, name="bert", trainable=False)
    self.cnn_layer1 = layers.Conv1D(filters=cnn_filters,
                                    kernel_size=2,
                                    padding="valid",
                                    activation="relu")
    self.cnn_layer2 = layers.Conv1D(filters=cnn_filters,
                                    kernel_size=3,
                                    padding="valid",
                                    activation="relu")
    self.cnn_layer3 = layers.Conv1D(filters=cnn_filters,
                                    kernel_size=4,
                                    padding="valid",
                                    activation="relu")
    self.pool = layers.GlobalMaxPool1D()
    self.dense_1 = layers.Dense(units=dnn_units, activation="relu")
    self.dropout = layers.Dropout(rate=dropout_rate)
    if model_output_classes == 2:
        self.last_dense = layers.Dense(units=1, activation="sigmoid")
    else:
        self.last_dense = layers.Dense(units=model_output_classes,
                                       activation="softmax")
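# A minimal forward-pass sketch for the CNN-over-BERT model above: each kernel size is
# applied to the BERT sequence output, max-pooled, concatenated, and passed through the
# dense/dropout stack. The method name `call`, the use of outputs[0], and the
# `import tensorflow as tf` it relies on are assumptions, not shown in the snippet.
def call(self, inputs, attention_mask=None, token_type_ids=None, training=False):
    sequence_output = self.bert(inputs,
                                attention_mask=attention_mask,
                                token_type_ids=token_type_ids)[0]  # (batch, seq_len, hidden)
    branch_1 = self.pool(self.cnn_layer1(sequence_output))  # kernel size 2
    branch_2 = self.pool(self.cnn_layer2(sequence_output))  # kernel size 3
    branch_3 = self.pool(self.cnn_layer3(sequence_output))  # kernel size 4
    concatenated = tf.concat([branch_1, branch_2, branch_3], axis=-1)
    concatenated = self.dense_1(concatenated)
    concatenated = self.dropout(concatenated, training=training)
    return self.last_dense(concatenated)  # sigmoid (binary) or softmax head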
def __init__(self,
             config,
             conv_filters=100,
             dense_units=256,
             dropout_rate=0.2,
             *inputs,
             **kwargs):
    super().__init__(config, *inputs, **kwargs)
    self.bert = TFBertMainLayer(config, name='bert', trainable=False)
    self.conv_1 = layers.Conv1D(filters=conv_filters,
                                kernel_size=2,
                                padding='valid',
                                activation='relu')
    self.conv_2 = layers.Conv1D(filters=conv_filters,
                                kernel_size=3,
                                padding='valid',
                                activation='relu')
    self.conv_3 = layers.Conv1D(filters=conv_filters,
                                kernel_size=4,
                                padding='valid',
                                activation='relu')
    self.pool = layers.GlobalMaxPool1D()
    self.dense_1 = layers.Dense(units=dense_units, activation='relu')
    self.dropout = layers.Dropout(rate=dropout_rate)
    self.dense_2 = layers.Dense(units=5, activation='softmax')
def __init__(self, config, *inputs, **kwargs):
    TFBertPreTrainedModel.__init__(
        self, config, *inputs, **kwargs)  # explicit calls without super
    TFNQModel.__init__(self, config)
    self.bert = TFBertMainLayer(config, name='bert')
def __init__(self, config, *inputs, **kwargs):
    super(TFBertForMultiClassification, self).__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    self.bert = TFBertMainLayer(config, name='bert')
    self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
    self.classifier = tf.keras.layers.Dense(
        config.num_labels,
        kernel_initializer=get_initializer(config.initializer_range),
        name='classifier',
        activation='softmax')
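# A minimal forward-pass sketch for TFBertForMultiClassification above. The method name
# `call` and the use of the pooled [CLS] output (index 1 of the BERT outputs tuple) are
# assumptions, not shown in the original snippet.
def call(self, inputs, training=False, **kwargs):
    outputs = self.bert(inputs, **kwargs)
    pooled_output = outputs[1]                            # (batch, hidden_size)
    pooled_output = self.dropout(pooled_output, training=training)
    return self.classifier(pooled_output)                 # softmax over num_labels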
def __init__(self, config, *inputs, **kwargs):
    super().__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    self.bert = TFBertMainLayer(config, name='bert')
    self.initializer = get_initializer(config.initializer_range)
    self.qa_outputs = tf.keras.layers.Dense(
        config.num_labels, kernel_initializer=self.initializer, name='qa_outputs')
    self.long_outputs = tf.keras.layers.Dense(
        1, kernel_initializer=self.initializer, name='long_outputs')
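# A hedged sketch of how the two heads above might be applied: per-token start/end logits
# from qa_outputs and a per-token long-answer score from long_outputs. Splitting qa_outputs
# into start/end assumes config.num_labels == 2; neither that nor the method name `call`
# is stated in the snippet.
def call(self, inputs, training=False, **kwargs):
    sequence_output = self.bert(inputs, **kwargs)[0]       # (batch, seq_len, hidden)
    logits = self.qa_outputs(sequence_output)              # (batch, seq_len, num_labels)
    start_logits, end_logits = tf.split(logits, 2, axis=-1)
    long_logits = self.long_outputs(sequence_output)       # (batch, seq_len, 1)
    return (tf.squeeze(start_logits, axis=-1),
            tf.squeeze(end_logits, axis=-1),
            tf.squeeze(long_logits, axis=-1))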
def __init__(self, config, *inputs, **kwargs):
    super(CustomBertForSequenceClassification, self).__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    self.noise_amount = config.noise_amount
    self.noise_distribution = config.noise_distribution
    self.add_dense = config.add_dense
    self.add_dense_2 = config.add_dense_2
    self.bert = TFBertMainLayer(config, name='bert')
    self.dropout = tf.keras.layers.Dropout(config.dense_dropout_prob)
    self.noise_layer = Noise(amount=self.noise_amount,
                             activation="relu",
                             distribution=self.noise_distribution,
                             relative=True)
    if self.add_dense:
        self.dense_layer_1 = ScaledLinear(
            config.dense_units,
            activation="relu",
            scf_min=config.scf_min,
            scf_max=config.scf_max,
            scale=config.scale_dense,
            dropconnect_prob=config.dropconnect_prob,
            kernel_initializer=get_initializer(config.initializer_range),
            name='dense')
    if self.add_dense_2:
        self.dense_pipe = DensePipe(
            units=20,
            num_layers=10,
            extract_every_n_layers=3,  # saves every nth layer and returns them concatenated
            activation="relu",
            use_bias=True,
            scale=True,
            scf_min=0.2,
            scf_max=2.0,
            dropconnect_prob=0.00,
            dropout_prob=0.1,
            kernel_initializer='glorot_uniform',
            bias_initializer='zeros')
    self.classifier = ScaledLinear(
        config.num_labels,
        scf_min=config.scf_min,
        scf_max=config.scf_max,
        scale=config.scale_logits,
        dropconnect_prob=0.0,  # config.dropconnect_prob,
        kernel_initializer=get_initializer(config.initializer_range),
        name='classifier')
def __init__(self, config, *inputs, **kwargs): super().__init__(config, *inputs, **kwargs) self.bert = TFBertMainLayer(config, name="bert") self.concat = L.Concatenate() self.dropout = L.Dropout(self.DROPOUT_RATE) self.qa_outputs = L.Dense(units=config.num_labels, activation='relu', kernel_initializer=TruncatedNormal( stddev=config.initializer_range), dtype='float32', name="qa_outputs")
def __init__(self, config, *inputs, **kwargs):
    super().__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    self.config = config
    self.bert = TFBertMainLayer(self.config, name="bert")
    self.bilstm = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))
    self.dropout = Dropout(0.2)
    self.time_distributed = TimeDistributed(
        Dense(self.num_labels, activation="relu"))
    self.crf = CRF(self.num_labels + 1)
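# A minimal sketch of the token-tagging forward pass implied by the layers above
# (BERT sequence output -> BiLSTM -> per-token dense -> CRF). The method name `call`
# and the use of outputs[0] are assumptions, not shown in the snippet.
def call(self, inputs, training=False, **kwargs):
    sequence_output = self.bert(inputs, **kwargs)[0]   # (batch, seq_len, hidden)
    x = self.bilstm(sequence_output)
    x = self.dropout(x, training=training)
    x = self.time_distributed(x)                       # per-token label scores
    return self.crf(x)                                 # CRF layer output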
def __init__(self,
             model_path=None,
             model_name=None,
             to_reload_model=False,
             tag_scheme='iobes',
             clean_tag=True,
             id_to_word=None,
             id_to_char=None,
             id_to_tag=None,
             classifier_type='lstm',
             bert_path=None,
             bert_first_trainable_layer=0,
             word_dim=100,
             word_lstm_dim=100,
             word_bidirect=True,
             word_unroll=False,
             word_rnn_wrapper=False,
             char_dim=25,
             char_lstm_dim=25,
             char_bidirect=True,
             char_combine_method='concat',
             char_unroll=False,
             char_rnn_wrapper=False,
             # cap_dim,
             ele_num=True,
             only_CHO=True,
             tar_tag=False,
             pre_tag=False,
             # topic_dim,
             # keyword_dim,
             rnn_type='gru',
             lower=False,
             zeros=False,
             use_ori_text_char=False,
             crf=True,
             crf_begin_end=True,
             dropout=0.5,
             pre_embedding=None,
             lr_method='sgd-lr_.005',
             loss_per_token=False,
             batch_size=1,
             num_epochs=100,
             steps_per_epoch=3500,
             **kwargs):
    """
    Initialize the model. We can init an empty model with model_name,
    or reload a pre-trained model from model_path.

    :param model_path: File path to reload the model. If specified, the model will be
        reloaded from model_path, and model_name will be discarded.
    :param model_name: Name of the model. If specified, the model will be saved in a
        folder called model_name.
    """
    super().__init__(**kwargs)

    # Model location
    parent_folder = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..'))
    if not model_path:
        if model_name:
            self.model_path = os.path.join(parent_folder, 'generated', model_name)
        else:
            self.model_path = os.path.join(parent_folder, 'generated', 'model_1')
        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)
        self.parameters_path = os.path.join(self.model_path, 'parameters.pkl')
    else:
        # reload model parameters
        self.model_path = model_path
        self.parameters_path = os.path.join(self.model_path, 'parameters.pkl')
    self.last_cp_dir = os.path.join(self.model_path, 'last_cp')
    if not os.path.exists(self.last_cp_dir):
        os.makedirs(self.last_cp_dir)
    self.last_cp_path = os.path.join(self.last_cp_dir, 'cp.ckpt')
    self.opt_cp_dir = os.path.join(self.model_path, 'opt_cp')
    if not os.path.exists(self.opt_cp_dir):
        os.makedirs(self.opt_cp_dir)
    self.opt_cp_path = os.path.join(self.opt_cp_dir, 'cp.ckpt')

    self.tag_scheme = tag_scheme
    self.clean_tag = clean_tag
    self.classifier_type = classifier_type
    self.bert_path = bert_path
    self.bert_first_trainable_layer = bert_first_trainable_layer
    self.word_dim = word_dim
    self.word_lstm_dim = word_lstm_dim
    self.word_bidirect = word_bidirect
    self.word_unroll = word_unroll
    self.word_rnn_wrapper = word_rnn_wrapper
    self.char_dim = char_dim
    self.char_lstm_dim = char_lstm_dim
    self.char_bidirect = char_bidirect
    self.char_combine_method = char_combine_method
    self.char_unroll = char_unroll
    self.char_rnn_wrapper = char_rnn_wrapper
    self.ele_num = ele_num
    self.only_CHO = only_CHO
    self.tar_tag = tar_tag
    self.pre_tag = pre_tag
    self.rnn_type = rnn_type
    self.lower = lower
    self.zeros = zeros
    self.use_ori_text_char = use_ori_text_char
    self.crf = crf
    self.crf_begin_end = crf_begin_end
    self.dropout = dropout
    self.pre_embedding = pre_embedding
    self.lr_method = lr_method
    self.loss_per_token = loss_per_token
    self.batch_size = batch_size
    self.num_epochs = num_epochs
    self.steps_per_epoch = steps_per_epoch

    assert id_to_tag
    if not self.bert_path:
        assert id_to_word and id_to_char and id_to_tag
    self.create_mappings(
        id_to_word=id_to_word,
        id_to_char=id_to_char,
        id_to_tag=id_to_tag,
    )
    if not to_reload_model:
        # TODO: need to think about how to save bert model
        self.save_model()

    # masking is used by default,
    # therefore ids of words and tags should start from 1;
    # add an embedding row for the mask
    if self.pre_embedding is not None:
        self.word_dim = np.shape(self.pre_embedding)[1]
        self.pre_embedding_matrix = np.concatenate(
            (np.zeros((1, self.word_dim)), self.pre_embedding))

    # lr_method encodes the optimizer name and its hyperparameters
    # (see the hedged parsing sketch after this snippet)
    lr_method_name, lr_method_parameters = parse_lr_method(self.lr_method)
    print('lr_method_name, lr_method_parameters',
          lr_method_name, lr_method_parameters)
    if lr_method_name == 'sgd':
        self.optimizer = keras.optimizers.SGD(
            learning_rate=lr_method_parameters.get('lr', 0.005),
        )
    elif lr_method_name == 'adam':
        self.optimizer = keras.optimizers.Adam(
            learning_rate=lr_method_parameters.get('lr', 5e-5),
        )
    elif lr_method_name == 'adamdecay':
        num_train_steps = self.steps_per_epoch * self.num_epochs
        num_warmup_steps = int(num_train_steps *
                               lr_method_parameters.get('warmup', 0.05))
        print('num_train_steps, num_warmup_steps',
              num_train_steps, num_warmup_steps)
        self.optimizer = bert_optimization.create_optimizer(
            init_lr=lr_method_parameters.get('lr', 5e-5),
            num_train_steps=num_train_steps,
            num_warmup_steps=num_warmup_steps,
            epsilon=lr_method_parameters.get('epsilon', 1e-6),
            weight_decay_rate=lr_method_parameters.get('weight_decay_rate', 0.01),
        )
    else:
        raise ValueError(
            "Not implemented learning method: {}".format(lr_method_name))

    # TODO: to be removed after Huggingface is updated and TF>=2.4 is used.
    # This is added due to some incompatibility between keras and huggingface.
    # See: https://github.com/huggingface/transformers/issues/6560
    self.optimizer._HAS_AGGREGATE_GRAD = False

    self.loss = PlainLoss(name='loss_layer')

    self.create_constants()

    # add layers
    if self.bert_path:
        if to_reload_model:
            bert_config = BertConfig.from_pretrained(self.bert_path)
            self.bert_layer = TFBertMainLayer(bert_config, name="bert")
        else:
            self.bert_layer = transformers.TFBertModel.from_pretrained(
                self.bert_path,
                from_pt=True,
            ).bert
        self.set_bert_trainable_layers(self.bert_layer,
                                       self.bert_first_trainable_layer)
        assert self.word_dim == 0
        assert self.char_dim == 0
        assert pre_embedding is None

    if self.pre_embedding is not None:
        # init embedding layer
        # https://stackoverflow.com/questions/47057361/keras-using-tensorflow-backend-masking-on-loss-function
        # mapping ids always start from 1
        # To use the mapping directly, mask_zero should always be true
        self.word_layer = keras.layers.Embedding(
            input_dim=self.n_words + 1,
            output_dim=self.word_dim,
            embeddings_initializer=keras.initializers.Constant(
                self.pre_embedding_matrix),
            mask_zero=True,
            trainable=True,
            name='word_layer',
        )
    elif self.word_dim:
        self.word_layer = keras.layers.Embedding(
            input_dim=self.n_words + 1,
            output_dim=self.word_dim,
            embeddings_initializer='glorot_uniform',
            mask_zero=True,
            trainable=True,
            name='word_layer',
        )
    if self.dropout:
        self.dropout_layer = keras.layers.Dropout(self.dropout)

    # TODO: it is better to assign a name to each layer in the future
    if self.char_dim:
        self.char_layer = CharacterFeatures(
            n_chars=self.n_chars,
            char_dim=self.char_dim,
            char_lstm_dim=self.char_lstm_dim,
            char_bidirect=self.char_bidirect,
            char_combine_method=self.char_combine_method,
            batch_size=self.batch_size,
            rnn_type=self.rnn_type,
            unroll=self.char_unroll,
            rnn_wrapper=self.char_rnn_wrapper,
        )

    if self.classifier_type == 'simple':
        print('classifier_type', classifier_type)
        # activation function here is always None
        self.unary_score_layer = SimpleSequenceScores(n_tags=self.n_tags)
    elif self.classifier_type == 'lstm':
        print('classifier_type', classifier_type)
        self.unary_score_layer = RNNSequenceScores(
            word_bidirect=self.word_bidirect,
            word_lstm_dim=self.word_lstm_dim,
            n_tags=self.n_tags,
            rnn_type=self.rnn_type,
            unroll=self.word_unroll,
            rnn_wrapper=self.word_rnn_wrapper,
        )
    else:
        raise NotImplementedError('Classifier {} not recognized!'.format(
            self.classifier_type))

    self.compile(
        optimizer=self.optimizer,
        loss=self.loss,
    )
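# The lr_method strings used above (e.g. 'sgd-lr_.005') encode an optimizer name followed
# by hyperparameters, which parse_lr_method turns into a name plus a parameter dict read
# with .get('lr', ...), .get('warmup', ...), etc. parse_lr_method itself is not part of
# the snippet; the helper below is a hypothetical sketch of that format, not the project's
# actual implementation.
def parse_lr_method_sketch(lr_method):
    """Split e.g. 'sgd-lr_.005' into ('sgd', {'lr': 0.005})."""
    fields = lr_method.split('-')
    name = fields[0]
    parameters = {}
    for field in fields[1:]:
        key, _, value = field.rpartition('_')
        parameters[key] = float(value)
    return name, parameters

# parse_lr_method_sketch('adamdecay-lr_.00005-warmup_.1')
# -> ('adamdecay', {'lr': 5e-05, 'warmup': 0.1})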
def __init__(self, config, *inputs, **kwargs):
    super(BertForChatBotEncoder, self).__init__(config, *inputs, **kwargs)
    self.bert = TFBertMainLayer(config, name='bert')
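# A hedged sketch of how an encoder such as BertForChatBotEncoder above might expose a
# sentence representation; returning the pooled [CLS] output (index 1 of the BERT outputs
# tuple) and the method name `call` are assumptions, not shown in the snippet.
def call(self, inputs, **kwargs):
    outputs = self.bert(inputs, **kwargs)
    return outputs[1]   # pooled representation, (batch, hidden_size)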