def __init__(self, config, as_features=False, use_layer_norm=True, *inputs, **kwargs):
    """Ordinal-regression model: DistilBERT backbone + CORAL output layer.

    Args:
        config: DistilBERT configuration object.
        as_features: if True, the backbone is frozen (feature-extractor mode).
        use_layer_norm: stored flag; presumably consulted in call() — confirm.
    """
    super().__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    self.use_layer_norm = use_layer_norm
    # Backbone is trainable only when not used purely as a feature extractor.
    self.distilbert = TFDistilBertMainLayer(
        config, name="distilbert", trainable=not as_features)
    self.pre_classifier = tf.keras.layers.Dense(
        config.dim,
        kernel_initializer=get_initializer(config.initializer_range),
        activation="relu",
        name="pre_classifier",
    )
    # CORAL head produces ordinal logits over num_labels ranks.
    self.classifier = CORAL(
        config.num_labels,
        kernel_initializer=get_initializer(config.initializer_range),
        name="classifier")
    self.dropout = tf.keras.layers.Dropout(config.seq_classif_dropout)
    # Created unconditionally; use_layer_norm presumably gates its use in call().
    self.layer_norm = tf.keras.layers.LayerNormalization()
def __init__(self, config, *inputs, **kwargs):
    """
    Subclasses of this class are different in self.backend, which should be
    a model that outputs a tensor of shape (batch_size, hidden_dim), and the
    `backend_call()` method. We will use Hugging Face Bert/DistilBert as
    backend in this notebook.
    """
    # BUG FIX: the original never chained to the parent initializer, so the
    # base class (presumably tf.keras.Model — confirm) never set up its
    # layer/attribute tracking state.
    super().__init__()
    # Subclasses are expected to replace this with a concrete encoder.
    self.backend = None
    self.seq_output_dropout = tf.keras.layers.Dropout(
        kwargs.get('seq_output_dropout_prob', 0.05))
    self.pooled_output_dropout = tf.keras.layers.Dropout(
        kwargs.get('pooled_output_dropout_prob', 0.05))
    # Two logits per token: start/end position scores.
    self.pos_classifier = tf.keras.layers.Dense(
        2,
        kernel_initializer=get_initializer(config.initializer_range),
        name='pos_classifier')
    self.answer_type_classifier = tf.keras.layers.Dense(
        NB_ANSWER_TYPES,
        kernel_initializer=get_initializer(config.initializer_range),
        name='answer_type_classifier')
def __init__(self, config, hidden_size, dropout, activation, **kwargs):
    """Classification head: Dense(hidden_size) -> Dropout -> Dense(num_labels).

    Args:
        config: model config providing initializer_range and num_labels.
        hidden_size: width of the intermediate dense layer.
        dropout: dropout rate applied between the two dense layers.
        activation: activation of the intermediate dense layer.
    """
    super().__init__(**kwargs)
    self.dense = layers.Dense(
        hidden_size,
        kernel_initializer=get_initializer(config.initializer_range),
        activation=activation,
        name="dense",
    )
    self.dropout = layers.Dropout(dropout)
    # Final projection to label logits.
    self.out_proj = layers.Dense(
        config.num_labels,
        kernel_initializer=get_initializer(config.initializer_range),
        name="out_proj",
    )
def create_model_cls_output(self, trainable=True):
    """Build and compile a BERT classifier over the pooled [CLS] output.

    Args:
        trainable: whether the BERT encoder weights are updated during training.

    Returns:
        A compiled tf.keras.Model taking (input_ids, attention_mask).
    """
    encoder = TFBertModel.from_pretrained("bert-base-uncased")
    encoder.trainable = trainable

    input_ids = layers.Input(shape=(512,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(512,), dtype=tf.int32)
    embed = encoder(input_ids, attention_mask=attention_mask)

    # embed[1] is the pooled [CLS] representation.
    dropout = layers.Dropout(0.1)(embed[1])
    out = layers.Dense(
        self.nums_category,
        kernel_initializer=modeling_tf_utils.get_initializer(0.02))(dropout)

    model = tf.keras.Model(
        inputs=[input_ids, attention_mask],
        outputs=[out],
    )
    # Logits output, so the loss runs its own softmax.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=["acc"])
    model.summary()
    return model
def __init__(self, config, *inputs, **kwargs):
    """GPT-2 main layer: token/position embeddings, block stack, final LayerNorm."""
    super(TFGPT2MainLayer, self).__init__(config, *inputs, **kwargs)
    self.output_hidden_states = config.output_hidden_states
    self.output_attentions = config.output_attentions
    self.num_hidden_layers = config.n_layer
    self.vocab_size = config.vocab_size
    self.n_embd = config.n_embd
    # Token embedding table.
    self.wte = TFSharedEmbeddings(
        config.vocab_size,
        config.hidden_size,
        initializer_range=config.initializer_range,
        name='wte')
    # Learned absolute position embeddings.
    self.wpe = tf.keras.layers.Embedding(
        config.n_positions,
        config.n_embd,
        embeddings_initializer=get_initializer(config.initializer_range),
        name='wpe')
    self.drop = tf.keras.layers.Dropout(config.embd_pdrop)
    # Stack of n_layer transformer blocks.
    self.h = [
        TFBlock(config.n_ctx, config, scale=True, name='h_._{}'.format(i))
        for i in range(config.n_layer)
    ]
    self.ln_f = tf.keras.layers.LayerNormalization(
        epsilon=config.layer_norm_epsilon, name='ln_f')
def __init__(self, config):
    """Transformer-backed classification model.

    BUG FIX: the original assigned sub-modules to ``self`` *before* calling
    ``super().__init__``; tf.keras model subclasses must run the parent
    initializer first so attribute/layer tracking is set up, otherwise
    assignment can raise or silently skip weight tracking.
    """
    super(TransformerClsModel, self).__init__(config)
    self.transformer = get_transformer(
        bert_model_type=config.model_type,
        # NOTE(review): config attribute is singular ``output_hidden_state``
        # here — confirm the config object really defines it under that name.
        output_hidden_states=config.output_hidden_state)
    # Optionally freeze the encoder.
    if not config.bert_trainable:
        self.transformer.trainable = False
    self.kernel_initializer = get_initializer(
        self.transformer.config.initializer_range)
def __init__(self, config, *inputs, **kwargs):
    """RoBERTa model with per-label single-unit heads and a learned
    weighting over the backbone's hidden states."""
    super().__init__(config, *inputs, **kwargs)
    # NOTE(review): named ``num_layers`` but stores ``num_labels`` — confirm
    # what downstream code expects before renaming.
    self.num_layers = config.num_labels
    self.backbone = TFRobertaModel(config, *inputs, **kwargs, name="roberta_backbone")
    self.dropout = tf.keras.layers.Dropout(0.2)
    # Higher-rate dropout, presumably for multi-sample dropout in call()
    # — TODO confirm.
    self.dropout_multisampled = tf.keras.layers.Dropout(0.5)
    # One 1-unit head per label. All share the name "classifier"; Keras
    # uniquifies duplicates automatically.
    self.classifiers = [
        tf.keras.layers.Dense(
            1,
            kernel_initializer=get_initializer(config.initializer_range),
            name="classifier") for _ in range(config.num_labels)
    ]
    self.concat = tf.keras.layers.Concatenate(axis=-1)
    # Trainable mixing weights over hidden states: one per transformer layer
    # plus a final entry initialized to 0.0 (the -3.0 entries start strongly
    # down-weighted after the softmax below).
    self.hidden_states_weights = tf.Variable(
        initial_value=[-3.0] * config.num_hidden_layers + [0.0],
        dtype='float32',
        trainable=True,
        name="hidden_state_weights")
    self.softmax_act = tf.keras.layers.Softmax(axis=0)
    # Freeze the pooler via its private flag — presumably because the pooled
    # output is unused; confirm in call().
    self.backbone.roberta.pooler._trainable = False
def __init__(self, config, *inputs, **kwargs):
    """Longformer sequence classifier: pre-classifier Dense, dropout, logits head."""
    super().__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    self.longformer = TFLongformerMainLayer(config, name="longformer")
    # ReLU projection applied before the final classifier.
    self.pre_classifier = tf.keras.layers.Dense(
        config.hidden_size,
        kernel_initializer=get_initializer(config.initializer_range),
        activation="relu",
        name="pre_classifier",
    )
    self.classifier = tf.keras.layers.Dense(
        config.num_labels,
        kernel_initializer=get_initializer(config.initializer_range),
        name="classifier")
    # Fixed 20% dropout rate.
    self.dropout = tf.keras.layers.Dropout(0.2)
def __init__(self, config, other_config):
    """Minimal classification head: dropout followed by a logits projection.

    Note: ``other_config`` is accepted but not used in this initializer.
    """
    super().__init__()
    self.num_labels = config.num_labels
    self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
    self.classifier = tf.keras.layers.Dense(
        config.num_labels,
        kernel_initializer=get_initializer(config.initializer_range),
        name='classifier')
def __init__(self, config, other_config):
    """QA model head with span (qa_outputs) plus start/end position classifiers.

    ``other_config`` supplies 'max_length' for the position heads.
    """
    super().__init__()
    # NOTE(review): creating a symbolic Input layer inside a subclassed
    # model's __init__ is unusual — confirm it is actually consumed in call().
    self.unique_id = tf.keras.layers.Input(shape=(), dtype='int32', name='unique_id')
    self.num_labels = config.num_labels
    self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
    # No explicit kernel initializer here, unlike the position heads below.
    self.qa_outputs = tf.keras.layers.Dense(config.num_labels, name='qa_outputs')
    # One logit per sequence position for the answer start index.
    self.start_pos_classifier = tf.keras.layers.Dense(
        other_config['max_length'],
        kernel_initializer=get_initializer(config.initializer_range),
        name='start_position')
    # One logit per sequence position for the answer end index.
    self.end_pos_classifier = tf.keras.layers.Dense(
        other_config['max_length'],
        kernel_initializer=get_initializer(config.initializer_range),
        name='end_position')
def __init__(self, num_labels):
    """Chinese-BERT classifier whose head emits per-unit sigmoid scores.

    Args:
        num_labels: number of output units.
    """
    super(MyModel, self).__init__()
    self.bert = TFBertModel.from_pretrained('bert-base-chinese')
    self.dropout = tf.keras.layers.Dropout(.1)
    # Sigmoid output per unit (probabilities, not logits).
    self.classifier = tf.keras.layers.Dense(
        units=num_labels,
        activation=tf.nn.sigmoid,
        kernel_initializer=get_initializer(0.02))
def __init__(self, config, *inputs, **kwargs):
    """RoBERTa backbone with a single-unit output head."""
    super(CustomModel, self).__init__(config, *inputs, **kwargs)
    self.roberta = TFRobertaMainLayer(config, name="roberta")
    self.dropout_1 = tf.keras.layers.Dropout(0.3)
    # One output unit — a single score/regression target per example.
    self.classifier = tf.keras.layers.Dense(
        units=1,
        name='classifier',
        kernel_initializer=get_initializer(config.initializer_range))
def __init__(self, config, *inputs, **kwargs):
    """DistilBERT classifier that also consumes a 4-feature metadata vector."""
    super().__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    self.distilbert = TFDistilBertMainLayer(config, name='distilbert')
    # Side input carrying 4 metadata features.
    self.metadata_inputs = tf.keras.layers.InputLayer(input_shape=(4,),
                                                      name='metadata')
    self.fully_connected = tf.keras.layers.Dense(
        config.dim,
        kernel_initializer=get_initializer(config.initializer_range),
        activation='relu',
        name='fully_connected',
    )
    self.dropout = tf.keras.layers.Dropout(config.seq_classif_dropout)
    self.classifier = tf.keras.layers.Dense(
        config.num_labels,
        kernel_initializer=get_initializer(config.initializer_range),
        name='classifier')
def __init__(self, config, *inputs, **kwargs):
    """BERT multi-class classifier whose head applies softmax directly."""
    super(TFBertForMultiClassification, self).__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    self.bert = TFBertMainLayer(config, name='bert')
    self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
    # Emits probabilities (softmax activation), not raw logits — pair with a
    # loss configured for probabilities.
    self.classifier = tf.keras.layers.Dense(
        config.num_labels,
        kernel_initializer=get_initializer(config.initializer_range),
        name='classifier',
        activation='softmax')
def __init__(self, bert_config, lm_prediction_head):
    """Wrap a BERT embedding module and an LM prediction head built from it.

    Args:
        bert_config: BERT configuration (vocab_size, hidden_size, ...).
        lm_prediction_head: factory/class taking (config, embedding) and
            returning the actual prediction head.
    """
    super(LMPredictionHead, self).__init__()
    self.embedding = TFBertEmbeddings(bert_config)
    # Manually materialize the embedding weight matrix so it exists before
    # the head is constructed; the head receives the embedding module and
    # presumably ties its output projection to this weight — confirm in
    # the lm_prediction_head implementation.
    self.embedding.weight = self.embedding.add_weight(
        name="weight",
        shape=[bert_config.vocab_size, bert_config.hidden_size],
        initializer=get_initializer(self.embedding.initializer_range),
    )
    self.lm_prediction_head = lm_prediction_head(
        bert_config, self.embedding)
def get_pooler_output(self, last_hidden_states):
    """Return the pooled representation of the first ([CLS]) token.

    BUG FIX: the original constructed a brand-new ``Dense`` layer on every
    call, so the projection used freshly-initialized random weights each
    time and its parameters could never be trained or tracked. The layer is
    now created lazily once, cached on the instance, and reused thereafter.

    Args:
        last_hidden_states: encoder output; the first token per sequence is
            taken as the pooled vector (shape assumptions per the [:, 0]
            slice — confirm input is (batch, seq, dim)).

    Returns:
        The ReLU-projected pooled output.
    """
    if not hasattr(self, "_pre_classifier"):
        self._pre_classifier = Dense(
            self.transformer.config.dim,
            kernel_initializer=get_initializer(
                self.transformer.config.initializer_range),
            activation="relu",
            name="pre_classifier",
        )
    pooled_output = last_hidden_states[:, 0]
    return self._pre_classifier(pooled_output)
def __init__(self, config, *inputs, **kwargs):
    """XLM sequence classifier: transformer backbone -> dropout -> logits."""
    super().__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    self.transformer = TFXLMMainLayer(config, name="transformer")
    self.dropout = tf.keras.layers.Dropout(config.dropout)
    # XLM configs expose init_std (not initializer_range) for the init scale.
    self.classifier = tf.keras.layers.Dense(
        config.num_labels,
        kernel_initializer=get_initializer(config.init_std),
        name="classifier")
def __init__(self, config, *inputs, **kwargs):
    """ALBERT multiple-choice model: one scoring logit per candidate choice."""
    super(TFAlbertForMultipleChoice, self).__init__(config, *inputs, **kwargs)
    self.albert = TFAlbertModel(config, name='albert')
    self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
    # Single logit per choice; choices are presumably compared downstream —
    # confirm in call().
    self.classifier = tf.keras.layers.Dense(
        1,
        kernel_initializer=get_initializer(config.initializer_range),
        name='classifier')
def __init__(self, config, *inputs, **kwargs):
    """ALBERT QA model with a span head (qa_outputs) and a long-answer head."""
    super().__init__(config, *inputs, **kwargs)
    self.albert = TFAlbertMainLayer(config)
    # One initializer instance shared by both heads.
    self.initializer = get_initializer(config.initializer_range)
    self.qa_outputs = tf.keras.layers.Dense(config.num_labels,
                                            kernel_initializer=self.initializer,
                                            name='qa_outputs')
    self.long_outputs = tf.keras.layers.Dense(1,
                                              kernel_initializer=self.initializer,
                                              name='long_outputs')
def __init__(self, config, *inputs, **kwargs):
    """ALBERT sequence-classification model.

    FIX: removed a leftover debug ``print(kwargs)`` that spammed stdout on
    every construction.
    """
    super().__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    # Flag presumably consulted elsewhere to distinguish this head type —
    # confirm against callers.
    self.isSCR = True
    self.albert = TFAlbertMainLayer(config, name="albert")
    self.dropout = tf.keras.layers.Dropout(config.classifier_dropout_prob)
    self.classifier = tf.keras.layers.Dense(
        config.num_labels,
        kernel_initializer=get_initializer(config.initializer_range),
        name="classifier")
def __init__(self, config, *inputs, **kwargs):
    """BERT QA model with a span head (qa_outputs) and a long-answer head."""
    super().__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    self.bert = TFBertMainLayer(config, name='bert')
    # One initializer instance shared by both heads.
    self.initializer = get_initializer(config.initializer_range)
    self.qa_outputs = L.Dense(config.num_labels,
                              kernel_initializer=self.initializer,
                              name='qa_outputs')
    self.long_outputs = L.Dense(1,
                                kernel_initializer=self.initializer,
                                name='long_outputs')
def __init__(self, config, *inputs, **kwargs):
    """Binary sequence classifier on BERT (single output logit).

    FIX: the original set ``self.num_labels`` and ``self.dropout_rate``
    *before* calling ``super().__init__``; tf.keras model subclasses must
    run the parent initializer first so attribute tracking is set up.
    The attribute values and all layers are unchanged.
    """
    super(TFBertForBinarySequenceClassification, self).__init__(
        config, *inputs, **kwargs)
    self.num_labels = 1
    self.dropout_rate = 0.5
    self.bert = TFBertMainLayer(config, name='bert')
    self.dropout = tf.keras.layers.Dropout(self.dropout_rate)
    # Single unit: one logit for the binary decision.
    self.classifier = tf.keras.layers.Dense(
        self.num_labels,
        kernel_initializer=get_initializer(config.initializer_range),
        name='classifier')
def create_sl_cls_model(model_name_or_path, input_seq_length, args):
    """Build and compile a sequence-level classifier on a transformer encoder.

    A pooler (Dense + tanh over the first token, per
    modeling_tf_bert:TFBertPooler) is re-implemented here because models such
    as RoBERTa and Electra do not offer a pooled output directly; simply
    using ``sequence_outs[:, 0]`` would likely have a similar effect.

    Fixes vs. the original:
    - The dropout layers were called with ``training=True``, which forces
      dropout to stay active even at inference/evaluation time. Keras now
      controls the training flag.
    - ``Adam(lr=...)`` used the deprecated ``lr`` alias; renamed to
      ``learning_rate``.

    Returns:
        A compiled keras.Model over (input_ids, token_type_ids, attention_mask)
        emitting class probabilities.
    """
    encoder = TFAutoModel.from_pretrained(model_name_or_path)
    encoder_config = encoder.config
    # Persist the encoder config alongside the trained model once.
    if not os.path.isfile(os.path.join(args.output_path, "config.json")):
        encoder_config.save_pretrained(args.output_path)

    input_ids = layers.Input(shape=(input_seq_length,), dtype=tf.int32)
    token_type_ids = layers.Input(shape=(input_seq_length,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(input_seq_length,), dtype=tf.int32)

    if "distilbert" in args.model_select:
        # DistilBERT does not accept token_type_ids.
        sequence_outs = encoder(input_ids, attention_mask=attention_mask)[0]
    else:
        sequence_outs = encoder(input_ids,
                                token_type_ids=token_type_ids,
                                attention_mask=attention_mask)[0]

    # Re-implemented pooler: Dense + tanh over the first token.
    pooled_output = tf.keras.layers.Dense(
        encoder_config.hidden_size,
        kernel_initializer=get_initializer(encoder_config.initializer_range),
        activation="tanh",
        name="dense",
    )(sequence_outs[:, 0])

    # Config families name their dropout rate differently.
    if hasattr(encoder_config, "hidden_dropout_prob"):
        pooled_output = tf.keras.layers.Dropout(
            encoder_config.hidden_dropout_prob)(pooled_output)
    else:
        pooled_output = tf.keras.layers.Dropout(
            encoder_config.dropout)(pooled_output)

    logits = tf.keras.layers.Dense(len(args.label2id),
                                   name="classifier",
                                   use_bias=False)(pooled_output)
    probs = layers.Activation(keras.activations.softmax)(logits)

    model = keras.Model(
        inputs=[input_ids, token_type_ids, attention_mask],
        outputs=probs,
    )
    # Probabilities out, so from_logits=False.
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = keras.optimizers.Adam(learning_rate=args.lr)
    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=['accuracy', get_lr_metric(optimizer)])
    return model
def __init__(self, num_units, output_embed_num_units):
    """GPT-2 core with dense input/output projections and two index
    embedding tables (text token index and object index).

    Args:
        num_units: transformer width (n_embd).
        output_embed_num_units: width of the output projection.
    """
    super().__init__()
    # Minimal GPT-2 config; vocab_size=1 because inputs arrive as dense
    # vectors via input_embedding rather than token ids.
    self.config = GPT2Config(vocab_size=1,
                             n_positions=1024,
                             n_ctx=1024,
                             n_embd=num_units,
                             n_layer=6,
                             n_head=8)
    # Project raw feature vectors into the transformer width.
    self.input_embedding = tf.keras.layers.Dense(num_units)
    self.transformer = TFGPT2Model(self.config)
    self.output_embedding = tf.keras.layers.Dense(output_embed_num_units)
    self.text_idx_embedding = tf.keras.layers.Embedding(
        MAX_NUM_TOKENS,
        self.config.n_embd,
        embeddings_initializer=modeling_tf_utils.get_initializer(
            self.config.initializer_range))
    self.obj_idx_embedding = tf.keras.layers.Embedding(
        MAX_NUM_TOKENS,
        self.config.n_embd,
        embeddings_initializer=modeling_tf_utils.get_initializer(
            self.config.initializer_range))
def __init__(self, config, *inputs, **kwargs):
    """DistilBERT regressor that also consumes a 35-feature metadata vector."""
    super().__init__(config, *inputs, **kwargs)
    self.num_labels = config.num_labels
    self.distilbert = TFDistilBertMainLayer(config, name='distilbert')
    # Side input carrying 35 metadata features.
    self.metadata_inputs = tf.keras.layers.InputLayer(input_shape=(35,),
                                                      name='metadata')
    self.fully_connected = tf.keras.layers.Dense(
        512,
        kernel_initializer=get_initializer(config.initializer_range),
        activation='relu',
        name='fully_connected',
    )
    # Pools the 768-wide representation down to 48 features.
    self.pooling = ReshapePoolingReshape(pool_size=16,
                                         target_shape_in=(1, 768),
                                         input_shape_in=(768,),
                                         target_shape_out=(48,),
                                         input_shape_out=(1, 48))
    self.regressor = tf.keras.layers.Dense(
        config.num_labels,
        kernel_initializer=get_initializer(config.initializer_range),
        name='regressor')
def __init__(self, config, **kwargs):
    """BERT-style embeddings: position embeddings + LayerNorm + dropout.

    No token-embedding table is created in this initializer; vocab_size /
    hidden_size / initializer_range are only stored as attributes.
    """
    super(TFBertEmbeddingsSimple, self).__init__(**kwargs)
    self.vocab_size = config.vocab_size
    self.hidden_size = config.hidden_size
    self.initializer_range = config.initializer_range
    # Learned absolute position embeddings.
    self.position_embeddings = tf.keras.layers.Embedding(
        config.max_position_embeddings,
        config.hidden_size,
        embeddings_initializer=get_initializer(self.initializer_range),
        name='position_embeddings')
    self.LayerNorm = tf.keras.layers.LayerNormalization(
        epsilon=config.layer_norm_eps, name='LayerNorm')
    self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
def __init__(self, config, *inputs, **kwargs):
    """BERT classifier over a learned weighted sum of hidden states."""
    super().__init__(config, *inputs, **kwargs)
    # NOTE: named num_layers but stores the label count.
    self.num_layers = config.num_labels
    self.backbone = TFBertModel(config, *inputs, **kwargs, name="bert_backbone")
    self.dropout = tf.keras.layers.Dropout(0.2)
    # Higher-rate dropout, presumably for multi-sample dropout — confirm in call().
    self.dropout_multisampled = tf.keras.layers.Dropout(0.5)
    self.weighted_sum = WeightedSumLayer(config.num_hidden_layers)
    self.classifier = tf.keras.layers.Dense(
        config.num_labels,
        kernel_initializer=get_initializer(config.initializer_range),
        name="classifier")
    # Freeze the pooler via its private flag — presumably because the pooled
    # output is unused; confirm in call().
    self.backbone.bert.pooler._trainable = False
def __init__(self, config, *inputs, **kwargs):
    """ALBERT QA model with short- and long-answer start/end span heads."""
    super().__init__(config, *inputs, **kwargs)
    self.albert = TFALBertMainLayer(config)
    self.initializer = get_initializer(config.initializer_range)

    def dense(units, **extra):
        # All heads share the same kernel initializer.
        return tf.keras.layers.Dense(units,
                                     kernel_initializer=self.initializer,
                                     **extra)

    # Two fully-connected layers applied to the embeddings before the heads.
    self.layer_1 = dense(512, activation=tf.nn.relu)
    self.layer2 = dense(256, activation=tf.nn.relu)
    # One-logit heads for short/long answer start and end positions.
    self.start_short = dense(1, name="start_short")
    self.end_short = dense(1, name="end_short")
    self.start_long = dense(1, name="start_long")
    self.end_long = dense(1, name="end_long")
def build(self, input_shape):
    """Create the word-embedding weight matrix when the layer is first built.

    The weight is scoped under "word_embeddings" and sized
    (vocab_size, hidden_size) from attributes set in __init__.
    """
    with tf.name_scope("word_embeddings"):
        self.word_embeddings = self.add_weight(
            "weight",
            shape=[self.vocab_size, self.hidden_size],
            initializer=get_initializer(self.initializer_range))