Ejemplo n.º 1
0
    def __init__(self,
                 config,
                 as_features=False,
                 use_layer_norm=True,
                 *inputs,
                 **kwargs):
        """Build a DistilBERT encoder with a CORAL ordinal-regression head.

        Args:
            config: model configuration; reads `num_labels`, `dim`,
                `initializer_range` and `seq_classif_dropout`.
            as_features: when True, freeze the DistilBERT backbone so it
                acts as a fixed feature extractor.
            use_layer_norm: stored flag. NOTE(review): the
                LayerNormalization layer below is created unconditionally —
                presumably the flag is checked in call(); confirm there.
        """
        super(TFDistilBertForOrdinalRegression,
              self).__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels
        self.use_layer_norm = use_layer_norm

        # Backbone is trainable only when not used as a feature extractor.
        self.distilbert = TFDistilBertMainLayer(config,
                                                name="distilbert",
                                                trainable=not as_features)
        self.pre_classifier = tf.keras.layers.Dense(
            config.dim,
            kernel_initializer=get_initializer(config.initializer_range),
            activation="relu",
            name="pre_classifier",
        )
        # CORAL head: rank-consistent logits for ordinal regression.
        self.classifier = CORAL(config.num_labels,
                                kernel_initializer=get_initializer(
                                    config.initializer_range),
                                name="classifier")
        self.dropout = tf.keras.layers.Dropout(config.seq_classif_dropout)
        self.layer_norm = tf.keras.layers.LayerNormalization()
Ejemplo n.º 2
0
    def __init__(self, config, *inputs, **kwargs):
        """
        Base QA head: span-position and answer-type classifiers on top of
        a pluggable transformer backend.

        Subclasses of this class are different in self.backend,
        which should be a model that outputs a tensor of shape (batch_size, hidden_dim), and the
        `backend_call()` method.

        We will use Hugging Face Bert/DistilBert as backend in this notebook.

        NOTE(review): no super().__init__() call is made here; if the parent
        class is a tf.keras.Model this would fail on the first attribute
        assignment — confirm the parent's contract.
        """

        # Filled in by subclasses with the actual transformer model.
        self.backend = None

        # Dropout rates are overridable via kwargs, defaulting to 0.05.
        self.seq_output_dropout = tf.keras.layers.Dropout(
            kwargs.get('seq_output_dropout_prob', 0.05))
        self.pooled_output_dropout = tf.keras.layers.Dropout(
            kwargs.get('pooled_output_dropout_prob', 0.05))

        # Two logits per token: answer start / end position scores.
        self.pos_classifier = tf.keras.layers.Dense(
            2,
            kernel_initializer=get_initializer(config.initializer_range),
            name='pos_classifier')

        self.answer_type_classifier = tf.keras.layers.Dense(
            NB_ANSWER_TYPES,
            kernel_initializer=get_initializer(config.initializer_range),
            name='answer_type_classifier')
Ejemplo n.º 3
0
 def __init__(self, config, hidden_size, dropout, activation, **kwargs):
     """Classification head: Dense -> Dropout -> projection to num_labels."""
     super().__init__(**kwargs)
     # Hidden projection using the model's standard weight initialization.
     self.dense = layers.Dense(hidden_size,
                               kernel_initializer=get_initializer(
                                   config.initializer_range),
                               activation=activation,
                               name="dense")
     # Final projection: one logit per label.
     self.out_proj = layers.Dense(
         config.num_labels,
         kernel_initializer=get_initializer(config.initializer_range),
         name="out_proj",
     )
     self.dropout = layers.Dropout(dropout)
    def create_model_cls_output(self, trainable=True):
        """Build and compile a BERT classifier over the pooled [CLS] output.

        Args:
            trainable: whether the BERT encoder weights are fine-tuned.

        Returns:
            A compiled tf.keras.Model taking (input_ids, attention_mask).
        """
        # Pretrained BERT encoder, optionally frozen.
        bert = TFBertModel.from_pretrained("bert-base-uncased")
        bert.trainable = trainable

        ids_in = layers.Input(shape=(512, ), dtype=tf.int32)
        mask_in = layers.Input(shape=(512, ), dtype=tf.int32)

        outputs = bert(ids_in, attention_mask=mask_in)
        # outputs[1] is the pooled [CLS] representation.
        x = layers.Dropout(0.1)(outputs[1])
        logits = layers.Dense(
            self.nums_category,
            kernel_initializer=modeling_tf_utils.get_initializer(0.02))(x)

        model = tf.keras.Model(inputs=[ids_in, mask_in], outputs=[logits])

        optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5,
                                             epsilon=1e-08)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True)
        model.compile(optimizer=optimizer, loss=loss, metrics=["acc"])
        model.summary()
        return model
Ejemplo n.º 5
0
    def __init__(self, config, *inputs, **kwargs):
        """GPT-2 main layer: token and position embeddings, embedding
        dropout, `n_layer` transformer blocks, and a final LayerNorm.
        """
        super(TFGPT2MainLayer, self).__init__(config, *inputs, **kwargs)
        self.output_hidden_states = config.output_hidden_states
        self.output_attentions = config.output_attentions
        self.num_hidden_layers = config.n_layer
        self.vocab_size = config.vocab_size
        self.n_embd = config.n_embd

        # Token embeddings (TFSharedEmbeddings: shared with the LM head).
        self.wte = TFSharedEmbeddings(
            config.vocab_size,
            config.hidden_size,
            initializer_range=config.initializer_range,
            name='wte')
        # Learned absolute position embeddings.
        self.wpe = tf.keras.layers.Embedding(
            config.n_positions,
            config.n_embd,
            embeddings_initializer=get_initializer(config.initializer_range),
            name='wpe')
        self.drop = tf.keras.layers.Dropout(config.embd_pdrop)
        # Transformer stack; the 'h_._{i}' naming keeps variable scopes
        # compatible with pretrained checkpoints.
        self.h = [
            TFBlock(config.n_ctx, config, scale=True, name='h_._{}'.format(i))
            for i in range(config.n_layer)
        ]
        self.ln_f = tf.keras.layers.LayerNormalization(
            epsilon=config.layer_norm_epsilon, name='ln_f')
Ejemplo n.º 6
0
 def __init__(self, config):
     """Wrap a pretrained transformer for classification.

     Args:
         config: carries `model_type`, `output_hidden_state` and
             `bert_trainable`.
     """
     # BUG FIX: super().__init__() must run before any sub-layer is
     # assigned to self — tf.keras raises a "forgot to call
     # super().__init__()" error otherwise. The original called it last.
     super(TransformerClsModel, self).__init__(config)
     self.transformer = get_transformer(
         bert_model_type=config.model_type,
         output_hidden_states=config.output_hidden_state)
     if not config.bert_trainable:
         # Freeze the backbone when only the head should be trained.
         self.transformer.trainable = False
     self.kernel_initializer = get_initializer(
         self.transformer.config.initializer_range)
Ejemplo n.º 7
0
    def __init__(self, config, *inputs, **kwargs):
        """RoBERTa backbone with per-label single-unit heads and a learned
        weighted sum over hidden states.
        """
        super().__init__(config, *inputs, **kwargs)

        # NOTE(review): named `num_layers` but assigned `num_labels` —
        # confirm intent at the use site.
        self.num_layers = config.num_labels
        self.backbone = TFRobertaModel(config,
                                       *inputs,
                                       **kwargs,
                                       name="roberta_backbone")

        self.dropout = tf.keras.layers.Dropout(0.2)
        # Heavier dropout, presumably for multi-sample dropout — verify.
        self.dropout_multisampled = tf.keras.layers.Dropout(0.5)

        # One single-unit head per label. NOTE(review): every head shares
        # the name "classifier", which makes checkpoints harder to read.
        self.classifiers = [
            tf.keras.layers.Dense(
                1,
                kernel_initializer=get_initializer(config.initializer_range),
                name="classifier") for _ in range(config.num_labels)
        ]

        self.concat = tf.keras.layers.Concatenate(axis=-1)

        # Learnable mixing weights over num_hidden_layers + 1 states; the
        # -3.0 inits make the softmax initially favor the last entry.
        self.hidden_states_weights = tf.Variable(
            initial_value=[-3.0] * config.num_hidden_layers + [0.0],
            dtype='float32',
            trainable=True,
            name="hidden_state_weights")
        self.softmax_act = tf.keras.layers.Softmax(axis=0)

        # Freeze the pooler. Uses a private attribute — fragile across
        # transformers versions.
        self.backbone.roberta.pooler._trainable = False
Ejemplo n.º 8
0
    def __init__(self, config, *inputs, **kwargs):
        """Longformer sequence classifier: ReLU pre-classifier + linear head."""
        super().__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        self.longformer = TFLongformerMainLayer(config, name="longformer")
        self.dropout = tf.keras.layers.Dropout(0.2)
        # Hidden-size bottleneck with ReLU before the final projection.
        self.pre_classifier = tf.keras.layers.Dense(
            config.hidden_size,
            kernel_initializer=get_initializer(config.initializer_range),
            activation="relu",
            name="pre_classifier")
        self.classifier = tf.keras.layers.Dense(
            config.num_labels,
            kernel_initializer=get_initializer(config.initializer_range),
            name="classifier")
Ejemplo n.º 9
0
 def __init__(self, config, other_config):
     """Dropout plus a linear head over `config.num_labels` classes."""
     super().__init__()
     # `other_config` is accepted for interface parity but unused here.
     self.num_labels = config.num_labels
     self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
     self.classifier = tf.keras.layers.Dense(
         config.num_labels,
         kernel_initializer=get_initializer(config.initializer_range),
         name='classifier',
     )
Ejemplo n.º 10
0
 def __init__(self, config, other_config):
     """QA head predicting start/end logits over `max_length` positions."""
     super().__init__()
     # NOTE: an Input layer inside a subclassed model is unusual; kept
     # as the original had it.
     self.unique_id = tf.keras.layers.Input(shape=(),
                                            dtype='int32',
                                            name='unique_id')
     self.num_labels = config.num_labels
     self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
     self.qa_outputs = tf.keras.layers.Dense(config.num_labels,
                                             name='qa_outputs')
     # Identical start/end heads, one logit per token position.
     for attr, layer_name in (('start_pos_classifier', 'start_position'),
                              ('end_pos_classifier', 'end_position')):
         setattr(self, attr, tf.keras.layers.Dense(
             other_config['max_length'],
             kernel_initializer=get_initializer(config.initializer_range),
             name=layer_name))
Ejemplo n.º 11
0
 def __init__(self, num_labels):
     """Chinese BERT encoder with a sigmoid multi-label classifier."""
     super(MyModel, self).__init__()
     self.bert = TFBertModel.from_pretrained('bert-base-chinese')
     self.dropout = tf.keras.layers.Dropout(0.1)
     # Sigmoid output: each label scored independently in [0, 1].
     self.classifier = tf.keras.layers.Dense(
         units=num_labels,
         activation=tf.nn.sigmoid,
         kernel_initializer=get_initializer(0.02),
     )
Ejemplo n.º 12
0
 def __init__(self, config, *inputs, **kwargs):
   """RoBERTa backbone, dropout, and a single-unit output head."""
   super(CustomModel, self).__init__(config, *inputs, **kwargs)
   self.roberta = TFRobertaMainLayer(config, name="roberta")
   self.dropout_1 = tf.keras.layers.Dropout(0.3)
   # One output unit (binary / regression style head).
   self.classifier = tf.keras.layers.Dense(
       units=1,
       kernel_initializer=get_initializer(config.initializer_range),
       name='classifier')
Ejemplo n.º 13
0
    def __init__(self, config, *inputs, **kwargs):
        """DistilBERT classifier that also consumes 4 metadata features."""
        super().__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        self.distilbert = TFDistilBertMainLayer(config, name='distilbert')
        # Side input carrying the 4 metadata features.
        self.metadata_inputs = tf.keras.layers.InputLayer(input_shape=(4, ),
                                                          name='metadata')
        self.fully_connected = tf.keras.layers.Dense(
            config.dim,
            activation='relu',
            kernel_initializer=get_initializer(config.initializer_range),
            name='fully_connected')
        self.dropout = tf.keras.layers.Dropout(config.seq_classif_dropout)
        self.classifier = tf.keras.layers.Dense(
            config.num_labels,
            kernel_initializer=get_initializer(config.initializer_range),
            name='classifier')
Ejemplo n.º 14
0
 def __init__(self, config, *inputs, **kwargs):
     """BERT multi-class classifier whose head emits softmax probabilities."""
     super(TFBertForMultiClassification, self).__init__(
         config, *inputs, **kwargs)
     self.num_labels = config.num_labels
     self.bert = TFBertMainLayer(config, name='bert')
     self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
     # Softmax is baked into the head: outputs are probabilities, not
     # logits — losses must use from_logits=False.
     self.classifier = tf.keras.layers.Dense(
         config.num_labels,
         activation='softmax',
         kernel_initializer=get_initializer(config.initializer_range),
         name='classifier')
Ejemplo n.º 15
0
 def __init__(self, bert_config, lm_prediction_head):
     """LM head wrapper that shares its embedding matrix with the head.

     Args:
         bert_config: BERT configuration; reads vocab_size, hidden_size
             and the embedding's initializer range.
         lm_prediction_head: factory taking (config, embeddings) and
             returning the actual prediction head.
     """
     super(LMPredictionHead, self).__init__()
     self.embedding = TFBertEmbeddings(bert_config)
     # Create the embedding weight eagerly so it exists before the
     # prediction head (which ties to this matrix) is constructed.
     self.embedding.weight = self.embedding.add_weight(
         name="weight",
         shape=[bert_config.vocab_size, bert_config.hidden_size],
         initializer=get_initializer(self.embedding.initializer_range),
     )
     self.lm_prediction_head = lm_prediction_head(
         bert_config, self.embedding)
Ejemplo n.º 16
0
 def get_pooler_output(self, last_hidden_states):
     """Return the pooled representation of the first ([CLS]) token.

     Args:
         last_hidden_states: encoder output; position 0 along axis 1 is
             assumed to be the [CLS] token — TODO confirm with the caller.

     Returns:
         The [CLS] vector passed through a ReLU pre-classifier Dense layer.
     """
     # BUG FIX: the original constructed a brand-new Dense layer on every
     # call, so its weights were re-initialized each time and could never
     # be trained. Create the layer lazily once and reuse it afterwards.
     if getattr(self, "_pre_classifier", None) is None:
         self._pre_classifier = Dense(
             self.transformer.config.dim,
             kernel_initializer=get_initializer(
                 self.transformer.config.initializer_range),
             activation="relu",
             name="pre_classifier",
         )
     pooled_output = last_hidden_states[:, 0]
     pooled_output = self._pre_classifier(pooled_output)
     return pooled_output
Ejemplo n.º 17
0
    def __init__(self, config, *inputs, **kwargs):
        """XLM encoder followed by dropout and a linear classifier head."""
        super().__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        self.transformer = TFXLMMainLayer(config, name="transformer")
        self.dropout = tf.keras.layers.Dropout(config.dropout)
        # XLM configs expose `init_std` rather than `initializer_range`.
        self.classifier = tf.keras.layers.Dense(config.num_labels,
                                                kernel_initializer=get_initializer(
                                                    config.init_std),
                                                name="classifier")
Ejemplo n.º 18
0
    def __init__(self, config, *inputs, **kwargs):
        """ALBERT multiple-choice model: scores each candidate choice."""
        super().__init__(config, *inputs, **kwargs)

        self.albert = TFAlbertModel(config, name='albert')
        self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
        # Single output unit: each choice is scored independently and the
        # scores are compared across choices.
        self.classifier = tf.keras.layers.Dense(
            1,
            kernel_initializer=get_initializer(config.initializer_range),
            name='classifier')
    def __init__(self, config, *inputs, **kwargs):
        """ALBERT QA model with per-token span logits and a long-answer logit."""
        super().__init__(config, *inputs, **kwargs)

        self.albert = TFAlbertMainLayer(config)

        # One initializer instance shared by both heads, as the original did.
        self.initializer = get_initializer(config.initializer_range)
        self.qa_outputs = tf.keras.layers.Dense(
            config.num_labels,
            kernel_initializer=self.initializer,
            name='qa_outputs')
        self.long_outputs = tf.keras.layers.Dense(
            1,
            kernel_initializer=self.initializer,
            name='long_outputs')
Ejemplo n.º 20
0
 def __init__(self, config, *inputs, **kwargs):
     """ALBERT sequence classifier (SCR variant).

     Args:
         config: must provide `num_labels`, `classifier_dropout_prob`
             and `initializer_range`.
     """
     super().__init__(config, *inputs, **kwargs)
     self.num_labels = config.num_labels
     # Removed leftover debug `print(kwargs)` — it spammed stdout on
     # every model construction and served no purpose.
     self.isSCR = True
     self.albert = TFAlbertMainLayer(config, name="albert")
     self.dropout = tf.keras.layers.Dropout(config.classifier_dropout_prob)
     self.classifier = tf.keras.layers.Dense(
         config.num_labels,
         kernel_initializer=get_initializer(config.initializer_range),
         name="classifier")
Ejemplo n.º 21
0
    def __init__(self, config, *inputs, **kwargs):
        """BERT QA model: per-token span logits plus a long-answer logit."""
        super().__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        self.bert = TFBertMainLayer(config, name='bert')
        # One initializer instance is shared by both heads (as original).
        self.initializer = get_initializer(config.initializer_range)
        self.qa_outputs = L.Dense(
            config.num_labels, kernel_initializer=self.initializer,
            name='qa_outputs')
        self.long_outputs = L.Dense(
            1, kernel_initializer=self.initializer, name='long_outputs')
Ejemplo n.º 22
0
    def __init__(self, config, *inputs, **kwargs):
        """BERT binary classifier: single-logit head with 0.5 dropout.

        NOTE(review): `num_labels` and `dropout_rate` are assigned before
        super().__init__() runs; depending on the tf.keras version this can
        trip the "call super().__init__() first" attribute check — confirm
        it is safe with the parent class actually in use.
        """
        self.num_labels = 1
        self.dropout_rate = 0.5
        super(TFBertForBinarySequenceClassification,
              self).__init__(config, *inputs, **kwargs)

        self.bert = TFBertMainLayer(config, name='bert')
        self.dropout = tf.keras.layers.Dropout(self.dropout_rate)
        self.classifier = tf.keras.layers.Dense(
            self.num_labels,
            kernel_initializer=get_initializer(config.initializer_range),
            name='classifier')
Ejemplo n.º 23
0
def create_sl_cls_model(model_name_or_path, input_seq_length, args):
    """Build and compile a transformer sequence classifier.

    Args:
        model_name_or_path: HF checkpoint to load the encoder from.
        input_seq_length: fixed token length of the three input tensors.
        args: needs `output_path`, `model_select`, `label2id`, `lr`.

    Returns:
        A compiled keras.Model mapping
        (input_ids, token_type_ids, attention_mask) -> class probabilities.
    """
    ## transformer encoder
    encoder = TFAutoModel.from_pretrained(model_name_or_path)

    # Persist the encoder config next to the weights exactly once.
    encoder_config = encoder.config
    if not os.path.isfile(os.path.join(args.output_path, "config.json")):
        encoder_config.save_pretrained(args.output_path)

    input_ids = layers.Input(shape=(input_seq_length, ), dtype=tf.int32)
    token_type_ids = layers.Input(shape=(input_seq_length, ), dtype=tf.int32)
    attention_mask = layers.Input(shape=(input_seq_length, ), dtype=tf.int32)

    if "distilbert" in args.model_select:
        # distilbert does not allow to pass token_type_ids
        sequence_outs = encoder(input_ids, attention_mask=attention_mask)[0]
    else:
        sequence_outs = encoder(input_ids,
                                token_type_ids=token_type_ids,
                                attention_mask=attention_mask)[0]

    # Re-implements modeling_tf_bert:TFBertPooler here, because models such
    # as RoBERTa and Electra do not offer a pooled_output directly; this
    # keeps the head generalizable. Simply using sequence_outs[:, 0] would
    # likely perform about the same.
    pooled_output = tf.keras.layers.Dense(
        encoder_config.hidden_size,
        kernel_initializer=get_initializer(encoder_config.initializer_range),
        activation="tanh",
        name="dense",
    )(sequence_outs[:, 0])

    # NOTE(review): training=True keeps dropout active even at inference
    # (Monte-Carlo-dropout style) — presumably intentional; confirm.
    if hasattr(encoder_config, "hidden_dropout_prob"):
        pooled_output = tf.keras.layers.Dropout(
            encoder_config.hidden_dropout_prob)(pooled_output, training=True)
    else:
        pooled_output = tf.keras.layers.Dropout(encoder_config.dropout)(
            pooled_output, training=True)

    logits = tf.keras.layers.Dense(len(args.label2id),
                                   name="classifier",
                                   use_bias=False)(pooled_output)
    probs = layers.Activation(keras.activations.softmax)(logits)

    model = keras.Model(
        inputs=[input_ids, token_type_ids, attention_mask],
        outputs=probs,
    )

    # The model outputs probabilities, so from_logits=False.
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    # FIX: `lr` is a deprecated alias removed in newer Keras; use the
    # canonical `learning_rate` keyword, matching usage elsewhere in
    # this codebase.
    optimizer = keras.optimizers.Adam(learning_rate=args.lr)
    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=['accuracy', get_lr_metric(optimizer)])
    return model
Ejemplo n.º 24
0
    def __init__(self, num_units, output_embed_num_units):
        """GPT-2 core over continuous inputs with index embeddings.

        Args:
            num_units: transformer hidden size (n_embd); continuous inputs
                are projected to this width.
            output_embed_num_units: width of the output projection.
        """
        super().__init__()
        # vocab_size=1: inputs are dense vectors, not token ids.
        self.config = GPT2Config(vocab_size=1,
                                 n_positions=1024,
                                 n_ctx=1024,
                                 n_embd=num_units,
                                 n_layer=6,
                                 n_head=8)
        # Projects raw inputs into the transformer's embedding space.
        self.input_embedding = tf.keras.layers.Dense(num_units)
        self.transformer = TFGPT2Model(self.config)
        self.output_embedding = tf.keras.layers.Dense(output_embed_num_units)
        # Index embeddings; MAX_NUM_TOKENS is defined elsewhere in this
        # module — presumably the max sequence length, verify.
        self.text_idx_embedding = tf.keras.layers.Embedding(
            MAX_NUM_TOKENS,
            self.config.n_embd,
            embeddings_initializer=modeling_tf_utils.get_initializer(
                self.config.initializer_range))

        self.obj_idx_embedding = tf.keras.layers.Embedding(
            MAX_NUM_TOKENS,
            self.config.n_embd,
            embeddings_initializer=modeling_tf_utils.get_initializer(
                self.config.initializer_range))
Ejemplo n.º 25
0
    def __init__(self, config, *inputs, **kwargs):
        """DistilBERT regressor with 35 metadata inputs and reshape-pooling."""
        super().__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        self.distilbert = TFDistilBertMainLayer(config, name='distilbert')
        # Side input carrying the 35 metadata features.
        self.metadata_inputs = tf.keras.layers.InputLayer(
            input_shape=(35, ), name='metadata')
        self.fully_connected = tf.keras.layers.Dense(
            512,
            activation='relu',
            kernel_initializer=get_initializer(config.initializer_range),
            name='fully_connected')
        # Pools the 768-wide encoder features down to 48.
        self.pooling = ReshapePoolingReshape(pool_size=16,
                                             target_shape_in=(1, 768),
                                             input_shape_in=(768, ),
                                             target_shape_out=(48, ),
                                             input_shape_out=(1, 48))
        self.regressor = tf.keras.layers.Dense(
            config.num_labels,
            kernel_initializer=get_initializer(config.initializer_range),
            name='regressor')
Ejemplo n.º 26
0
    def __init__(self, config, **kwargs):
        """Simplified BERT embeddings: position embeddings, LayerNorm, dropout."""
        super(TFBertEmbeddingsSimple, self).__init__(**kwargs)

        # Cache sizes needed when the word-embedding weight is created
        # elsewhere (e.g. in build()).
        self.vocab_size = config.vocab_size
        self.hidden_size = config.hidden_size
        self.initializer_range = config.initializer_range

        self.position_embeddings = tf.keras.layers.Embedding(
            config.max_position_embeddings, config.hidden_size,
            embeddings_initializer=get_initializer(self.initializer_range),
            name='position_embeddings')
        self.LayerNorm = tf.keras.layers.LayerNormalization(
            epsilon=config.layer_norm_eps, name='LayerNorm')
        self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
Ejemplo n.º 27
0
    def __init__(self, config, *inputs, **kwargs):
        """BERT backbone with a weighted-sum-of-hidden-states classifier."""
        super().__init__(config, *inputs, **kwargs)

        # NOTE(review): named `num_layers` but assigned `num_labels` —
        # confirm intent at the use site.
        self.num_layers = config.num_labels
        self.backbone = TFBertModel(config,
                                    *inputs,
                                    **kwargs,
                                    name="bert_backbone")

        self.dropout = tf.keras.layers.Dropout(0.2)
        # Heavier dropout, presumably for multi-sample dropout — verify.
        self.dropout_multisampled = tf.keras.layers.Dropout(0.5)

        # Learns mixing weights over the encoder's hidden states.
        self.weighted_sum = WeightedSumLayer(config.num_hidden_layers)

        self.classifier = tf.keras.layers.Dense(
            config.num_labels,
            kernel_initializer=get_initializer(config.initializer_range),
            name="classifier")

        # Freeze the pooler. Uses a private attribute — fragile across
        # transformers versions.
        self.backbone.bert.pooler._trainable = False
    def __init__(self, config, *inputs, **kwargs):
        """ALBERT QA model: MLP trunk plus four single-logit span heads."""
        super().__init__(config, *inputs, **kwargs)

        self.albert = TFALBertMainLayer(config)
        self.initializer = get_initializer(config.initializer_range)

        # Two-layer ReLU trunk over the ALBERT embeddings. Creation order
        # is preserved: these layers are unnamed, so Keras auto-naming
        # depends on it.
        self.layer_1 = tf.keras.layers.Dense(
            512, kernel_initializer=self.initializer, activation=tf.nn.relu)
        self.layer2 = tf.keras.layers.Dense(
            256, kernel_initializer=self.initializer, activation=tf.nn.relu)

        # One logit each for short-answer and long-answer span boundaries.
        self.start_short = tf.keras.layers.Dense(
            1, kernel_initializer=self.initializer, name="start_short")
        self.end_short = tf.keras.layers.Dense(
            1, kernel_initializer=self.initializer, name="end_short")
        self.start_long = tf.keras.layers.Dense(
            1, kernel_initializer=self.initializer, name="start_long")
        self.end_long = tf.keras.layers.Dense(
            1, kernel_initializer=self.initializer, name="end_long")
Ejemplo n.º 29
0
 def build(self, input_shape):
     with tf.name_scope("word_embeddings"):
         self.word_embeddings = self.add_weight(
             "weight",
             shape=[self.vocab_size, self.hidden_size],
             initializer=get_initializer(self.initializer_range))