Code example #1
    def test_encoder_decoder_from_pretrained(self):
        load_weight_prefix = TFEncoderDecoderModel.load_weight_prefix

        config = self.get_encoder_decoder_config()
        encoder_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
        decoder_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

        input_ids = encoder_tokenizer("who sings does he love me with reba", return_tensors="tf").input_ids
        decoder_input_ids = decoder_tokenizer("Linda Davis", return_tensors="tf").input_ids

        with tempfile.TemporaryDirectory() as tmp_dirname:

            # Since most of HF's models don't have pretrained cross-attention layers, those layers are
            # randomly initialized even when models are created with the `from_pretrained` method.
            # For this test, the decoder needs to be a model with pretrained cross-attention layers,
            # so we create pretrained models (without `load_weight_prefix`), save them, and later
            # load them with `from_pretrained`.
            # (This isn't needed for the encoder, but it keeps the encoder/decoder code symmetric.)
            encoder = TFAutoModel.from_pretrained("bert-base-uncased", name="encoder")
            # It's necessary to specify `add_cross_attention=True` here.
            decoder = TFAutoModelForCausalLM.from_pretrained(
                "bert-base-uncased", is_decoder=True, add_cross_attention=True, name="decoder"
            )
            pretrained_encoder_dir = os.path.join(tmp_dirname, "pretrained_encoder")
            pretrained_decoder_dir = os.path.join(tmp_dirname, "pretrained_decoder")
            encoder.save_pretrained(pretrained_encoder_dir)
            decoder.save_pretrained(pretrained_decoder_dir)
            del encoder
            del decoder

            enc_dec_model = TFEncoderDecoderModel.from_encoder_decoder_pretrained(
                pretrained_encoder_dir,
                pretrained_decoder_dir,
            )
            # check that the from pretrained methods work
            enc_dec_model.save_pretrained(tmp_dirname)
            enc_dec_model = TFEncoderDecoderModel.from_pretrained(tmp_dirname)

            output = enc_dec_model(input_ids, decoder_input_ids=decoder_input_ids, labels=decoder_input_ids)

            loss_pretrained = output.loss
            del enc_dec_model

            # Create the model via `__init__` with the loaded pretrained encoder / decoder
            encoder = TFAutoModel.from_pretrained(
                pretrained_encoder_dir, load_weight_prefix=load_weight_prefix, name="encoder"
            )
            decoder = TFAutoModelForCausalLM.from_pretrained(
                pretrained_decoder_dir, load_weight_prefix=load_weight_prefix, name="decoder"
            )
            enc_dec_model = TFEncoderDecoderModel(config=config, encoder=encoder, decoder=decoder)

        output = enc_dec_model(input_ids, decoder_input_ids=decoder_input_ids, labels=decoder_input_ids)

        loss_init = output.loss

        max_diff = np.max(np.abs(loss_pretrained - loss_init))
        expected_diff = 0.0

        self.assertAlmostEqual(max_diff, expected_diff, places=4)
Code example #2
def build_model(model_id1='bert-base-multilingual-cased',
                model_id2='bert-base-multilingual-uncased',
                max_len=192,
                dropout=0.2,
                **_):
    """ build a dual TFAutoModel """
    print(model_id1, model_id2)

    transformer1 = TFAutoModel.from_pretrained(model_id1)
    transformer2 = TFAutoModel.from_pretrained(model_id2)

    input_word_ids1 = Input(shape=(max_len, ),
                            dtype=tf.int32,
                            name="input_word_ids1")
    out1 = transformer1(input_word_ids1)

    input_word_ids2 = Input(shape=(max_len, ),
                            dtype=tf.int32,
                            name="input_word_ids2")
    out2 = transformer2(input_word_ids2)

    sequence_output1 = out1[0]
    sequence_output2 = out2[0]
    cls_token1 = sequence_output1[:, 0, :]
    cls_token2 = sequence_output2[:, 0, :]

    # element-wise sum of the two dropped-out CLS embeddings
    x = Dropout(dropout)(cls_token1) + Dropout(dropout)(cls_token2)
    out = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=[input_word_ids1, input_word_ids2], outputs=out)

    return model
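The dual encoder above takes two equally padded token-id tensors, one per tokenizer. A minimal usage sketch, assuming a toy two-sentence batch with made-up binary labels:

import tensorflow as tf
from transformers import AutoTokenizer

tok1 = AutoTokenizer.from_pretrained('bert-base-multilingual-cased')
tok2 = AutoTokenizer.from_pretrained('bert-base-multilingual-uncased')

texts = ["an example sentence", "another example"]
enc1 = tok1(texts, padding='max_length', truncation=True, max_length=192, return_tensors='tf')
enc2 = tok2(texts, padding='max_length', truncation=True, max_length=192, return_tensors='tf')

model = build_model(max_len=192)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC'])
model.fit([enc1.input_ids, enc2.input_ids], tf.constant([[0.0], [1.0]]), epochs=1)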
Code example #3
def create_model(n_dense1=64, n_dense2=16, dout_rate=0.1, **kwargs):
    # kwargs is a dict, so use subscripting (attribute access would raise AttributeError)
    embedding_base = kwargs["embedding_base"]  # specify ProtBERT_BFD or XLNET
    categories = kwargs["categories"]  # number of labels

    # acrobatics to avoid putting a model inside a model in keras, which prevents saving the model
    if embedding_base == "ProtBERT_BFD":
        if kwargs.get("max_len"):
            assert isinstance(kwargs["max_len"], int)
            max_len = kwargs["max_len"]
        else:
            max_len = defaults.MAX_LEN

        base = TFAutoModel.from_pretrained('Rostlab/prot_bert_bfd')
        assert isinstance(base, TFBertModel)
        main_layer = base.bert

        input_ids = tf.keras.layers.Input(shape=(max_len, ),
                                          name='input_ids',
                                          dtype='int32')
        mask = tf.keras.layers.Input(shape=(max_len, ),
                                     name='attention_mask',
                                     dtype='int32')

        embeddings = main_layer(input_ids, attention_mask=mask)[0]

    elif embedding_base == "XLNET":  # TODO: probably needs debugging
        base = TFAutoModel.from_pretrained("Rostlab/prot_xlnet", from_pt=True)
        assert isinstance(base, TFXLNetForSequenceClassification)
        main_layer = base.xlnet

        inputs = tf.keras.layers.Input(shape=None,
                                       name="input layer",
                                       ragged=True)
        embeddings = main_layer(inputs)[0]

    else:
        raise ValueError(f"create_model(): invalid embedding_base {embedding_base!r}")
    del base

    # TODO: fix input tensor issue from embedding layers : [0]
    X = tf.keras.layers.GlobalMaxPooling1D()(embeddings)
    X = tf.keras.layers.BatchNormalization()(X)
    X = tf.keras.layers.Dense(n_dense1, activation='relu')(X)
    X = tf.keras.layers.Dropout(dout_rate)(X)
    X = tf.keras.layers.Dense(n_dense2, activation='relu')(X)
    y = tf.keras.layers.Dense(categories, activation='softmax',
                              name='outputs')(X)
    # if you are going to adjust the inner workings of the classification head, do so here.

    # NOTE: only valid for the ProtBERT_BFD branch; the XLNET branch defines `inputs` instead
    model = tf.keras.Model(inputs=(input_ids, mask), outputs=[y])
    model.layers[2].trainable = False  # freeze the transformer main layer

    return model
Code example #4
def _embedding_from_bert():

    with tf.device("CPU:0"):
        input_pretrained_bert = TFAutoModel.from_pretrained(
            config.input_pretrained_model,
            trainable=False,
            name=config.input_pretrained_model)
        target_pretrained_bert = TFAutoModel.from_pretrained(
            config.target_pretrained_model,
            trainable=False,
            name=config.target_pretrained_model)
    decoder_embedding = target_pretrained_bert.get_weights()[0]

    return (decoder_embedding, input_pretrained_bert, target_pretrained_bert)
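`get_weights()[0]` relies on the word-embedding matrix, shaped (vocab_size, hidden_size), being the first weight tensor of a TF BERT model. A hedged sketch of how such a matrix could seed a Keras Embedding layer for a decoder; the layer name and trainability here are assumptions:

import tensorflow as tf

def make_decoder_embedding(weight_matrix, trainable=True):
    # wrap a pretrained (vocab_size, hidden_size) matrix in an Embedding layer
    vocab_size, hidden_size = weight_matrix.shape
    return tf.keras.layers.Embedding(
        vocab_size,
        hidden_size,
        embeddings_initializer=tf.keras.initializers.Constant(weight_matrix),
        trainable=trainable,
        name='decoder_embedding')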
Code example #5
    def test_from_pretrained_with_tuple_values(self):
        # For the auto model mapping, FunnelConfig has two models: FunnelModel and FunnelBaseModel
        model = TFAutoModel.from_pretrained("sgugger/funnel-random-tiny")
        self.assertIsInstance(model, TFFunnelModel)

        config = copy.deepcopy(model.config)
        config.architectures = ["FunnelBaseModel"]
        model = TFAutoModel.from_config(config)
        self.assertIsInstance(model, TFFunnelBaseModel)

        with tempfile.TemporaryDirectory() as tmp_dir:
            model.save_pretrained(tmp_dir)
            model = TFAutoModel.from_pretrained(tmp_dir)
            self.assertIsInstance(model, TFFunnelBaseModel)
Code example #6
    def __init__(
            self,
            pretrained_model_name_or_path,
            reduce_output='sum',
            trainable=True,
            num_tokens=None,
            **kwargs
    ):
        super(AutoTransformerEncoder, self).__init__()
        try:
            from transformers import TFAutoModel
        except ModuleNotFoundError:
            logger.error(
                ' transformers is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)

        self.transformer = TFAutoModel.from_pretrained(
            pretrained_model_name_or_path
        )
        self.reduce_output = reduce_output
        if self.reduce_output != 'cls_pooled':
            self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        self.transformer.trainable = trainable
        self.transformer.resize_token_embeddings(num_tokens)
Code example #7
File: single_model.py Project: tangletit/jigsaw20
def build_model(model_id='jplu/tf-xlm-roberta-large',
                from_pt=False,
                transformer=None,
                max_len=192,
                dropout=0.2,
                pooling='first',
                **_):
    """ build a TFAutoModel """
    if transformer is None:
        transformer = TFAutoModel.from_pretrained(model_id, from_pt=from_pt)

    input_word_ids = Input(shape=(max_len, ),
                           dtype=tf.int32,
                           name="input_word_ids")
    sequence_output = transformer(input_word_ids)[0]

    if pooling == 'first':
        cls_token = sequence_output[:, 0, :]
    elif pooling == 'max':
        cls_token = GlobalMaxPooling1D()(sequence_output)
    elif pooling == 'avg':
        cls_token = GlobalAveragePooling1D()(sequence_output)
    elif pooling == 'GeM':
        cls_token = GeneralizedMeanPooling1D(p=3)(sequence_output)
    else:
        raise ValueError(f"unknown pooling mode: {pooling!r}")

    if dropout > 0:
        cls_token = Dropout(dropout)(cls_token)

    out = Dense(1, activation='sigmoid')(cls_token)
    model = Model(inputs=input_word_ids, outputs=out)

    return model
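Because the builder accepts a ready-made `transformer`, one set of pretrained weights can be shared across pooling variants instead of being re-downloaded per call. A minimal sketch, assuming the default model id; 'GeM' is omitted since it needs the project's GeneralizedMeanPooling1D layer:

shared = TFAutoModel.from_pretrained('jplu/tf-xlm-roberta-large')
for mode in ('first', 'max', 'avg'):
    m = build_model(transformer=shared, pooling=mode)
    print(mode, m.output_shape)  # (None, 1) for every pooling mode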
Code example #8
def build_classifier(model_name, max_len, learning_rate, metrics):
    """
    Constructing a transformer model given a configuration.
    """
    # Defining the encoded inputs
    input_ids = tf.keras.layers.Input(shape=(max_len, ),
                                      dtype=tf.int32,
                                      name="input_ids")

    # Loading pretrained transformer model
    transformer_model = TFAutoModel.from_pretrained(model_name)

    # Defining the data embedding using the loaded model
    transformer_embeddings = transformer_model(input_ids)[0]

    # Defining the classifier layer
    output_values = tf.keras.layers.Dense(3, activation="softmax")(
        transformer_embeddings[:, 0, :])

    # Constructing the final model along with an optimizer, loss function and metrics
    model = tf.keras.Model(inputs=input_ids, outputs=output_values)
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    # the Dense head already applies a softmax, so the loss must not expect logits
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    model.compile(optimizer=opt, loss=loss, metrics=metrics)

    return model
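An equivalent formulation keeps the head linear and lets the loss apply the softmax internally, which is often more numerically stable. A sketch of the two lines that would change:

# alternative: emit raw logits and tell the loss to expect them
output_values = tf.keras.layers.Dense(3)(transformer_embeddings[:, 0, :])
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)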
Code example #9
def build_model(hparams):

    bert_model = TFAutoModel.from_pretrained(hparams["bert_file_name"])
    bert_model.trainable = True

    if hparams['trainable_bert'] is not None:
        bert_model.trainable = hparams['trainable_bert']

    input_layer_ids = Input(shape=(hparams['max_sequence_length'],), dtype='int64')
    input_layer_masks = Input(shape=(hparams['max_sequence_length'],), dtype='int64')
    bert_output = bert_model([input_layer_ids,input_layer_masks])
    bert_output = bert_output[1]

    classifier = Dense(
        units=2,
        activation='sigmoid',
        kernel_initializer=initializers.RandomUniform(
            minval=-1 / np.sqrt(bert_output.shape[1]),
            maxval=1 / np.sqrt(bert_output.shape[1])
        ),
        bias_initializer=initializers.Zeros(),
        kernel_regularizer=regularizers.l2(hparams['l2_regularization'])
    )(bert_output)

    model = Model(inputs=[input_layer_ids, input_layer_masks], outputs=classifier)
    model.compile(
        loss=dice_loss,
        optimizer=Adam(learning_rate=hparams["learning_rate"]),
        metrics=[f1_score]
    )
    plot_model(model, "model_bert.png", show_layer_names=False)
    return model
Code example #10
def build_model(model_name, max_len, learning_rate, metrics):
    """
    Building the Deep Learning architecture
    """
    # defining encoded inputs
    input_ids = Input(shape=(max_len, ), dtype=tf.int32, name="input_ids")

    # defining transformer model embeddings
    transformer_model = TFAutoModel.from_pretrained(model_name)
    transformer_embeddings = transformer_model(input_ids)[0]

    # defining output layer
    output_values = Dense(512, activation="relu")(transformer_embeddings[:, 0, :])
    output_values = Dropout(0.5)(output_values)
    #output_values = Dense(32, activation = "relu")(output_values)
    output_values = Dense(1, activation='sigmoid')(output_values)

    # defining model
    model = Model(inputs=input_ids, outputs=output_values)
    opt = Adam(learning_rate=learning_rate)
    loss = tf.keras.losses.BinaryCrossentropy()

    model.compile(optimizer=opt, loss=loss, metrics=metrics)

    return model
Code example #11
    def __init__(self, model_name: str, output_dim: int) -> None:
        super(SentimentAnalysisModel, self).__init__()

        config = AutoConfig.from_pretrained(model_name)

        self.transformer = TFAutoModel.from_pretrained(model_name)

        # freeze all but the last layer of the transformer
        layers_to_freeze = []  # empty default so the loop below is a no-op for unknown types
        if type(self.transformer) is TFGPT2Model:
            layers_to_freeze = self.transformer.layers[0].h[:-1]
        elif type(self.transformer) is TFDistilBertModel:
            layers_to_freeze = self.transformer.layers[
                0].transformer.layer[:-1]
        elif type(self.transformer) is TFT5Model:
            layers_to_freeze = list(self.transformer.layers[1].block[:-1])
            layers_to_freeze.extend(self.transformer.layers[2].block[:-1])

        for layer in layers_to_freeze:
            layer.trainable = False

        print(
            f'Init model: frozen {len(self.transformer.non_trainable_variables)} variables.'
        )

        self.pre_classifier = Linear(units=config.hidden_size,
                                     input_dim=config.hidden_size,
                                     activation='linear')
        self.dropout = Dropout(0.3)
        # self.classifier = Linear(units=output_dim, input_dim=config.hidden_size, activation='linear')
        self.classifier = Linear(units=1,
                                 input_dim=config.hidden_size,
                                 activation='linear')
Code example #12
    def __init__(
        self,
        seq_len: int = 100,
        text_model_name: str = 'bert-base-uncased',
        vision_model: tf.keras.Model = VGG19(weights="imagenet",
                                             include_top=False)
    ) -> None:

        super(VisionBertModel, self).__init__()
        self.text_model_layer = TFAutoModel.from_pretrained(text_model_name)
        self.text_model_layer.trainable = False

        self.vision_model = vision_model
        self.vision_model.trainable = False

        self.flatten = Flatten()
        self.dropout = Dropout(0.2)
        self.concat = Concatenate(axis=1)

        self.global_dense1 = Dense(128, activation='relu')
        self.global_dense2 = Dense(64, activation='relu')
        self.global_dense3 = Dense(1, activation='sigmoid')
        self.dense_text1 = Dense(768, activation='relu')
        self.dense_text2 = Dense(256, activation='relu')
        self.img_dense1 = Dense(512 * 8, activation='relu')
        self.img_dense2 = Dense(512 * 4, activation='relu')
        self.img_dense3 = Dense(512 * 2, activation='relu')
        self.img_dense4 = Dense(512, activation='relu')
        self.img_dense5 = Dense(256, activation='relu')
Code example #13
def build_transformer(transformer,
                      max_seq_length,
                      num_labels,
                      tagging=True,
                      tokenizer_only=False):
    tokenizer = AutoTokenizer_.from_pretrained(transformer)
    if tokenizer_only:
        return tokenizer
    l_bert = TFAutoModel.from_pretrained(transformer)
    l_input_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                        dtype='int32',
                                        name="input_ids")
    l_mask_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                       dtype='int32',
                                       name="mask_ids")
    l_token_type_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                             dtype='int32',
                                             name="token_type_ids")
    output = l_bert(input_ids=l_input_ids,
                    token_type_ids=l_token_type_ids,
                    attention_mask=l_mask_ids).last_hidden_state
    if not tagging:
        output = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    logits = tf.keras.layers.Dense(num_labels)(output)
    model = tf.keras.Model(inputs=[l_input_ids, l_mask_ids, l_token_type_ids],
                           outputs=logits)
    model.build(input_shape=(None, max_seq_length))
    return model, tokenizer
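A hedged end-to-end sketch of driving the returned pair; the model name, lengths, and sentence are placeholders, and `AutoTokenizer_` is assumed to behave like `transformers.AutoTokenizer`:

model, tokenizer = build_transformer('bert-base-cased', max_seq_length=128,
                                     num_labels=5, tagging=True)
enc = tokenizer("An example sentence", padding='max_length', truncation=True,
                max_length=128, return_tensors='tf')
# inputs follow the order [input_ids, mask_ids, token_type_ids] used in the Model
logits = model.predict([enc['input_ids'], enc['attention_mask'], enc['token_type_ids']])
print(logits.shape)  # (1, 128, 5) with tagging=True; (1, 5) otherwise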
Code example #14
    def create_model(self, path_weights=None):
        phobert = TFAutoModel.from_pretrained("vinai/phobert-base")
        self.tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base",
                                                       use_fast=False)
        MAX_LEN = 25
        ids = tf.keras.layers.Input(shape=(MAX_LEN, ), dtype=tf.int32)
        mask = tf.keras.layers.Input(shape=(MAX_LEN, ),
                                     name='attention_mask',
                                     dtype='int32')
        # For transformers v4.x+:

        embeddings = phobert(ids, attention_mask=mask)[0]
        X = (tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(128)))(embeddings)
        X = tf.keras.layers.BatchNormalization()(X)
        X = tf.keras.layers.Dense(128, activation='relu')(X)
        X = tf.keras.layers.Dropout(0.1)(X)
        y = tf.keras.layers.Dense(6, activation='softmax', name='outputs')(X)
        self.model = tf.keras.models.Model(inputs=[ids, mask], outputs=[y])
        # model.summary()
        # model.layers[2].trainable = False
        # model.layers[2].roberta.embeddings.trainable = True
        # print()
        # print(model.layers[2])
        # inputs = [inputs]
        # model.compile(optimizer='Adam',loss = 'categorical_crossentropy',metrics='accuracy')
        if path_weights is not None:
            self.model.load_weights(path_weights)
Code example #15
def _embedding_from_bert():

    with tf.device("CPU:0"):
        input_pretrained_bert = TFAutoModel.from_pretrained(
            config.input_pretrained_model,
            trainable=False,
            name=config.input_pretrained_model)
        target_pretrained_bert = TFAutoModel.from_pretrained(
            config.target_pretrained_model,
            trainable=False,
            name=config.target_pretrained_model
        ) if config['task'] == 'translate' else input_pretrained_bert
    decoder_embedding = target_pretrained_bert.get_weights()[0]
    log.info(f"Decoder_Embedding matrix shape '{decoder_embedding.shape}'")

    return (decoder_embedding, input_pretrained_bert, target_pretrained_bert)
Code example #16
def build_model(transformer_layer, max_len, learning_rate):
    # must use this to send to TPU cores
    with strategy.scope():
        # define input(s)
        input_ids = tf.keras.Input(shape=(max_len, ), dtype=tf.int32)
        print("input")

        # insert roberta layer
        roberta = TFAutoModel.from_pretrained(transformer_layer)
        roberta = roberta(input_ids)[0]
        print("roberta")
        # only need <s> token here, so we extract it now
        out = roberta[:, 0, :]

        # add our softmax layer
        out = tf.keras.layers.Dense(3, activation='softmax')(out)
        print("dense")
        # assemble model and compile
        model = tf.keras.Model(inputs=input_ids, outputs=out)
        print("model")

        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

    return model
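This builder references a global `strategy` defined elsewhere; a standard TPU initialization sketch that would provide it, assuming a TPU runtime is actually available:

import tensorflow as tf

resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.TPUStrategy(resolver)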
Code example #17
    def test_output_embeds_base_model(self):
        model = TFAutoModel.from_pretrained("amazon/bort")

        input_ids = tf.convert_to_tensor(
            [[
                0, 18077, 4082, 7804, 8606, 6195, 2457, 3321, 11, 10489, 16,
                269, 2579, 328, 2
            ]],
            dtype=tf.int32,
        )  # Schloß Nymphenburg in Munich is really nice!

        output = model(input_ids)["last_hidden_state"]
        expected_shape = tf.TensorShape((1, 15, 1024))
        self.assertEqual(output.shape, expected_shape)
        # compare the actual values for a slice.
        expected_slice = tf.convert_to_tensor(
            [[[-0.0349, 0.0436, -1.8654], [-0.6964, 0.0835, -1.7393],
              [-0.9819, 0.2956, -0.2868]]],
            dtype=tf.float32,
        )

        self.assertTrue(
            np.allclose(output[:, :3, :3].numpy(),
                        expected_slice.numpy(),
                        atol=1e-4))
Code example #18
    def test_build_save_load_model(self):
        """Test that full model is built properly."""
        strategy = tf.distribute.MirroredStrategy(
            cross_device_ops=tf.distribute.HierarchicalCopyAllReduce())
        os.makedirs("biomed_roberta_base")
        self.model.save_pretrained("biomed_roberta_base")
        with strategy.scope():
            model = TFAutoModel.from_pretrained("biomed_roberta_base",
                                                from_pt=True)
            model = build_model(model)
        shutil.rmtree("biomed_roberta_base")

        self.assertEqual(
            str(type(model)),
            "<class 'tensorflow.python.keras.engine.training.Model'>")

        save_model(model, timed_dir_name=False, transformer_dir=self.out_dir)

        self.assertTrue(
            os.path.isfile(os.path.join(self.out_dir, 'sigmoid.pickle')))
        self.assertTrue(
            os.path.isfile(os.path.join(self.out_dir, 'config.json')))
        self.assertTrue(
            os.path.isfile(os.path.join(self.out_dir, 'tf_model.h5')))

        pickle_path = os.path.join(self.out_dir, 'sigmoid.pickle')
        model = load_model(pickle_path=pickle_path,
                           transformer_dir=self.out_dir)

        self.assertEqual(
            str(type(model)),
            "<class 'tensorflow.python.keras.engine.training.Model'>")
Code example #19
    def test_revision_not_found(self):
        with self.assertRaisesRegex(
                EnvironmentError,
                r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)"
        ):
            _ = TFAutoModel.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER,
                                            revision="aaaaaa")
Code example #20
    def test_model_file_not_found(self):
        with self.assertRaisesRegex(
                EnvironmentError,
                "hf-internal-testing/config-no-model does not appear to have a file named tf_model.h5",
        ):
            _ = TFAutoModel.from_pretrained(
                "hf-internal-testing/config-no-model")
Code example #21
File: __init__.py Project: yk/jina-hub
    def post_init(self):
        from transformers import TFAutoModel, AutoTokenizer

        self.tokenizer = AutoTokenizer.from_pretrained(self.base_tokenizer_model)
        self.model = TFAutoModel.from_pretrained(
            self.pretrained_model_name_or_path, output_hidden_states=True
        )
        self.to_device()
Code example #22
File: tf_processing.py Project: kritim13/task-vt
def load_model(pickle_path, transformer_dir='transformer', max_len=512):
    """Load a keras model containing a transformer layer."""
    transformer = TFAutoModel.from_pretrained(transformer_dir)
    model = build_model(transformer, max_len=max_len)
    sigmoid = pickle.load(open(pickle_path, 'rb'))
    model.get_layer('sigmoid').set_weights(sigmoid)

    return model
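The loader implies a save step that is not shown. A minimal counterpart under the same assumptions (a head layer named 'sigmoid' and a pickled list of its weights); this is a sketch, not the project's actual saver:

import pickle
from transformers import TFPreTrainedModel

def save_model_sketch(model, transformer_dir='transformer', pickle_path='sigmoid.pickle'):
    for layer in model.layers:
        if isinstance(layer, TFPreTrainedModel):
            # writes config.json + tf_model.h5, which load_model() expects
            layer.save_pretrained(transformer_dir)
    with open(pickle_path, 'wb') as f:
        pickle.dump(model.get_layer('sigmoid').get_weights(), f)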
Code example #23
def _compute_tensorflow(model_names, dictionary, average_over, amp):
    for c, model_name in enumerate(model_names):
        print(f"{c + 1} / {len(model_names)}")
        config = AutoConfig.from_pretrained(model_name)
        model = TFAutoModel.from_pretrained(model_name, config=config)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # `input_text` is assumed to be defined at module scope
        tokenized_sequence = tokenizer.encode(input_text,
                                              add_special_tokens=False)

        max_input_size = tokenizer.max_model_input_sizes[model_name]
        batch_sizes = [1, 2, 4, 8]
        slice_sizes = [8, 64, 128, 256, 512, 1024]

        dictionary[model_name] = {
            "bs": batch_sizes,
            "ss": slice_sizes,
            "results": {}
        }
        dictionary[model_name]["results"] = {i: {} for i in batch_sizes}

        print("Using model", model)

        @tf.function
        def inference(inputs):
            return model(inputs)

        for batch_size in batch_sizes:
            for slice_size in slice_sizes:
                if max_input_size is not None and slice_size > max_input_size:
                    dictionary[model_name]["results"][batch_size][
                        slice_size] = "N/A"
                else:
                    sequence = tf.stack([
                        tf.squeeze(
                            tf.constant(
                                tokenized_sequence[:slice_size])[None, :])
                    ] * batch_size)

                    try:
                        print("Going through model with sequence of shape",
                              sequence.shape)
                        # To make sure that the model is traced + that the tensors are on the appropriate device
                        inference(sequence)

                        runtimes = timeit.repeat(lambda: inference(sequence),
                                                 repeat=average_over,
                                                 number=3)
                        average_time = sum(runtimes) / float(
                            len(runtimes)) / 3.0
                        dictionary[model_name]["results"][batch_size][
                            slice_size] = average_time
                    except tf.errors.ResourceExhaustedError as e:
                        print("Doesn't fit on GPU.", e)
                        dictionary[model_name]["results"][batch_size][
                            slice_size] = "N/A"
    return dictionary
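The loop reads a module-level `input_text` that is not shown. A plausible setup; the text and the model list are placeholders, and `amp` is unused by the function as written:

input_text = "This is a sample input sentence for benchmarking. " * 64
results = _compute_tensorflow(["bert-base-uncased"], {}, average_over=3, amp=False)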
Code example #24
    def test_cached_model_has_minimum_calls_to_head(self):
        # Make sure we have cached the model.
        _ = TFAutoModel.from_pretrained("hf-internal-testing/tiny-random-bert")
        with RequestCounter() as counter:
            _ = TFAutoModel.from_pretrained(
                "hf-internal-testing/tiny-random-bert")
            self.assertEqual(counter.get_request_count, 0)
            self.assertEqual(counter.head_request_count, 1)
            self.assertEqual(counter.other_request_count, 0)

        # With a sharded checkpoint
        _ = TFAutoModel.from_pretrained("ArthurZ/tiny-random-bert-sharded")
        with RequestCounter() as counter:
            _ = TFAutoModel.from_pretrained("ArthurZ/tiny-random-bert-sharded")
            self.assertEqual(counter.get_request_count, 0)
            # There is no pytorch_model.bin so we still get one call for this one.
            self.assertEqual(counter.head_request_count, 2)
            self.assertEqual(counter.other_request_count, 0)
Code example #25
    def test_model_from_pretrained(self):
        model_name = "bert-base-cased"
        config = AutoConfig.from_pretrained(model_name)
        self.assertIsNotNone(config)
        self.assertIsInstance(config, BertConfig)

        model = TFAutoModel.from_pretrained(model_name)
        self.assertIsNotNone(model)
        self.assertIsInstance(model, TFBertModel)
Code example #26
File: re_model.py Project: kritim13/task-vt
    def load_model(self, pickle_path: str, transformer_dir: str = 'transformer', max_len=512):
        """
        Special function to load a keras model that uses a transformer layer
        """
        transformer = TFAutoModel.from_pretrained(transformer_dir)
        model = self.build_model(transformer, max_len=max_len)
        sigmoid = pickle.load(open(pickle_path, 'rb'))
        model.get_layer('sigmoid').set_weights(sigmoid)

        return model
Code example #27
    def _load(self):
        """Load the BERTweet tokenizer, model and tweet normaliser."""
        self._tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base",
                                                        use_fast=False)
        self._model = TFAutoModel.from_pretrained("vinai/bertweet-base")
        self._normalizer = TweetNormalisation()
        self.OUTPUT = "last_hidden_state"
Code example #28
def load_bert(bert_name):
    ###################################
    # --------- Setup BERT ---------- #
    # Load transformers config and set output_hidden_states to True
    config = AutoConfig.from_pretrained(bert_name)
    config.output_hidden_states = True
    # Load BERT tokenizer
    tokenizer = AutoTokenizer.from_pretrained(bert_name)
    # Load the Transformers BERT model with the modified config
    transformer_model = TFAutoModel.from_pretrained(bert_name, config=config)
    return tokenizer, transformer_model, config
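With `output_hidden_states=True` in the config, the model returns every layer's activations. A short usage sketch; the model id and sentence are placeholders:

tokenizer, transformer_model, config = load_bert('bert-base-uncased')
enc = tokenizer("An example sentence", return_tensors='tf')
out = transformer_model(enc)
# one (batch, seq_len, hidden) tensor per layer, plus the embedding output
print(len(out.hidden_states))  # 13 for a 12-layer BERT base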
Code example #29
    def test_rag_sequence_from_pretrained(self):
        load_weight_prefix = "tf_rag_model_1"

        rag_config = self.get_rag_config()
        rag_decoder_tokenizer = BartTokenizer.from_pretrained(
            "facebook/bart-large-cnn")
        rag_question_encoder_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(
            "facebook/dpr-question_encoder-single-nq-base")
        rag_retriever = RagRetriever(
            rag_config,
            question_encoder_tokenizer=rag_question_encoder_tokenizer,
            generator_tokenizer=rag_decoder_tokenizer,
        )

        input_ids = rag_question_encoder_tokenizer(
            "who sings does he love me with reba",
            return_tensors="tf").input_ids
        decoder_input_ids = rag_decoder_tokenizer(
            "Linda Davis", return_tensors="tf").input_ids

        with tempfile.TemporaryDirectory() as tmp_dirname:
            rag_sequence = TFRagSequenceForGeneration.from_pretrained_question_encoder_generator(
                "facebook/dpr-question_encoder-single-nq-base",
                "facebook/bart-large-cnn",
                retriever=rag_retriever,
                config=rag_config,
            )
            # check that the from pretrained methods work
            rag_sequence.save_pretrained(tmp_dirname)
            rag_sequence.from_pretrained(tmp_dirname, retriever=rag_retriever)

            output = rag_sequence(input_ids, labels=decoder_input_ids)

            loss_pretrained = output.loss
            del rag_sequence

        question_encoder = TFAutoModel.from_pretrained(
            "facebook/dpr-question_encoder-single-nq-base")
        generator = TFAutoModelForSeq2SeqLM.from_pretrained(
            "facebook/bart-large-cnn",
            load_weight_prefix=load_weight_prefix,
            name="generator")

        rag_sequence = TFRagSequenceForGeneration(
            config=rag_config,
            question_encoder=question_encoder,
            generator=generator,
            retriever=rag_retriever)

        output = rag_sequence(input_ids, labels=decoder_input_ids)

        loss_init = output.loss

        self.assertAlmostEqual(loss_pretrained, loss_init, places=4)
Code example #30
def load_model_and_tokenizer(model_name, tensor_type):
    if tensor_type == "tf":
        from transformers import TFAutoModel as AutoModel
    elif tensor_type == "pt":
        from transformers import AutoModel
    else:
        raise ValueError(f"tensor_type must be 'tf' or 'pt', got {tensor_type!r}")

    model = AutoModel.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer
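A brief usage sketch of the framework switch; the model id is a placeholder. The `pad_token` fallback matters for GPT-style tokenizers, which ship without a pad token:

tf_model, tf_tokenizer = load_model_and_tokenizer("gpt2", "tf")
pt_model, pt_tokenizer = load_model_and_tokenizer("gpt2", "pt")
print(tf_tokenizer.pad_token)  # falls back to the EOS token for GPT-2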