def load_transformer(model_type):
    if model_type == "distilbert":
        tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
        model = TFDistilBertForSequenceClassification.from_pretrained(
            "distilbert-base-uncased", num_labels=1)
    elif model_type == "bert_x12":
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        model = TFBertForSequenceClassification.from_pretrained(
            "bert-base-uncased", num_labels=1)
    elif model_type == "bert_x24":
        tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
        model = TFBertForSequenceClassification.from_pretrained(
            "bert-large-uncased", num_labels=1)
    elif model_type == "albert_v2_x12":
        tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
        model = TFAlbertForSequenceClassification.from_pretrained(
            "albert-base-v2", num_labels=1)
    elif model_type == "longformer_x12":
        tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
        model = TFLongformerForSequenceClassification.from_pretrained(
            "allenai/longformer-base-4096", num_labels=1)
    elif model_type == "longformer_x24":
        tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-large-4096')
        model = TFLongformerForSequenceClassification.from_pretrained(
            "allenai/longformer-large-4096", num_labels=1)
    else:
        raise ValueError(f"{model_type} was invalid")
    return model, tokenizer
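# Hedged usage sketch (not part of the original snippet): how load_transformer might be
# called. The "bert_x12" choice, the sample sentence, and max_length are assumptions.
model, tokenizer = load_transformer("bert_x12")
encoded = tokenizer("An example passage to score.", truncation=True,
                    padding="max_length", max_length=128, return_tensors="tf")
outputs = model(encoded)  # logits of shape (1, 1) because num_labels=1 (regression head)
score = float(outputs.logits.numpy()[0][0])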
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.bert_layer = TFBertForSequenceClassification.from_pretrained('bert-base-cased')
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    self.cached_train_dataset = None
    self.cached_val_dataset = None
def load_model_from_zip(self, zip_archive):
    """Load the components of a serialized model from a given zip file using the
    python ZipFile interface and return an instance of the model.

    Arguments:
        zip_archive: ZipFile
            an instance of the python ZipFile interface that has loaded the file
            path specified by self.resource.disk_target

    Returns:
        model: Any
            any format of machine learning model that will be stored in the
            self.model attribute for later use
    """
    # read the bytes of the nested archive from the zip file
    with zip_archive.open('transformer.zip', "r") as file:
        model_bytes = file.read()

    # unpack the Hugging Face save directory from the nested archive and load the model
    with tempfile.TemporaryDirectory() as directory:
        with tempfile.NamedTemporaryFile(suffix=".zip") as archive:
            archive.write(model_bytes)
            archive.flush()  # make sure all bytes are on disk before unpacking
            shutil.unpack_archive(archive.name, directory)
            return TFBert.from_pretrained(directory)
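# Hedged counterpart sketch (assumed, not from the original source): load_model_from_zip
# expects a member named 'transformer.zip' that itself wraps a Hugging Face
# save_pretrained() directory. One way such an archive could be produced; the helper
# name save_model_to_zip is hypothetical.
import shutil
import tempfile
import zipfile

def save_model_to_zip(model, zip_path):
    with tempfile.TemporaryDirectory() as directory:
        # write the Hugging Face save directory and wrap it in its own zip ...
        model.save_pretrained(directory)
        nested = shutil.make_archive(directory + "_archive", "zip", directory)
        # ... then store that zip under the member name the loader expects
        with zipfile.ZipFile(zip_path, "w") as zip_archive:
            zip_archive.write(nested, arcname="transformer.zip")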
def carga_modelo_BERT(model_path):
    """Load the pre-trained BERT model located at `model_path`."""
    # Parameters from the HuggingFace script used for sentiment analysis on another dataset
    USE_XLA = False
    USE_AMP = False
    # TASK = "sst-2"
    # TFDS_TASK = "sst2"
    num_labels = 2

    tf.config.optimizer.set_jit(USE_XLA)
    tf.config.optimizer.set_experimental_options({"auto_mixed_precision": USE_AMP})

    # Load tokenizer and model from pretrained model/vocabulary. Specify the number
    # of labels to classify (2+: classification, 1: regression)
    config = BertConfig.from_pretrained("bert-base-cased", num_labels=num_labels)
    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    model = TFBertForSequenceClassification.from_pretrained("bert-base-cased", config=config)

    opt = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
    model.compile(optimizer=opt, loss=loss, metrics=[metric])

    model.load_weights(model_path)

    return model, tokenizer, config
def _load_remote_model(self, model_name, tokenizer_kwargs, model_kwargs):
    if model_name not in ModelsByFamily.Supported:
        raise ValueError(f'Model {model_name} not supported.')

    do_lower_case = False
    if 'uncased' in model_name.lower():
        do_lower_case = True
    tokenizer_kwargs.update({'do_lower_case': do_lower_case})

    self._tokenizer = None
    self._model = None

    if model_name in ModelsByFamily.Bert:
        self._tokenizer = BertTokenizer.from_pretrained(model_name, **tokenizer_kwargs)
        self._model = TFBertForSequenceClassification.from_pretrained(model_name, **model_kwargs)
    elif model_name in ModelsByFamily.Roberta:
        self._tokenizer = RobertaTokenizer.from_pretrained(model_name, **tokenizer_kwargs)
        self._model = TFRobertaForSequenceClassification.from_pretrained(model_name, **model_kwargs)
    elif model_name in ModelsByFamily.XLNet:
        self._tokenizer = XLNetTokenizer.from_pretrained(model_name, **tokenizer_kwargs)
        self._model = TFXLNetForSequenceClassification.from_pretrained(model_name, **model_kwargs)
    elif model_name in ModelsByFamily.DistilBert:
        self._tokenizer = DistilBertTokenizer.from_pretrained(model_name, **tokenizer_kwargs)
        self._model = TFDistilBertForSequenceClassification.from_pretrained(model_name, **model_kwargs)

    assert self._tokenizer and self._model
def build_model(self, max_length, train_batch_size, learning_rate, epochs, num_labels,
                tagset=None, gpu_growth=True, eval_batch_size=32):
    # if gpu_growth:
    #     model_utils.set_tf_memory_growth()

    if self.task == "pos":
        self.model = TFBertForTokenClassification.from_pretrained(
            self.model_name, num_labels=num_labels, from_pt=True)
        self.tokenizer = MBERT_Tokenizer_pos.from_pretrained(
            self.model_name, do_lower_case=False)
    else:
        self.model = TFBertForSequenceClassification.from_pretrained(
            self.model_name, num_labels=num_labels, from_pt=True)
        self.tokenizer = BertTokenizer.from_pretrained(
            self.model_name, do_lower_case=False)

    # self.model, self.tokenizer = model_utils.create_model(self.short_model_name, self.task, num_labels)
    self.model = model_utils.compile_model(self.model, self.task, learning_rate)
    print("Successfully built", self.model_name)

    self.max_length = max_length
    self.train_batch_size = train_batch_size
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.num_labels = num_labels
    if tagset:
        self.tagset = tagset
        self.label_map = {label: i for i, label in enumerate(tagset)}
    self.eval_batch_size = eval_batch_size
def build(self, **kwargs):
    optimizer = kwargs.get("optimizer", "adam")
    metrics = kwargs.get("metrics", ['accuracy'])
    dropout_rate = kwargs.get('dropout_rate', 0.5)

    ## BUILDING THE GRAPH
    input_ids = tf.keras.layers.Input(shape=(1, 50), name='input_ids', dtype=tf.int32)
    input_mask = tf.keras.layers.Input(shape=(1, 50), name='input_mask', dtype=tf.int32)

    # bert_layer = Lambda_Bert_Layer(trainable=False, dynamic=True)
    bert_layer = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
    bert_layer.bert.trainable = False
    bert_output = bert_layer(input_ids[:, 0, :], attention_mask=input_mask[:, 0, :])
    """
    bert_layer.trainable = False
    bert_output = bert_output[0][:, -1, :]
    last_state = tf.reshape(bert_output, shape=(-1, 768))
    dense_out_1 = tf.keras.layers.Dense(units=768, activation="relu")(last_state)  # reshape_lambda_layer
    dense_out_1 = tf.keras.layers.Dropout(dropout_rate)(dense_out_1)
    dense_out_2 = tf.keras.layers.Dense(units=200, activation="relu")(dense_out_1)
    dense_out_2 = tf.keras.layers.Dropout(dropout_rate)(dense_out_2)
    logits = tf.keras.layers.Dense(units=2, activation='softmax')(dense_out_2)
    """
    logits = bert_output[0]

    self.model = tf.keras.Model(inputs=(input_ids, input_mask), outputs=logits)
    self.model.compile(optimizer=optimizer,
                       loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                       metrics=metrics,
                       run_eagerly=True)
    self.model.summary()
def train(self, train_data, train_labels, dev_data, dev_labels,
          save_model_path="models/bert.pkl"):
    ds_train_encoded = self.encode_examples(train_data, train_labels).batch(self.batch_size)
    ds_dev_encoded = self.encode_examples(dev_data, dev_labels).batch(self.batch_size)

    self.model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
    optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate, epsilon=1e-08)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    self.model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    self.model.fit(ds_train_encoded, epochs=self.epochs, validation_data=ds_dev_encoded)

    predictions = self.model.predict(ds_dev_encoded, verbose=1).logits
    self.model.save_weights(save_model_path)
    print('Validation Loss:', log_loss(dev_labels, predictions))
def train_model(request):
    if request.method == "POST":
        inputed_batch_size = int(request.POST['batch_size'])
        model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased")
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

        # IMDB movie reviews
        dataset = get_dataset()
        # save dataset to file memory
        clean_dataset(dataset)

        # training and testing datasets here
        train = tf.keras.preprocessing.text_dataset_from_directory(
            'aclImdb/train', batch_size=inputed_batch_size,
            validation_split=0.2, subset='training', seed=123)
        test = tf.keras.preprocessing.text_dataset_from_directory(
            'aclImdb/train', batch_size=inputed_batch_size,
            validation_split=0.2, subset='validation', seed=123)

        # convert to pandas dataframes
        train = convert_dataset_to_dataframe(train)
        test = convert_dataset_to_dataframe(test)

        # convert data to tf datasets
        train_InputExamples, validation_InputExamples = convert_data_to_examples(
            train, test, DATA_COLUMN, LABEL_COLUMN)
        train_data = convert_examples_to_tf_dataset(list(train_InputExamples), tokenizer)
        train_data = train_data.shuffle(100).batch(32).repeat(2)
        validation_data = convert_examples_to_tf_dataset(list(validation_InputExamples), tokenizer)
        validation_data = validation_data.batch(32)

        # fine tune it!
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])
        model.fit(train_data, epochs=2, validation_data=validation_data)

        # save the model weights to memory to be used later
        model.save_pretrained(MOOD_MODEL_DIR)

    context = {}
    return render(request, 'mood_classifier/train_model_button.html', context)
def __init__(self):
    model_name = 'bert-base-cased'
    self.tokenizer = BertTokenizer.from_pretrained(model_name)
    self.model = TFBertForSequenceClassification.from_pretrained(
        'C:/Users/lbbre/Documents/ECAM 5/PRD/content/assets')
    self.max_seq_len = 32
def load(self, path):
    """Loads model from path"""
    self.strategy = self._get_distributed_strategy()
    with self.strategy.scope():
        self.initialise_models()
        self.model = TFBertForSequenceClassification.from_pretrained(path)
    self.trained_ = True
def model_build(self):
    self.bertConfig = BertConfig.from_pretrained(
        os.path.join(self.pretrain_path, "config.json"), num_labels=self.num_classes)
    self.model = TFBertForSequenceClassification.from_pretrained(
        os.path.join(self.pretrain_path, "tf_model.h5"), config=self.bertConfig)
    self.model.summary()
def build(self):
    # Handle the Meta Data
    ids = tf.keras.Input((self.config.max_length, ), dtype=tf.int32, name='input_ids')
    vn = tf.keras.Input((1, ), dtype=tf.float32, name='version_number')
    pl = tf.keras.Input((1, ), dtype=tf.float32, name='partisan_lean')
    cat = tf.keras.Input((self.config.n_sc_id_classes, ), dtype=tf.float32, name='sc_id')

    meta = tf.concat([vn, pl, cat], axis=-1)

    # Load the initial weights with the ones trained from the DL model without text
    if self.config.load_weights_from_no_text:
        # if 'no_text_dense_layer_initialization_path' in self.config:
        print("Using pretrained weights from the no_text model! --------------------")
        model_location = self.config.data_vol + "models/no_text/full_model.h5"
        ntdl = tf.keras.layers.Dense(
            self.config.n_dense_layers,
            activation='relu',
            name="no_text_dense_layer",
            kernel_initializer=noTextKernelInitializer(model_location=model_location),
            bias_initializer=noTextBiasInitializer(model_location=model_location))
    else:
        ntdl = tf.keras.layers.Dense(self.config.n_dense_layers,
                                     activation='relu',
                                     name="no_text_dense_layer")

    meta = ntdl(meta)

    # Handle the Transformer
    self.base_transformer_model = TFBertForSequenceClassification.from_pretrained(
        "bert-base-uncased")
    x = self.base_transformer_model.bert(ids)  # Get the main Layer
    x = x['last_hidden_state'][:, 0, :]
    x = tf.keras.layers.Dropout(0.2)(x)

    # Combine the two and run through another dense layer.
    x = tf.concat([x, meta], axis=-1)
    x = tf.keras.layers.Dense(self.config.n_dense_layers, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    dl_model = tf.keras.Model(
        inputs={"input_ids": ids, "version_number": vn, "partisan_lean": pl, "sc_id": cat},
        outputs=[x])

    self.deep_legis_model = dl_model
def create_model_2(self):
    model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=["acc"])
    model.summary()
    return model
def __init__(self, n_intents=None, dropout=0.2, model_name="bert-base-uncased"):
    super().__init__(name="intent_classifier")
    self.tokenizer = Tokenizer()
    self.bert = TFBertForSequenceClassification.from_pretrained(model_name)
    self.dropout = Dropout(dropout)
    self.intent_classifier = Dense(n_intents, activation='softmax')
def model_compile():
    model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = keras.metrics.SparseCategoricalAccuracy('accuracy')
    optimizer = keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08)
    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])
    return model
def __init__(self, extractor, config, *args, **kwargs):
    super(TFVanillaBert_Class, self).__init__(*args, **kwargs)
    self.extractor = extractor

    # TFBertForSequenceClassification contains both the BERT and the linear classifier layers
    self.bert = TFBertForSequenceClassification.from_pretrained(
        config["pretrained"], hidden_dropout_prob=0.1)

    assert extractor.config["numpassages"] == 1, "numpassages should be 1 for TFVanillaBERT"
    self.config = config
def build_model():
    import tensorflow as tf
    from transformers import TFBertForSequenceClassification

    model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased")
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
        reduction=tf.keras.losses.Reduction.NONE, from_logits=True)
    opt = tf.keras.optimizers.Adam(learning_rate=3e-5)
    model.compile(optimizer=opt, loss=loss_fn, metrics=['accuracy'])
    return model
def _init_model(self, num_labels=2):
    config = {"name": self.pretrained, "from_pt": self.from_pt}
    if self.pretrained in PRETRAINED_CONFIG:
        config = PRETRAINED_CONFIG[self.pretrained]

    pretrained = config["name"]
    from_pt = config["from_pt"]

    self.tokenizer = BertTokenizer.from_pretrained(pretrained)
    self.model = TFBertForSequenceClassification.from_pretrained(
        pretrained, from_pt=from_pt, num_labels=num_labels)
def fine_tune_model(ds, export_dir):
    (train_dataset, test_dataset, val_dataset) = get_test_train_val_datasets(ds)

    learning_rate = 2e-5
    number_of_epochs = 1

    model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    bert_history = model.fit(train_dataset, epochs=number_of_epochs, validation_data=val_dataset)
    model.save_pretrained(export_dir)
    return model
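# Hedged inference sketch (assumed follow-up, not part of the original snippet): the
# directory written by save_pretrained() above can be reloaded directly; export_dir and
# the sample sentence are placeholders.
model = TFBertForSequenceClassification.from_pretrained(export_dir)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
batch = tokenizer(["an example sentence to classify"],
                  padding=True, truncation=True, return_tensors="tf")
probs = tf.nn.softmax(model(batch).logits, axis=-1)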
def test_TFBertForSequenceClassification(self):
    from transformers import BertTokenizer, TFBertForSequenceClassification

    pretrained_weights = 'bert-base-uncased'
    tokenizer = BertTokenizer.from_pretrained(pretrained_weights)
    text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
    model = TFBertForSequenceClassification.from_pretrained(pretrained_weights)
    predictions = model.predict(inputs)
    onnx_model = keras2onnx.convert_keras(model, model.name)
    self.assertTrue(
        run_onnx_runtime(onnx_model.graph.name, onnx_model, inputs_onnx, predictions,
                         self.model_files))
def run_first_test():
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')

    input_ids = tf.constant(
        tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :]  # Batch size 1
    print(f"input_ids:{input_ids}")

    outputs = model(input_ids)
    logits = outputs[0]
    print(logits)
    print("-" * 30)
    print(f"outputs:{outputs}")
    print("-" * 30)
    model.summary()
def get_model(self):
    """
    Build the BERT model.

    Uses the standard constructor from the Transformers package to build the BERT
    model. This approach does not allow customizing the inputs and outputs, so it
    cannot be adapted to the deepnlp project worker without changing the deepnlp
    project parameters.
    """
    model = TFBertForSequenceClassification.from_pretrained(
        pretrained_model_name_or_path=self.model_name,
        from_pt=True if self.from_pt_word in self.model_name else False,
        num_labels=len(self.label_list))
    return model
def initialise_models(self):
    if self.pretrained == "bert":
        model_name = "bert-base-cased"
        from_pt = False
    elif self.pretrained == "scibert":
        model_name = "allenai/scibert_scivocab_cased"
        from_pt = True
    else:
        # guard against unsupported values instead of failing later with an undefined model_name
        raise ValueError(f"Unsupported pretrained model: {self.pretrained}")

    self.config = BertConfig.from_pretrained(model_name, num_labels=2)
    self.tokenizer = BertTokenizer.from_pretrained(model_name)
    self.model = TFBertForSequenceClassification.from_pretrained(
        model_name, config=self.config, from_pt=from_pt)
    return self.model
def bert_result(sentence):
    model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased")
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model.load_weights("senti_weights.ckpt")

    tf_batch = tokenizer(sentence, max_length=128, padding=True, truncation=True,
                         return_tensors='tf')
    tf_outputs = model(tf_batch)
    tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
    label = tf_predictions.numpy()
    return int(label[0][1] * 100)
def __init__(self, weights_path):
    """
    :param weights_path: specifies where to load/store the weights of the model
    :type weights_path: str
    """
    super().__init__(weights_path)

    # A tensorflow model of BERT large (uncased), pre-trained.
    # More on it in our report.
    self.__model = TFBertForSequenceClassification.from_pretrained('bert-large-uncased')

    # Instantiating a proper tokenizer for BERT
    self.__tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
def __init__(self, model_path="./modelSentiment", tokenizer="bert-base-uncased"):
    """
    Initialize model

    Downloads the model if not present
    """
    if os.path.exists(model_path):
        self.model = TFBertForSequenceClassification.from_pretrained(model_path)
    else:
        print("Downloading model...")
        Model.download_file_from_google_drive(
            "1uthnEb7WYnIR6y0VVX4gMPoqG-X0oKRK", "Modelfile.zip")
        with zipfile.ZipFile("Modelfile.zip", "r") as zip_ref:
            zip_ref.extractall("./")
        self.model = TFBertForSequenceClassification.from_pretrained(model_path)

    self.tokenizer = BertTokenizer.from_pretrained(tokenizer)
def evaluate(dataset, limit_num_sents: bool):
    # Split and tokenize dataset
    split = Split_BERT()
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    X_train, y_train = split.get_X_y(dataset['train'] + dataset['oos_train'],
                                     limit_num_sents=limit_num_sents, set_type='train')
    X_val, y_val = split.get_X_y(dataset['val'] + dataset['oos_val'],
                                 limit_num_sents=limit_num_sents, set_type='val')
    X_test, y_test = split.get_X_y(dataset['test'] + dataset['oos_test'],
                                   limit_num_sents=limit_num_sents, set_type='test')

    train_ids, train_attention_masks, train_labels = tokenize_BERT(X_train, y_train, tokenizer)
    val_ids, val_attention_masks, val_labels = tokenize_BERT(X_val, y_val, tokenizer)
    test_ids, test_attention_masks, test_labels = tokenize_BERT(X_test, y_test, tokenizer)

    num_labels = len(split.intents_dct.keys())

    # Train model
    model = TFBertForSequenceClassification.from_pretrained(
        'bert-base-uncased', num_labels=num_labels)  # we have to adjust the number of labels
    print('\nBert Model', model.summary())

    log_dir = 'tensorboard_data/tb_bert'
    model_save_path = './models/bert_model.h5'

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(filepath=model_save_path, save_weights_only=True,
                                           monitor='val_loss', mode='min', save_best_only=True),
        tf.keras.callbacks.TensorBoard(log_dir=log_dir)]

    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    optimizer = tf.keras.optimizers.Adam(learning_rate=4e-5)

    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    history = model.fit([train_ids, train_attention_masks], train_labels,
                        batch_size=32, epochs=5,
                        validation_data=([val_ids, val_attention_masks], val_labels),
                        callbacks=callbacks)

    # Test
    testing = Testing(model,
                      {'test_ids': test_ids, 'test_attention_masks': test_attention_masks},
                      test_labels, 'bert', split.intents_dct['oos'])
    results_dct = testing.test_train()

    return results_dct
def __init__(self):
    self.modified = False

    # load models
    self.classifier_tokenizer = BertTokenizer.from_pretrained(CLASSIFIER_PATH)
    self.classifier_config = AutoConfig.from_pretrained(CLASSIFIER_PATH)
    self.classifier_model = TFBertForSequenceClassification.from_pretrained(CLASSIFIER_PATH)

    self.ner_tokenizer = AutoTokenizer.from_pretrained(PARSBERTNER_PATH)
    self.ner_config = AutoConfig.from_pretrained(PARSBERTNER_PATH)
    self.ner_model = TFAutoModelForTokenClassification.from_pretrained(PARSBERTNER_PATH)

    self.weather_api = Weather()
    self.adhan_api = Adhan()
    self.time_api = Time()
    self.calender_api = Calender()
def fit(self, messages, y, epochs=2, validation_percent=0.15, allow_import=True):
    if self.english:
        messages = messages['Translation'].values
    else:
        messages = messages['Message'].values

    y = self.Lab_Encoder.fit_transform(y.to_numpy().astype(str)).astype(float)

    # preparing the data:
    # split in train and validation
    if os.path.exists(self.path) and allow_import:
        print("Loading Pretrained Model")
        self.model = bert_class.from_pretrained(self.path)
        return self
    else:
        X_train, X_val, y_train, y_val = train_test_split(
            messages, y, test_size=validation_percent, random_state=0)

        # tokenizing the data and making tf.dataset
        X_train_input = self.convert_to_input(X_train.astype(str))
        X_val_input = self.convert_to_input(X_val.astype(str))

        train_ds = tf.data.Dataset.from_tensor_slices(
            (X_train_input[0], X_train_input[1], X_train_input[2], y_train)).map(
                self.example_to_features).shuffle(100).batch(12).repeat(5)
        val_ds = tf.data.Dataset.from_tensor_slices(
            (X_val_input[0], X_val_input[1], X_val_input[2], y_val)).map(
                self.example_to_features).batch(12)

        self.model.fit(train_ds, epochs=epochs, validation_data=val_ds, verbose=1)

        os.mkdir(self.path)
        self.model.save_pretrained(self.path)

        return self