Example #1
def load_transformer(model_type):
    if model_type == "distilbert":
        tokenizer = DistilBertTokenizer.from_pretrained(
            'distilbert-base-uncased')
        model = TFDistilBertForSequenceClassification.from_pretrained(
            "distilbert-base-uncased", num_labels=1)
    elif model_type == "bert_x12":
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        model = TFBertForSequenceClassification.from_pretrained(
            "bert-base-uncased", num_labels=1)
    elif model_type == "bert_x24":
        tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
        model = TFBertForSequenceClassification.from_pretrained(
            "bert-large-uncased", num_labels=1)
    elif model_type == "albert_v2_x12":
        tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
        model = TFAlbertForSequenceClassification.from_pretrained(
            "albert-base-v2", num_labels=1)
    elif model_type == "longformer_x12":
        tokenizer = LongformerTokenizer.from_pretrained(
            'allenai/longformer-base-4096')
        model = TFLongformerForSequenceClassification.from_pretrained(
            "allenai/longformer-base-4096", num_labels=1)
    elif model_type == "longformer_x24":
        tokenizer = LongformerTokenizer.from_pretrained(
            'allenai/longformer-large-4096')
        model = TFLongformerForSequenceClassification.from_pretrained(
            "allenai/longformer-large-4096", num_labels=1)
    else:
        raise ValueError(model_type + " was invalid")

    return model, tokenizer
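The excerpt above omits its imports; below is a minimal, hedged usage sketch assuming the standard transformers class names used inside the function.

# Hedged usage sketch for load_transformer(); the imports below are assumed,
# not part of the original excerpt.
from transformers import (
    DistilBertTokenizer, TFDistilBertForSequenceClassification,
    BertTokenizer, TFBertForSequenceClassification,
    AlbertTokenizer, TFAlbertForSequenceClassification,
    LongformerTokenizer, TFLongformerForSequenceClassification)

model, tokenizer = load_transformer("distilbert")
inputs = tokenizer("an example sentence", return_tensors="tf")
outputs = model(inputs)  # num_labels=1, so the single logit acts as a regression head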
Example #2
 def __init__(self, *args, **kwargs):
     super().__init__(*args, **kwargs)
     self.bert_layer = TFBertForSequenceClassification.from_pretrained(
         'bert-base-cased')
     self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
     self.cashed_train_dataset = None
     self.cashed_val_dataset = None
Example #3
    def load_model_from_zip(self, zip_archive):
        """a function that loads components of a serialized model from a zip
        given zip file using the python ZipFile interface and returns an
        instance of the model

        Arguments:

        zip_archive: ZipFile
            an instance of the python ZipFile interface that has loaded
            the file path specified by self.resource.disk_target

        Returns:

        model: Any
            any format of machine learning model that will be stored
            in the self.model attribute for later use

        """

        # read the bytes of the nested transformer archive from the zip file
        with zip_archive.open('transformer.zip', "r") as file:
            model_bytes = file.read()  # bytes of a zipped hugging face save directory

        # load the bytes of the hugging face save path from a zip
        with tempfile.TemporaryDirectory() as directory:
            with tempfile.NamedTemporaryFile(suffix=".zip") as archive:
                archive.write(model_bytes)
                shutil.unpack_archive(archive.name, directory)
                return TFBert.from_pretrained(directory)
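A hedged usage sketch for the loader above (the attribute `resource.disk_target` comes from the docstring; `loader` stands in for an instance of the surrounding class, which is assumed, and the method itself also relies on the tempfile and shutil modules).

# Hypothetical usage of load_model_from_zip(); `loader` is an assumed
# instance of the class the method belongs to.
from zipfile import ZipFile

with ZipFile(loader.resource.disk_target, "r") as zip_archive:
    loader.model = loader.load_model_from_zip(zip_archive)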
Example #4
def carga_modelo_BERT(model_path):
    """Loads the pretrained BERT model located at the path `model_path`"""
    # Parameters from the script used by HuggingFace for sentiment analysis on another dataset
    USE_XLA = False
    USE_AMP = False
    #TASK = "sst-2"
    #TFDS_TASK = "sst2"
    num_labels = 2
    tf.config.optimizer.set_jit(USE_XLA)
    tf.config.optimizer.set_experimental_options(
        {"auto_mixed_precision": USE_AMP})

    # Load tokenizer and model from pretrained model/vocabulary. Specify the number of labels to classify (2+: classification, 1: regression)
    config = BertConfig.from_pretrained("bert-base-cased",
                                        num_labels=num_labels)
    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    model = TFBertForSequenceClassification.from_pretrained("bert-base-cased",
                                                            config=config)

    opt = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
    model.compile(optimizer=opt, loss=loss, metrics=[metric])
    model.load_weights(model_path)

    return model, tokenizer, config
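A short, hedged usage sketch of the loader above; the weights path and the sample sentence are illustrative only.

# Hypothetical usage of carga_modelo_BERT(); the weights path is illustrative.
import tensorflow as tf

model, tokenizer, config = carga_modelo_BERT("sentiment_weights.h5")
batch = tokenizer(["this film was wonderful"], padding=True,
                  truncation=True, return_tensors="tf")
outputs = model(batch)
probs = tf.nn.softmax(outputs[0], axis=-1)  # probabilities for the 2 labels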
Example #5
    def _load_remote_model(self, model_name, tokenizer_kwargs, model_kwargs):
        if model_name not in ModelsByFamily.Supported:
            raise ValueError(f'Model {model_name} not supported.')

        do_lower_case = False
        if 'uncased' in model_name.lower():
            do_lower_case = True
        tokenizer_kwargs.update({'do_lower_case': do_lower_case})

        self._tokenizer = None
        self._model = None

        if model_name in ModelsByFamily.Bert:
            self._tokenizer = BertTokenizer.from_pretrained(
                model_name, **tokenizer_kwargs)
            self._model = TFBertForSequenceClassification.from_pretrained(
                model_name, **model_kwargs)
        elif model_name in ModelsByFamily.Roberta:
            self._tokenizer = RobertaTokenizer.from_pretrained(
                model_name, **tokenizer_kwargs)
            self._model = TFRobertaForSequenceClassification.from_pretrained(
                model_name, **model_kwargs)
        elif model_name in ModelsByFamily.XLNet:
            self._tokenizer = XLNetTokenizer.from_pretrained(
                model_name, **tokenizer_kwargs)
            self._model = TFXLNetForSequenceClassification.from_pretrained(
                model_name, **model_kwargs)
        elif model_name in ModelsByFamily.DistilBert:
            self._tokenizer = DistilBertTokenizer.from_pretrained(
                model_name, **tokenizer_kwargs)
            self._model = TFDistilBertForSequenceClassification.from_pretrained(
                model_name, **model_kwargs)

        assert self._tokenizer and self._model
Example #6
 def build_model(self,
                 max_length,
                 train_batch_size,
                 learning_rate,
                 epochs,
                 num_labels,
                 tagset=None,
                 gpu_growth=True,
                 eval_batch_size=32):
     #if gpu_growth:
     #    model_utils.set_tf_memory_growth()
     if self.task == "pos":
         self.model = TFBertForTokenClassification.from_pretrained(
             self.model_name, num_labels=num_labels, from_pt=True)
         self.tokenizer = MBERT_Tokenizer_pos.from_pretrained(
             self.model_name, do_lower_case=False)
     else:
         self.model = TFBertForSequenceClassification.from_pretrained(
             self.model_name, num_labels=num_labels, from_pt=True)
         self.tokenizer = BertTokenizer.from_pretrained(self.model_name,
                                                        do_lower_case=False)
     #self.model, self.tokenizer = model_utils.create_model(self.short_model_name, self.task, num_labels)
     self.model = model_utils.compile_model(self.model, self.task,
                                            learning_rate)
     print("Successfully built", self.model_name)
     self.max_length = max_length
     self.train_batch_size = train_batch_size
     self.learning_rate = learning_rate
     self.epochs = epochs
     self.num_labels = num_labels
     if tagset:
         self.tagset = tagset
         self.label_map = {label: i for i, label in enumerate(tagset)}
     self.eval_batch_size = eval_batch_size
Example #7
    def build(self, **kwargs):
        optimizer = kwargs.get("optimizer", "adam")
        metrics = kwargs.get("metrics", ['accuracy'])
        dropout_rate = kwargs.get('dropout_rate', 0.5)

        ## BUILDING THE GRAPH
        input_ids = tf.keras.layers.Input(shape=(1,50), name='input_ids', dtype=tf.int32)
        input_mask = tf.keras.layers.Input(shape=(1,50), name='input_mask', dtype=tf.int32)
        #bert_layer = Lambda_Bert_Layer(trainable=False, dynamic=True)
        bert_layer = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
        bert_layer.bert.trainable=False
        bert_output = bert_layer(input_ids[:,0,:], attention_mask=input_mask[:,0,:])
        """
        bert_layer.trainable = False
        bert_output=bert_output[0][:,-1,:]
        last_state = tf.reshape(bert_output, shape=(-1,768))
        dense_out_1 = tf.keras.layers.Dense(units=768, activation="relu")(last_state)  # reshape_lambda_layer
        dense_out_1 = tf.keras.layers.Dropout(dropout_rate)(dense_out_1)
        dense_out_2 = tf.keras.layers.Dense(units=200, activation="relu")(dense_out_1)
        dense_out_2 = tf.keras.layers.Dropout(dropout_rate)(dense_out_2)
        logits = tf.keras.layers.Dense(units=2, activation='softmax')(dense_out_2)
        """
        logits = bert_output[0]

        self.model = tf.keras.Model(inputs=(input_ids,input_mask), outputs=logits)
        self.model.compile(optimizer=optimizer,
                           loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                           metrics=metrics,
                           run_eagerly=True)
        self.model.summary()
Example #8
    def train(self,
              train_data,
              train_labels,
              dev_data,
              dev_labels,
              save_model_path=f"models/bert.pkl"):
        ds_train_encoded = self.encode_examples(
            train_data, train_labels).batch(self.batch_size)
        ds_dev_encoded = self.encode_examples(dev_data, dev_labels).batch(
            self.batch_size)

        self.model = TFBertForSequenceClassification.from_pretrained(
            'bert-base-uncased')
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate,
                                             epsilon=1e-08)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
        self.model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

        self.model.fit(ds_train_encoded,
                       epochs=self.epochs,
                       validation_data=ds_dev_encoded)

        predictions = self.model.predict(ds_dev_encoded, verbose=1).logits
        self.model.save_weights(save_model_path)
        print('Validation Loss:', log_loss(dev_labels, predictions))
Example #9
def train_model(request):
    if request.method == "POST":
        inputed_batch_size = int(request.POST['batch_size'])
        model = TFBertForSequenceClassification.from_pretrained(
            "bert-base-uncased")
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

        #IMDB movie reviews
        dataset = get_dataset()
        # save the dataset to the filesystem
        clean_dataset(dataset)

        #training and testing datasets here
        train = tf.keras.preprocessing.text_dataset_from_directory(
            'aclImdb/train',
            batch_size=inputed_batch_size,
            validation_split=0.2,
            subset='training',
            seed=123)

        test = tf.keras.preprocessing.text_dataset_from_directory(
            'aclImdb/train',
            batch_size=inputed_batch_size,
            validation_split=0.2,
            subset='validation',
            seed=123)

        #convert to pandas dataframes
        train = convert_dataset_to_dataframe(train)
        test = convert_dataset_to_dataframe(test)

        #convert data to tf datasets
        train_InputExamples, validation_InputExamples = convert_data_to_examples(
            train, test, DATA_COLUMN, LABEL_COLUMN)

        train_data = convert_examples_to_tf_dataset(list(train_InputExamples),
                                                    tokenizer)
        train_data = train_data.shuffle(100).batch(32).repeat(2)

        validation_data = convert_examples_to_tf_dataset(
            list(validation_InputExamples), tokenizer)
        validation_data = validation_data.batch(32)

        #fine tune it!
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5,
                                               epsilon=1e-08,
                                               clipnorm=1.0),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(
                from_logits=True),
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])

        model.fit(train_data, epochs=2, validation_data=validation_data)

        # save the model to disk to be used later
        model.save_pretrained(MOOD_MODEL_DIR)

    context = {}
    return render(request, 'mood_classifier/train_model_button.html', context)
Example #10
    def __init__(self):

        model_name = 'bert-base-cased'
        self.tokenizer = BertTokenizer.from_pretrained(model_name)

        self.model = TFBertForSequenceClassification.from_pretrained(
            'C:/Users/lbbre/Documents/ECAM 5/PRD/content/assets')
        self.max_seq_len = 32
Example #11
    def load(self, path):
        """Loads model from path"""
        self.strategy = self._get_distributed_strategy()

        with self.strategy.scope():
            self.initialise_models()
            self.model = TFBertForSequenceClassification.from_pretrained(path)
        self.trained_ = True
Example #12
 def model_build(self):
     self.bertConfig = BertConfig.from_pretrained(
         os.path.join(self.pretrain_path, "config.json"),
         num_labels=self.num_classes)
     self.model = TFBertForSequenceClassification.from_pretrained(
         os.path.join(self.pretrain_path, "tf_model.h5"),
         config=self.bertConfig)
     self.model.summary()
Example #13
    def build(self):

        # Handle the Meta Data
        ids = tf.keras.Input((self.config.max_length, ),
                             dtype=tf.int32,
                             name='input_ids')
        vn = tf.keras.Input((1, ), dtype=tf.float32, name='version_number')
        pl = tf.keras.Input((1, ), dtype=tf.float32, name='partisan_lean')
        cat = tf.keras.Input((self.config.n_sc_id_classes, ),
                             dtype=tf.float32,
                             name='sc_id')
        meta = tf.concat([vn, pl, cat], axis=-1)

        # Load the initial weights with the ones trained from the DL model without text
        if self.config.load_weights_from_no_text:
            #if 'no_text_dense_layer_initialization_path' in self.config:
            print(
                "Usinging pretrained weights from the no_text model! --------------------"
            )
            model_location = self.config.data_vol + "models/no_text/full_model.h5"
            ntdl = tf.keras.layers.Dense(
                self.config.n_dense_layers,
                activation='relu',
                name="no_text_dense_layer",
                kernel_initializer=noTextKernelInitializer(
                    model_location=model_location),
                bias_initializer=noTextBiasInitializer(
                    model_location=model_location))
        else:
            ntdl = tf.keras.layers.Dense(self.config.n_dense_layers,
                                         activation='relu',
                                         name="no_text_dense_layer")
        meta = ntdl(meta)

        # Handle the Transformer
        self.base_transformer_model = TFBertForSequenceClassification.from_pretrained(
            "bert-base-uncased")
        x = self.base_transformer_model.bert(ids)  # Get the main Layer
        x = x['last_hidden_state'][:, 0, :]
        x = tf.keras.layers.Dropout(0.2)(x)

        # Combine the two and run through another dense layer.
        x = tf.concat([x, meta], axis=-1)
        x = tf.keras.layers.Dense(self.config.n_dense_layers,
                                  activation='relu')(x)
        x = tf.keras.layers.Dropout(0.2)(x)
        x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
        dl_model = tf.keras.Model(inputs={
            "input_ids": ids,
            "version_number": vn,
            "partisan_lean": pl,
            "sc_id": cat
        },
                                  outputs=[x])

        self.deep_legis_model = dl_model
Example #14
 def create_model_2(self):
     model = TFBertForSequenceClassification.from_pretrained(
         'bert-base-uncased')
     model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5,
                                                      epsilon=1e-08),
                   loss=tf.keras.losses.SparseCategoricalCrossentropy(
                       from_logits=True),
                   metrics=["acc"])
     model.summary()
     return model
Example #15
    def __init__(self,
                 n_intents=None,
                 dropout=0.2,
                 model_name="bert-base-uncased"):
        super().__init__(name="intent_classifier")

        self.tokenizer = Tokenizer()
        self.bert = TFBertForSequenceClassification.from_pretrained(model_name)
        self.dropout = Dropout(dropout)
        self.intent_classifier = Dense(n_intents, activation='softmax')
Example #16
def model_compile():
    model = TFBertForSequenceClassification.from_pretrained(
        'bert-base-uncased', num_labels=2)
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = keras.metrics.SparseCategoricalAccuracy('accuracy')
    optimizer = keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08)

    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    return model
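A hedged sketch of how a model compiled this way might be fine-tuned; the tokenizer and the toy texts/labels are assumptions, not part of the original example.

# Hypothetical fine-tuning sketch for model_compile(); texts and labels are toy data.
import tensorflow as tf
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = model_compile()

enc = tokenizer(["a great movie", "a terrible movie"],
                padding=True, truncation=True, return_tensors="tf")
labels = [1, 0]
ds = tf.data.Dataset.from_tensor_slices((dict(enc), labels)).batch(2)
model.fit(ds, epochs=1)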
Example #17
    def __init__(self, extractor, config, *args, **kwargs):
        super(TFVanillaBert_Class, self).__init__(*args, **kwargs)
        self.extractor = extractor

        # TFBertForSequenceClassification contains both the BERT and the linear classifier layers
        self.bert = TFBertForSequenceClassification.from_pretrained(
            config["pretrained"], hidden_dropout_prob=0.1)

        assert extractor.config[
            "numpassages"] == 1, "numpassages should be 1 for TFVanillaBERT"
        self.config = config
Example #18
def build_model():
    import tensorflow as tf
    from transformers import TFBertForSequenceClassification
    model = TFBertForSequenceClassification.from_pretrained(
        "bert-base-uncased")
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
        reduction=tf.keras.losses.Reduction.NONE, from_logits=True)
    opt = tf.keras.optimizers.Adam(learning_rate=3e-5)

    model.compile(optimizer=opt, loss=loss_fn, metrics=['accuracy'])
    return model
Example #19
    def _init_model(self, num_labels=2):
        config = {"name": self.pretrained, "from_pt": self.from_pt}
        if self.pretrained in PRETRAINED_CONFIG:
            config = PRETRAINED_CONFIG[self.pretrained]

        pretrained = config["name"]
        from_pt = config["from_pt"]

        self.tokenizer = BertTokenizer.from_pretrained(pretrained)
        self.model = TFBertForSequenceClassification.from_pretrained(
            pretrained, from_pt=from_pt, num_labels=num_labels)
Example #20
def fine_tune_model(ds, export_dir):
    (train_dataset, test_dataset, val_dataset) = get_test_train_val_datasets(ds)
    learning_rate = 2e-5
    number_of_epochs = 1
    model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
    bert_history = model.fit(train_dataset, epochs=number_of_epochs, validation_data=val_dataset)
    model.save_pretrained(export_dir)
    return model
Example #21
 def test_TFBertForSequenceClassification(self):
     from transformers import BertTokenizer, TFBertForSequenceClassification
     pretrained_weights = 'bert-base-uncased'
     tokenizer = BertTokenizer.from_pretrained(pretrained_weights)
     text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
     model = TFBertForSequenceClassification.from_pretrained(
         pretrained_weights)
     predictions = model.predict(inputs)
     onnx_model = keras2onnx.convert_keras(model, model.name)
     self.assertTrue(
         run_onnx_runtime(onnx_model.graph.name, onnx_model, inputs_onnx,
                          predictions, self.model_files))
Example #22
def run_first_test():
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
    input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :]  # Batch size 1
    print(f"input_ids:{input_ids}")
    outputs = model(input_ids)
    logits = outputs[0]
    print(logits)
    print("-" * 30)
    print(f"outputs:{outputs}")
    print("-" * 30)
    model.summary()
Example #23
    def get_model(self):
        """
        Build the BERT model using the original Transformers way of constructing it.

        This approach does not allow custom inputs and outputs, so it cannot be
        adapted to the deepnlp project worker without changing the deepnlp project
        parameters.
        """
        model = TFBertForSequenceClassification.from_pretrained(
            pretrained_model_name_or_path=self.model_name,
            from_pt=self.from_pt_word in self.model_name,
            num_labels=len(self.label_list))

        return model
Example #24
    def initialise_models(self):
        if self.pretrained == "bert":
            model_name = "bert-base-cased"
            from_pt = False
        elif self.pretrained == "scibert":
            model_name = "allenai/scibert_scivocab_cased"
            from_pt = True

        self.config = BertConfig.from_pretrained(model_name, num_labels=2)
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model = TFBertForSequenceClassification.from_pretrained(
            model_name, config=self.config, from_pt=from_pt)
        return self.model
Example #25
def bert_result(sentence):
    model = TFBertForSequenceClassification.from_pretrained(
        "bert-base-uncased")
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model.load_weights("senti_weights.ckpt")
    tf_batch = tokenizer(sentence,
                         max_length=128,
                         padding=True,
                         truncation=True,
                         return_tensors='tf')
    tf_outputs = model(tf_batch)
    tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
    label = tf_predictions.numpy()
    return int(label[0][1] * 100)
Example #26
    def __init__(self, weights_path):
        """
        :param weights_path: specifies where to load/store the weights of the model
        :type weights_path: str
        """
        super().__init__(weights_path)

        # A tensorflow model of Bert large (uncased), pre-trained.
        # More on it in our report.
        self.__model = TFBertForSequenceClassification.from_pretrained(
            'bert-large-uncased')

        # Instantiating a proper tokenizer for Bert
        self.__tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
Example #27
    def __init__(self,
                 model_path="./modelSentiment",
                 tokenizer="bert-base-uncased"):
        """
        Initialize model

        Downloads the model if not present
        """
        if os.path.exists(model_path):
            self.model = TFBertForSequenceClassification.from_pretrained(
                model_path)
        else:
            print("Downloading model...")
            Model.download_file_from_google_drive(
                "1uthnEb7WYnIR6y0VVX4gMPoqG-X0oKRK", "Modelfile.zip")

            with zipfile.ZipFile("Modelfile.zip", "r") as zip_ref:
                zip_ref.extractall("./")

            self.model = TFBertForSequenceClassification.from_pretrained(
                model_path)

        self.tokenizer = BertTokenizer.from_pretrained(tokenizer)
Example #28
def evaluate(dataset, limit_num_sents: bool):
    # Split and tokenize dataset
    split = Split_BERT()
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    X_train, y_train = split.get_X_y(dataset['train'] + dataset['oos_train'], limit_num_sents=limit_num_sents,
                                     set_type='train')
    X_val, y_val = split.get_X_y(dataset['val'] + dataset['oos_val'], limit_num_sents=limit_num_sents, set_type='val')
    X_test, y_test = split.get_X_y(dataset['test'] + dataset['oos_test'], limit_num_sents=limit_num_sents,
                                   set_type='test')

    train_ids, train_attention_masks, train_labels = tokenize_BERT(X_train, y_train, tokenizer)
    val_ids, val_attention_masks, val_labels = tokenize_BERT(X_val, y_val, tokenizer)
    test_ids, test_attention_masks, test_labels = tokenize_BERT(X_test, y_test, tokenizer)

    num_labels = len(split.intents_dct.keys())

    # Train model
    model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased',
                                                            num_labels=num_labels)  # we have to adjust the number of labels
    print('\nBert Model')
    model.summary()

    log_dir = 'tensorboard_data/tb_bert'
    model_save_path = './models/bert_model.h5'

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(filepath=model_save_path, save_weights_only=True, monitor='val_loss',
                                           mode='min',
                                           save_best_only=True), tf.keras.callbacks.TensorBoard(log_dir=log_dir)]

    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    optimizer = tf.keras.optimizers.Adam(learning_rate=4e-5)

    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    history = model.fit([train_ids, train_attention_masks],
                        train_labels,
                        batch_size=32,
                        epochs=5,
                        validation_data=([val_ids, val_attention_masks], val_labels),
                        callbacks=callbacks)

    # Test
    testing = Testing(model, {'test_ids': test_ids, 'test_attention_masks': test_attention_masks}, test_labels,
                      'bert', split.intents_dct['oos'])
    results_dct = testing.test_train()

    return results_dct
Example #29
    def __init__(self):
        self.modified = False
        # load models
        self.classifier_tokenizer = BertTokenizer.from_pretrained(
            CLASSIFIER_PATH)
        self.classifier_config = AutoConfig.from_pretrained(CLASSIFIER_PATH)
        self.classifier_model = TFBertForSequenceClassification.from_pretrained(
            CLASSIFIER_PATH)

        self.ner_tokenizer = AutoTokenizer.from_pretrained(PARSBERTNER_PATH)
        self.ner_config = AutoConfig.from_pretrained(PARSBERTNER_PATH)
        self.ner_model = TFAutoModelForTokenClassification.from_pretrained(
            PARSBERTNER_PATH)
        self.weather_api = Weather()
        self.adhan_api = Adhan()
        self.time_api = Time()
        self.calender_api = Calender()
Example #30
    def fit(self,
            messages,
            y,
            epochs=2,
            validation_percent=0.15,
            allow_import=True):
        if self.english:
            messages = (messages['Translation'].values)
        else:
            messages = (messages['Message'].values)

        y = self.Lab_Encoder.fit_transform(
            y.to_numpy().astype(str)).astype(float)

        # preparing the data:
        # split in train and validation

        if os.path.exists(self.path) and allow_import:
            print("Loading Pretrained Model")
            self.model = bert_class.from_pretrained(self.path)
            return (self)
        else:

            X_train, X_val, y_train, y_val = train_test_split(
                messages, y, test_size=validation_percent, random_state=0)

            # tokenizing the data and making tf.dataset
            X_train_input = self.convert_to_input(X_train.astype(str))
            X_val_input = self.convert_to_input(X_val.astype(str))

            train_ds = tf.data.Dataset.from_tensor_slices(
                (X_train_input[0], X_train_input[1], X_train_input[2],
                 y_train)).map(
                     self.example_to_features).shuffle(100).batch(12).repeat(5)
            val_ds = tf.data.Dataset.from_tensor_slices(
                (X_val_input[0], X_val_input[1], X_val_input[2],
                 y_val)).map(self.example_to_features).batch(12)

            self.model.fit(train_ds,
                           epochs=epochs,
                           validation_data=val_ds,
                           verbose=1)
            os.mkdir(self.path)
            self.model.save_pretrained(self.path)
            return self