Code example #1
def load_transformer(model_type):
    if model_type == "distilbert":
        tokenizer = DistilBertTokenizer.from_pretrained(
            'distilbert-base-uncased')
        model = TFDistilBertForSequenceClassification.from_pretrained(
            "distilbert-base-uncased", num_labels=1)
    elif model_type == "bert_x12":
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        model = TFBertForSequenceClassification.from_pretrained(
            "bert-base-uncased", num_labels=1)
    elif model_type == "bert_x24":
        tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
        model = TFBertForSequenceClassification.from_pretrained(
            "bert-large-uncased", num_labels=1)
    elif model_type == "albert_v2_x12":
        tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
        model = TFAlbertForSequenceClassification.from_pretrained(
            "albert-base-v2", num_labels=1)
    elif model_type == "longformer_x12":
        tokenizer = LongformerTokenizer.from_pretrained(
            'allenai/longformer-base-4096')
        model = TFLongformerForSequenceClassification.from_pretrained(
            "allenai/longformer-base-4096", num_labels=1)
    elif model_type == "longformer_x24":
        tokenizer = LongformerTokenizer.from_pretrained(
            'allenai/longformer-large-4096')
        model = TFLongformerForSequenceClassification.from_pretrained(
            "allenai/longformer-large-4096", num_labels=1)
    else:
        raise ValueError(model_type + " was invalid")

    return model, tokenizer
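A minimal usage sketch for the loader above (assuming the transformers classes it references are already imported in its module); the single-logit head from num_labels=1 is read here as a regression-style score.

model, tokenizer = load_transformer("distilbert")

# tokenize a small batch of texts into TensorFlow tensors
inputs = tokenizer(["an example sentence", "another one"],
                   padding=True, truncation=True, return_tensors="tf")

# one logit per example because the head was built with num_labels=1
scores = model(inputs).logits
print(scores.shape)  # (2, 1)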
Code example #2
 def __init__(self, *args, **kwargs):
     super().__init__(*args, **kwargs)
     self.bert_layer = TFBertForSequenceClassification.from_pretrained(
         'bert-base-cased')
     self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
     self.cached_train_dataset = None
     self.cached_val_dataset = None
Code example #3
    def load_model_from_zip(self, zip_archive):
        """a function that loads components of a serialized model from a zip
        given zip file using the python ZipFile interface and returns an
        instance of the model

        Arguments:

        zip_archive: ZipFile
            an instance of the python ZipFile interface that has loaded
            the file path specified by self.resource.disk_target

        Returns:

        model: Any
            any format of machine learning model that will be stored
            in the self.model attribute for later use

        """

        # read the bytes of the nested model archive from the zip file
        with zip_archive.open('transformer.zip', "r") as file:
            model_bytes = file.read()  # raw bytes of the archived save directory

        # load the bytes of the hugging face save path from a zip
        with tempfile.TemporaryDirectory() as directory:
            with tempfile.NamedTemporaryFile(suffix=".zip") as archive:
                archive.write(model_bytes)
                shutil.unpack_archive(archive.name, directory)
                return TFBert.from_pretrained(directory)
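As a complementary sketch (an assumption about how such an archive could be produced, not code from this project), the nested 'transformer.zip' entry expected above can be built by zipping a Hugging Face save_pretrained directory:

import os
import shutil
import tempfile
import zipfile

def save_model_to_zip(model, zip_path):
    # hypothetical inverse of load_model_from_zip: pack a transformers TF model
    # into an outer zip under the 'transformer.zip' entry name
    with tempfile.TemporaryDirectory() as directory:
        model.save_pretrained(directory)  # writes config.json + tf_model.h5
        inner = shutil.make_archive(directory, "zip", directory)  # <directory>.zip
        with zipfile.ZipFile(zip_path, "w") as outer:
            outer.write(inner, arcname="transformer.zip")
        os.remove(inner)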
Code example #4
def carga_modelo_BERT(model_path):
    """Loads the pretrained BERT model found at the path `model_path`."""
    # Parameters from the HuggingFace script used for sentiment analysis on another dataset
    USE_XLA = False
    USE_AMP = False
    #TASK = "sst-2"
    #TFDS_TASK = "sst2"
    num_labels = 2
    tf.config.optimizer.set_jit(USE_XLA)
    tf.config.optimizer.set_experimental_options(
        {"auto_mixed_precision": USE_AMP})

    # Load tokenizer and model from pretrained model/vocabulary. Specify the number of labels to classify (2+: classification, 1: regression)
    config = BertConfig.from_pretrained("bert-base-cased",
                                        num_labels=num_labels)
    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    model = TFBertForSequenceClassification.from_pretrained("bert-base-cased",
                                                            config=config)

    opt = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
    model.compile(optimizer=opt, loss=loss, metrics=[metric])
    model.load_weights(model_path)

    return model, tokenizer, config
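A minimal inference sketch for the loader above; the checkpoint path and the input sentence are placeholders, and the softmax matches the num_labels=2 setup in the function.

import tensorflow as tf

# placeholder path to fine-tuned weights restored by model.load_weights inside the loader
model, tokenizer, config = carga_modelo_BERT("checkpoints/bert_sentiment.h5")

inputs = tokenizer("the movie was surprisingly good",
                   padding=True, truncation=True, return_tensors="tf")
probs = tf.nn.softmax(model(inputs).logits, axis=-1)  # probabilities over the 2 labels
print(int(tf.argmax(probs, axis=-1)[0]))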
Code example #5
File: ernie.py  Project: TrendingTechnology/ernie-1
    def _load_remote_model(self, model_name, tokenizer_kwargs, model_kwargs):
        if model_name not in ModelsByFamily.Supported:
            raise ValueError(f'Model {model_name} not supported.')

        do_lower_case = False
        if 'uncased' in model_name.lower():
            do_lower_case = True
        tokenizer_kwargs.update({'do_lower_case': do_lower_case})

        self._tokenizer = None
        self._model = None

        if model_name in ModelsByFamily.Bert:
            self._tokenizer = BertTokenizer.from_pretrained(
                model_name, **tokenizer_kwargs)
            self._model = TFBertForSequenceClassification.from_pretrained(
                model_name, **model_kwargs)
        elif model_name in ModelsByFamily.Roberta:
            self._tokenizer = RobertaTokenizer.from_pretrained(
                model_name, **tokenizer_kwargs)
            self._model = TFRobertaForSequenceClassification.from_pretrained(
                model_name, **model_kwargs)
        elif model_name in ModelsByFamily.XLNet:
            self._tokenizer = XLNetTokenizer.from_pretrained(
                model_name, **tokenizer_kwargs)
            self._model = TFXLNetForSequenceClassification.from_pretrained(
                model_name, **model_kwargs)
        elif model_name in ModelsByFamily.DistilBert:
            self._tokenizer = DistilBertTokenizer.from_pretrained(
                model_name, **tokenizer_kwargs)
            self._model = TFDistilBertForSequenceClassification.from_pretrained(
                model_name, **model_kwargs)

        assert self._tokenizer and self._model
Code example #6
 def build_model(self,
                 max_length,
                 train_batch_size,
                 learning_rate,
                 epochs,
                 num_labels,
                 tagset=None,
                 gpu_growth=True,
                 eval_batch_size=32):
     #if gpu_growth:
     #    model_utils.set_tf_memory_growth()
     if self.task == "pos":
         self.model = TFBertForTokenClassification.from_pretrained(
             self.model_name, num_labels=num_labels, from_pt=True)
         self.tokenizer = MBERT_Tokenizer_pos.from_pretrained(
             self.model_name, do_lower_case=False)
     else:
         self.model = TFBertForSequenceClassification.from_pretrained(
             self.model_name, num_labels=num_labels, from_pt=True)
         self.tokenizer = BertTokenizer.from_pretrained(self.model_name,
                                                        do_lower_case=False)
     #self.model, self.tokenizer = model_utils.create_model(self.short_model_name, self.task, num_labels)
     self.model = model_utils.compile_model(self.model, self.task,
                                            learning_rate)
     print("Successfully built", self.model_name)
     self.max_length = max_length
     self.train_batch_size = train_batch_size
     self.learning_rate = learning_rate
     self.epochs = epochs
     self.num_labels = num_labels
     if tagset:
         self.tagset = tagset
         self.label_map = {label: i for i, label in enumerate(tagset)}
     self.eval_batch_size = eval_batch_size
Code example #7
    def build(self, **kwargs):
        optimizer = kwargs.get("optimizer", "adam")
        metrics = kwargs.get("metrics", ['accuracy'])
        dropout_rate = kwargs.get('dropout_rate', 0.5)

        ## BUILDING THE GRAPH
        input_ids = tf.keras.layers.Input(shape=(1,50), name='input_ids', dtype=tf.int32)
        input_mask = tf.keras.layers.Input(shape=(1,50), name='input_mask', dtype=tf.int32)
        #bert_layer = Lambda_Bert_Layer(trainable=False, dynamic=True)
        bert_layer = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
        bert_layer.bert.trainable=False
        bert_output = bert_layer(input_ids[:,0,:], attention_mask=input_mask[:,0,:])
        """
        bert_layer.trainable = False
        bert_output=bert_output[0][:,-1,:]
        last_state = tf.reshape(bert_output, shape=(-1,768))
        dense_out_1 = tf.keras.layers.Dense(units=768, activation="relu")(last_state)  # reshape_lambda_layer
        dense_out_1 = tf.keras.layers.Dropout(dropout_rate)(dense_out_1)
        dense_out_2 = tf.keras.layers.Dense(units=200, activation="relu")(dense_out_1)
        dense_out_2 = tf.keras.layers.Dropout(dropout_rate)(dense_out_2)
        logits = tf.keras.layers.Dense(units=2, activation='softmax')(dense_out_2)
        """
        logits = bert_output[0]

        self.model = tf.keras.Model(inputs=(input_ids,input_mask), outputs=logits)
        self.model.compile(optimizer=optimizer,
                           loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                           metrics=metrics,
                           run_eagerly=True)
        self.model.summary()
Code example #8
    def train(self,
              train_data,
              train_labels,
              dev_data,
              dev_labels,
              save_model_path=f"models/bert.pkl"):
        ds_train_encoded = self.encode_examples(
            train_data, train_labels).batch(self.batch_size)
        ds_dev_encoded = self.encode_examples(dev_data, dev_labels).batch(
            self.batch_size)

        self.model = TFBertForSequenceClassification.from_pretrained(
            'bert-base-uncased')
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate,
                                             epsilon=1e-08)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
        self.model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

        self.model.fit(ds_train_encoded,
                       epochs=self.epochs,
                       validation_data=ds_dev_encoded)

        predictions = self.model.predict(ds_dev_encoded, verbose=1).logits
        self.model.save_weights(save_model_path)
        print('Validation Loss:', log_loss(dev_labels, predictions))
Code example #9
def train_model(request):
    if request.method == "POST":
        inputed_batch_size = int(request.POST['batch_size'])
        model = TFBertForSequenceClassification.from_pretrained(
            "bert-base-uncased")
        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

        # IMDB movie reviews
        dataset = get_dataset()
        # clean the dataset and write it to the filesystem
        clean_dataset(dataset)

        #training and testing datasets here
        train = tf.keras.preprocessing.text_dataset_from_directory(
            'aclImdb/train',
            batch_size=inputed_batch_size,
            validation_split=0.2,
            subset='training',
            seed=123)

        test = tf.keras.preprocessing.text_dataset_from_directory(
            'aclImdb/train',
            batch_size=inputed_batch_size,
            validation_split=0.2,
            subset='validation',
            seed=123)

        #convert to pandas dataframes
        train = convert_dataset_to_dataframe(train)
        test = convert_dataset_to_dataframe(test)

        #convert data to tf datasets
        train_InputExamples, validation_InputExamples = convert_data_to_examples(
            train, test, DATA_COLUMN, LABEL_COLUMN)

        train_data = convert_examples_to_tf_dataset(list(train_InputExamples),
                                                    tokenizer)
        train_data = train_data.shuffle(100).batch(32).repeat(2)

        validation_data = convert_examples_to_tf_dataset(
            list(validation_InputExamples), tokenizer)
        validation_data = validation_data.batch(32)

        #fine tune it!
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5,
                                               epsilon=1e-08,
                                               clipnorm=1.0),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(
                from_logits=True),
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])

        model.fit(train_data, epochs=2, validation_data=validation_data)

        # save the model weights to MOOD_MODEL_DIR to be used later
        model.save_pretrained(MOOD_MODEL_DIR)

    context = {}
    return render(request, 'mood_classifier/train_model_button.html', context)
Code example #10
File: main.py  Project: MathieuDecline/LittleBill_PRD
    def __init__(self):

        model_name = 'bert-base-cased'
        self.tokenizer = BertTokenizer.from_pretrained(model_name)

        self.model = TFBertForSequenceClassification.from_pretrained(
            'C:/Users/lbbre/Documents/ECAM 5/PRD/content/assets')
        self.max_seq_len = 32
Code example #11
    def load(self, path):
        """Loads model from path"""
        self.strategy = self._get_distributed_strategy()

        with self.strategy.scope():
            self.initialise_models()
            self.model = TFBertForSequenceClassification.from_pretrained(path)
        self.trained_ = True
Code example #12
 def model_build(self):
     self.bertConfig = BertConfig.from_pretrained(
         os.path.join(self.pretrain_path, "config.json"),
         num_labels=self.num_classes)
     self.model = TFBertForSequenceClassification.from_pretrained(
         os.path.join(self.pretrain_path, "tf_model.h5"),
         config=self.bertConfig)
     self.model.summary()
Code example #13
File: deeplegis.py  Project: lukevancleve/DeepLegis
    def build(self):

        # Handle the Meta Data
        ids = tf.keras.Input((self.config.max_length, ),
                             dtype=tf.int32,
                             name='input_ids')
        vn = tf.keras.Input((1, ), dtype=tf.float32, name='version_number')
        pl = tf.keras.Input((1, ), dtype=tf.float32, name='partisan_lean')
        cat = tf.keras.Input((self.config.n_sc_id_classes, ),
                             dtype=tf.float32,
                             name='sc_id')
        meta = tf.concat([vn, pl, cat], axis=-1)

        # Load the initial weights with the ones trained from the DL model without text
        if self.config.load_weights_from_no_text:
            #if 'no_text_dense_layer_initialization_path' in self.config:
            print(
                "Usinging pretrained weights from the no_text model! --------------------"
            )
            model_location = self.config.data_vol + "models/no_text/full_model.h5"
            ntdl = tf.keras.layers.Dense(
                self.config.n_dense_layers,
                activation='relu',
                name="no_text_dense_layer",
                kernel_initializer=noTextKernelInitializer(
                    model_location=model_location),
                bias_initializer=noTextBiasInitializer(
                    model_location=model_location))
        else:
            ntdl = tf.keras.layers.Dense(self.config.n_dense_layers,
                                         activation='relu',
                                         name="no_text_dense_layer")
        meta = ntdl(meta)

        # Handle the Transformer
        self.base_transformer_model = TFBertForSequenceClassification.from_pretrained(
            "bert-base-uncased")
        x = self.base_transformer_model.bert(ids)  # Get the main Layer
        x = x['last_hidden_state'][:, 0, :]
        x = tf.keras.layers.Dropout(0.2)(x)

        # Combine the two and run through another dense layer.
        x = tf.concat([x, meta], axis=-1)
        x = tf.keras.layers.Dense(self.config.n_dense_layers,
                                  activation='relu')(x)
        x = tf.keras.layers.Dropout(0.2)(x)
        x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
        dl_model = tf.keras.Model(inputs={
            "input_ids": ids,
            "version_number": vn,
            "partisan_lean": pl,
            "sc_id": cat
        },
                                  outputs=[x])

        self.deep_legis_model = dl_model
Code example #14
 def create_model_2(self):
     model = TFBertForSequenceClassification.from_pretrained(
         'bert-base-uncased')
     model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5,
                                                      epsilon=1e-08),
                   loss=tf.keras.losses.SparseCategoricalCrossentropy(
                       from_logits=True),
                   metrics=["acc"])
     model.summary()
     return model
Code example #15
    def __init__(self,
                 n_intents=None,
                 dropout=0.2,
                 model_name="bert-base-uncased"):
        super().__init__(name="intent_classifier")

        self.tokenizer = Tokenizer()
        self.bert = TFBertForSequenceClassification.from_pretrained(model_name)
        self.dropout = Dropout(dropout)
        self.intent_classifier = Dense(n_intents, activation='softmax')
Code example #16
def model_compile():
    model = TFBertForSequenceClassification.from_pretrained(
        'bert-base-uncased', num_labels=2)
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = keras.metrics.SparseCategoricalAccuracy('accuracy')
    optimizer = keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08)

    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    return model
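A short fine-tuning sketch around model_compile, assuming a matching bert-base-uncased tokenizer and small in-memory lists texts/labels (placeholders); feeding (features, labels) pairs lets Keras apply the sparse categorical loss to the model's logits.

import tensorflow as tf
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

texts = ["great service", "terrible experience"]   # placeholder data
labels = [1, 0]

enc = tokenizer(texts, padding=True, truncation=True,
                max_length=128, return_tensors='tf')
ds = tf.data.Dataset.from_tensor_slices((dict(enc), labels)).batch(16)

model = model_compile()
model.fit(ds, epochs=2)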
Code example #17
    def __init__(self, extractor, config, *args, **kwargs):
        super(TFVanillaBert_Class, self).__init__(*args, **kwargs)
        self.extractor = extractor

        # TFBertForSequenceClassification contains both the BERT and the linear classifier layers
        self.bert = TFBertForSequenceClassification.from_pretrained(
            config["pretrained"], hidden_dropout_prob=0.1)

        assert extractor.config[
            "numpassages"] == 1, "numpassages should be 1 for TFVanillaBERT"
        self.config = config
Code example #18
def build_model():
    import tensorflow as tf
    from transformers import TFBertForSequenceClassification
    model = TFBertForSequenceClassification.from_pretrained(
        "bert-base-uncased")
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
        reduction=tf.keras.losses.Reduction.NONE, from_logits=True)
    opt = tf.keras.optimizers.Adam(learning_rate=3e-5)

    model.compile(optimizer=opt, loss=loss_fn, metrics=['accuracy'])
    return model
Code example #19
    def _init_model(self, num_labels=2):
        config = {"name": self.pretrained, "from_pt": self.from_pt}
        if self.pretrained in PRETRAINED_CONFIG:
            config = PRETRAINED_CONFIG[self.pretrained]

        pretrained = config["name"]
        from_pt = config["from_pt"]

        self.tokenizer = BertTokenizer.from_pretrained(pretrained)
        self.model = TFBertForSequenceClassification.from_pretrained(
            pretrained, from_pt=from_pt, num_labels=num_labels)
Code example #20
def fine_tune_model(ds, export_dir):
    (train_dataset, test_dataset, val_dataset) = get_test_train_val_datasets(ds)
    learning_rate = 2e-5
    number_of_epochs = 1
    model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
    bert_history = model.fit(train_dataset, epochs=number_of_epochs, validation_data=val_dataset)
    model.save_pretrained(export_dir)
    return model
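Because fine_tune_model exports with save_pretrained, the fine-tuned weights can later be restored from the same directory; a minimal sketch (export_dir is whatever path was passed in):

from transformers import BertTokenizer, TFBertForSequenceClassification

export_dir = "exported_bert"   # the path previously passed to fine_tune_model
model = TFBertForSequenceClassification.from_pretrained(export_dir)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

inputs = tokenizer("example text to classify", return_tensors="tf")
logits = model(inputs).logits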
Code example #21
 def test_TFBertForSequenceClassification(self):
     from transformers import BertTokenizer, TFBertForSequenceClassification
     pretrained_weights = 'bert-base-uncased'
     tokenizer = BertTokenizer.from_pretrained(pretrained_weights)
     text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
     model = TFBertForSequenceClassification.from_pretrained(
         pretrained_weights)
     predictions = model.predict(inputs)
     onnx_model = keras2onnx.convert_keras(model, model.name)
     self.assertTrue(
         run_onnx_runtime(onnx_model.graph.name, onnx_model, inputs_onnx,
                          predictions, self.model_files))
Code example #22
def run_first_test():
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
    input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :]  # Batch size 1
    print(f"input_ids:{input_ids}")
    outputs = model(input_ids)
    logits = outputs[0]
    print(logits)
    print("-" * 30)
    print(f"outputs:{outputs}")
    print("-" * 30)
    model.summary()
Code example #23
File: model.py  Project: s9891326/Fine-Tuning-BERT
    def get_model(self):
        """
        Build BERT model. Using origin way to build BERT model On the Transformers package.

        But this way don't custom input and output, so that can't adapt to deepnlp project worker.
        Need to change deepnlp project parameter.
        """
        model = TFBertForSequenceClassification.from_pretrained(
            pretrained_model_name_or_path=self.model_name,
            from_pt=True if self.from_pt_word in self.model_name else False,
            num_labels=len(self.label_list))

        return model
Code example #24
    def initialise_models(self):
        if self.pretrained == "bert":
            model_name = "bert-base-cased"
            from_pt = False
        elif self.pretrained == "scibert":
            model_name = "allenai/scibert_scivocab_cased"
            from_pt = True

        self.config = BertConfig.from_pretrained(model_name, num_labels=2)
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model = TFBertForSequenceClassification.from_pretrained(
            model_name, config=self.config, from_pt=from_pt)
        return self.model
Code example #25
def bert_result(sentence):
    model = TFBertForSequenceClassification.from_pretrained(
        "bert-base-uncased")
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model.load_weights("senti_weights.ckpt")
    tf_batch = tokenizer(sentence,
                         max_length=128,
                         padding=True,
                         truncation=True,
                         return_tensors='tf')
    tf_outputs = model(tf_batch)
    tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
    label = tf_predictions.numpy()
    return int(label[0][1] * 100)
Code example #26
    def __init__(self, weights_path):
        """
    :param weights_path: specifies where load/store weights of the model
    :type weights_path: str
    """
        super().__init__(weights_path)

        # A tensorflow model of BERT large (uncased), pre-trained.
        # More on it in our report.
        self.__model = TFBertForSequenceClassification.from_pretrained(
            'bert-large-uncased')

        # Instantiating a proper tokenizer for Bert
        self.__tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
Code example #27
File: model.py  Project: bhavyajeet/PositivePanda
    def __init__(self,
                 model_path="./modelSentiment",
                 tokenizer="bert-base-uncased"):
        """
        Initialize model

        Downloads the model if not present
        """
        if os.path.exists(model_path):
            self.model = TFBertForSequenceClassification.from_pretrained(
                model_path)
        else:
            print("Downloading model...")
            Model.download_file_from_google_drive(
                "1uthnEb7WYnIR6y0VVX4gMPoqG-X0oKRK", "Modelfile.zip")

            with zipfile.ZipFile("Modelfile.zip", "r") as zip_ref:
                zip_ref.extractall("./")

            self.model = TFBertForSequenceClassification.from_pretrained(
                model_path)

        self.tokenizer = BertTokenizer.from_pretrained(tokenizer)
Code example #28
def evaluate(dataset, limit_num_sents: bool):
    # Split and tokenize dataset
    split = Split_BERT()
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    X_train, y_train = split.get_X_y(dataset['train'] + dataset['oos_train'], limit_num_sents=limit_num_sents,
                                     set_type='train')
    X_val, y_val = split.get_X_y(dataset['val'] + dataset['oos_val'], limit_num_sents=limit_num_sents, set_type='val')
    X_test, y_test = split.get_X_y(dataset['test'] + dataset['oos_test'], limit_num_sents=limit_num_sents,
                                   set_type='test')

    train_ids, train_attention_masks, train_labels = tokenize_BERT(X_train, y_train, tokenizer)
    val_ids, val_attention_masks, val_labels = tokenize_BERT(X_val, y_val, tokenizer)
    test_ids, test_attention_masks, test_labels = tokenize_BERT(X_test, y_test, tokenizer)

    num_labels = len(split.intents_dct.keys())

    # Train model
    model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased',
                                                            num_labels=num_labels)  # we have to adjust the number of labels
    print('\nBert Model', model.summary())

    log_dir = 'tensorboard_data/tb_bert'
    model_save_path = './models/bert_model.h5'

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(filepath=model_save_path, save_weights_only=True, monitor='val_loss',
                                           mode='min',
                                           save_best_only=True), tf.keras.callbacks.TensorBoard(log_dir=log_dir)]

    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    optimizer = tf.keras.optimizers.Adam(learning_rate=4e-5)

    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    history = model.fit([train_ids, train_attention_masks],
                        train_labels,
                        batch_size=32,
                        epochs=5,
                        validation_data=([val_ids, val_attention_masks], val_labels),
                        callbacks=callbacks)

    # Test
    testing = Testing(model, {'test_ids': test_ids, 'test_attention_masks': test_attention_masks}, test_labels,
                      'bert', split.intents_dct['oos'])
    results_dct = testing.test_train()

    return results_dct
Code example #29
File: aibot.py  Project: dpooria/aibot
    def __init__(self):
        self.modified = False
        # load models
        self.classifier_tokenizer = BertTokenizer.from_pretrained(
            CLASSIFIER_PATH)
        self.classifier_config = AutoConfig.from_pretrained(CLASSIFIER_PATH)
        self.classifier_model = TFBertForSequenceClassification.from_pretrained(
            CLASSIFIER_PATH)

        self.ner_tokenizer = AutoTokenizer.from_pretrained(PARSBERTNER_PATH)
        self.ner_config = AutoConfig.from_pretrained(PARSBERTNER_PATH)
        self.ner_model = TFAutoModelForTokenClassification.from_pretrained(
            PARSBERTNER_PATH)
        self.weather_api = Weather()
        self.adhan_api = Adhan()
        self.time_api = Time()
        self.calender_api = Calender()
Code example #30
    def fit(self,
            messages,
            y,
            epochs=2,
            validation_percent=0.15,
            allow_import=True):
        if self.english:
            messages = (messages['Translation'].values)
        else:
            messages = (messages['Message'].values)

        y = self.Lab_Encoder.fit_transform(
            y.to_numpy().astype(str)).astype(float)

        # preparing the data:
        # split in train and validation

        if os.path.exists(self.path) and allow_import:
            print("Loading Pretrained Model")
            self.model = bert_class.from_pretrained(self.path)
            return (self)
        else:

            X_train, X_val, y_train, y_val = train_test_split(
                messages, y, test_size=validation_percent, random_state=0)

            # tokenizing the data and making tf.dataset
            X_train_input = self.convert_to_input(X_train.astype(str))
            X_val_input = self.convert_to_input(X_val.astype(str))

            train_ds = tf.data.Dataset.from_tensor_slices(
                (X_train_input[0], X_train_input[1], X_train_input[2],
                 y_train)).map(
                     self.example_to_features).shuffle(100).batch(12).repeat(5)
            val_ds = tf.data.Dataset.from_tensor_slices(
                (X_val_input[0], X_val_input[1], X_val_input[2],
                 y_val)).map(self.example_to_features).batch(12)

            self.model.fit(train_ds,
                           epochs=epochs,
                           validation_data=val_ds,
                           verbose=1)
            os.mkdir(self.path)
            self.model.save_pretrained(self.path)
            return self