def test_integration(self):
    level_origin = logging.get_verbosity()

    logger = logging.get_logger("transformers.models.bart.tokenization_bart")
    msg = "Testing 1, 2, 3"

    # should be able to log warnings (if default settings weren't overridden by `pytest --log-level-all`)
    if level_origin <= logging.WARNING:
        with CaptureLogger(logger) as cl:
            logger.warning(msg)
        self.assertEqual(cl.out, msg + "\n")

    # this is setting the level for all of `transformers.*` loggers
    logging.set_verbosity_error()

    # should not be able to log warnings
    with CaptureLogger(logger) as cl:
        logger.warning(msg)
    self.assertEqual(cl.out, "")

    # should be able to log warnings again
    logging.set_verbosity_warning()
    with CaptureLogger(logger) as cl:
        logger.warning(msg)
    self.assertEqual(cl.out, msg + "\n")

    # restore to the original level
    logging.set_verbosity(level_origin)
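The same capture-and-assert pattern works outside a test class; a minimal standalone sketch, assuming `CaptureLogger` comes from `transformers.testing_utils` as in the transformers test suite:

    from transformers import logging
    from transformers.testing_utils import CaptureLogger

    logger = logging.get_logger("transformers.models.bart.tokenization_bart")

    # at WARNING verbosity the message is emitted and captured
    logging.set_verbosity_warning()
    with CaptureLogger(logger) as cl:
        logger.warning("Testing 1, 2, 3")
    assert cl.out == "Testing 1, 2, 3\n"

    # at ERROR verbosity the same call produces no output
    logging.set_verbosity_error()
    with CaptureLogger(logger) as cl:
        logger.warning("Testing 1, 2, 3")
    assert cl.out == ""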
def __init__(self, model_folder: str, to_device, verbose=False):
    self.model_folder = model_folder
    self.device = to_device

    if not verbose:
        logging.set_verbosity_warning()

    # load model
    self.tokenizer = AutoTokenizer.from_pretrained(model_folder, local_files_only=True)
    self.model = AutoModelForQuestionAnswering.from_pretrained(
        model_folder, local_files_only=True).to(to_device)
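A question-answering model loaded this way is typically queried by decoding the span between the most likely start and end logits. A hedged method sketch for the same class; the `answer` name and the greedy span decoding are illustrative assumptions, not from the source:

    import torch

    def answer(self, question: str, context: str) -> str:
        # tokenize the question/context pair and move tensors to the model's device
        inputs = self.tokenizer(question, context, return_tensors="pt",
                                truncation=True).to(self.device)
        with torch.no_grad():
            outputs = self.model(**inputs)
        # greedy span decoding: argmax start and end positions
        start = int(outputs.start_logits.argmax())
        end = int(outputs.end_logits.argmax()) + 1
        return self.tokenizer.decode(inputs["input_ids"][0][start:end])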
def __init__(self, model_folder: str, to_device, verbose=False):
    self.model_folder = model_folder
    self.device = to_device

    if not verbose:
        logging.set_verbosity_warning()

    # load model
    self.tokenizer = AutoTokenizer.from_pretrained(model_folder, local_files_only=True)
    self.model = AutoModelForSequenceClassification.from_pretrained(
        model_folder, local_files_only=True).to(to_device)
    self.context_window = 1024
def __init__(self, model_folder: str, to_device, verbose=False):
    self.model_folder = model_folder
    self.device = to_device

    if not verbose:
        logging.set_verbosity_warning()

    # load model
    self.tokenizer = LEDTokenizer.from_pretrained(model_folder, local_files_only=True)
    self.model = LEDForConditionalGeneration.from_pretrained(
        model_folder, local_files_only=True).to(to_device)

    # model config
    self.model.config.no_repeat_ngram_size = 3
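`no_repeat_ngram_size` only takes effect at generation time, so a loader like this is normally paired with a `generate()` call. A minimal sketch; the `summarize` method name, beam settings, and length limit are illustrative assumptions:

    import torch

    def summarize(self, text: str) -> str:
        inputs = self.tokenizer(text, return_tensors="pt",
                                truncation=True).to(self.device)
        with torch.no_grad():
            # generate() picks up no_repeat_ngram_size=3 from the model config
            summary_ids = self.model.generate(inputs["input_ids"],
                                              num_beams=4, max_length=256)
        return self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)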
def test_set_level(self):
    logger = logging.get_logger()

    # the current default level is logging.WARNING
    level_origin = logging.get_verbosity()

    logging.set_verbosity_error()
    self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

    logging.set_verbosity_warning()
    self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

    logging.set_verbosity_info()
    self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

    logging.set_verbosity_debug()
    self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

    # restore to the original level
    logging.set_verbosity(level_origin)
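Each `set_verbosity_*` helper is shorthand for `set_verbosity()` with the matching standard-library level constant, which `transformers.logging` re-exports (the `logging.WARNING` comparison in the first example relies on this). A quick sketch:

    from transformers import logging

    logging.set_verbosity(logging.INFO)  # same effect as logging.set_verbosity_info()
    assert logging.get_verbosity() == logging.INFO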
from collections import OrderedDict
from os.path import basename, dirname

import fairseq
import torch
from fairseq import hub_utils
from fairseq.data.dictionary import Dictionary

from transformers import WEIGHTS_NAME, logging
from transformers.configuration_fsmt import FSMTConfig
from transformers.modeling_fsmt import FSMTForConditionalGeneration
from transformers.tokenization_fsmt import VOCAB_FILES_NAMES
from transformers.tokenization_utils_base import TOKENIZER_CONFIG_FILE

logging.set_verbosity_warning()

json_indent = 2

# based on the results of a search on a range of `num_beams`, `length_penalty` and `early_stopping`
# values against wmt19 test data to obtain the best BLEU scores, we will use the following defaults:
#
# * `num_beams`: 5 (higher scores better, but requires more memory/is slower, can be adjusted by users)
# * `early_stopping`: `False` consistently scored better
# * `length_penalty` varied, so will assign the best one depending on the model
best_score_hparams = {
    # fairseq:
    "wmt19-ru-en": {"length_penalty": 1.1},
    "wmt19-en-ru": {"length_penalty": 1.15},
    "wmt19-en-de": {"length_penalty": 1.0},
    "wmt19-de-en": {"length_penalty": 1.1},
}
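The comment block describes how the search results feed the generation defaults; a hedged sketch of how these per-model values might be merged into a config dict (the `model_name` and `model_conf` names are illustrative, not from the source):

    model_name = "wmt19-ru-en"  # illustrative
    model_conf = {
        "num_beams": 5,           # tunable by users; higher scores, but slower
        "early_stopping": False,  # consistently scored better in the search
    }
    # per-model length_penalty from the BLEU search above
    model_conf.update(best_score_hparams.get(model_name, {}))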
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any of {'0', '1', '2'}

from typing import Tuple, Dict

from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from datasets import load_dataset
from transformers import (TFAutoModelForSequenceClassification, AutoTokenizer,
                          logging as transformers_logging)

from callbacks import HFModelCheckPoint

transformers_logging.set_verbosity_warning()

MODEL_PATH = r"D:\Models\NLP\longformer\tf-longformer-base-4096"
DATA_COLS = ['input_ids', 'attention_mask', 'label']
SAVE_PATH = r"D:\Fine-tuned Models\NLP\longformer\tf-longformer-base-4096"


def prep_data(fpath: str, tpath: str, text_col: str = 'text',
              label_col: str = 'label', seq_len: int = 500,
              *args, **kwargs) -> Tuple[tf.data.Dataset, int]:
    dataset = load_dataset('csv', data_files=fpath, split='train')
import logging
import os
import sys
import warnings

from transformers import (
    AutoModel,
    AutoTokenizer,
    ProgressCallback,
    logging as trans_log,
)
from transformers.integrations import TensorBoardCallback, WandbCallback
from transformers.trainer_callback import PrinterCallback

from joint_nlu_models import *
from sklearn.metrics import classification_report
from preprocessing.conll_loader import ConLLLoader, intent_labels_list, slot_labels_list
from joint_metrics import running_metrics, joint_classification_report, exact_match

# configure loggers
trans_log.set_verbosity_warning()
logging.basicConfig(level=logging.INFO)

# disable huggingface warning and wandb
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["WANDB_DISABLED"] = "true"

# suppress annoying warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")

# disable wandb for memory efficiency.
# this does not seem to work.
# will make issue on huggingface github
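The trailing comment notes that the `WANDB_DISABLED` environment variable did not reliably turn reporting off. An alternative is to strip the integration callbacks from the `Trainer` directly; a hedged sketch, assuming a `trainer` instance is built elsewhere in this script:

    # remove reporting/printing callbacks from an existing Trainer instance;
    # Trainer.remove_callback() accepts either a callback class or an instance
    trainer.remove_callback(WandbCallback)
    trainer.remove_callback(TensorBoardCallback)
    trainer.remove_callback(PrinterCallback)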