Example 1
    def test_integration(self):
        level_origin = logging.get_verbosity()

        logger = logging.get_logger(
            "transformers.models.bart.tokenization_bart")
        msg = "Testing 1, 2, 3"

        # should be able to log warnings (if default settings weren't overridden by `pytest --log-level-all`)
        if level_origin <= logging.WARNING:
            with CaptureLogger(logger) as cl:
                logger.warning(msg)
            self.assertEqual(cl.out, msg + "\n")

        # this is setting the level for all of `transformers.*` loggers
        logging.set_verbosity_error()

        # should not be able to log warnings
        with CaptureLogger(logger) as cl:
            logger.warning(msg)
        self.assertEqual(cl.out, "")

        # should be able to log warnings again
        logging.set_verbosity_warning()
        with CaptureLogger(logger) as cl:
            logger.warning(msg)
        self.assertEqual(cl.out, msg + "\n")

        # restore to the original level
        logging.set_verbosity(level_origin)
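
For quick reference, here is a minimal standalone sketch of the capture-and-verify pattern used above. It assumes only that transformers is installed (CaptureLogger comes from transformers.testing_utils) and that nothing, such as a pytest log-level flag, has overridden the default verbosity:

# Minimal sketch of the pattern above (assumes a default, non-overridden verbosity).
from transformers import logging
from transformers.testing_utils import CaptureLogger

logger = logging.get_logger("transformers")

logging.set_verbosity_warning()
with CaptureLogger(logger) as cl:
    logger.warning("hello")
assert cl.out == "hello\n"   # warnings are emitted at WARNING verbosity

logging.set_verbosity_error()
with CaptureLogger(logger) as cl:
    logger.warning("hello")
assert cl.out == ""          # and suppressed at ERROR verbosity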
Example 2
@contextmanager  # requires: from contextlib import contextmanager
def LoggingLevel(level):
    """
    This is a context manager to temporarily change the transformers modules' logging level to the desired value and have
    it restored to the original setting at the end of the scope.

    For example ::

        with LoggingLevel(logging.INFO):
            AutoModel.from_pretrained("gpt2") # calls logger.info() several times

    """
    orig_level = transformers_logging.get_verbosity()
    try:
        transformers_logging.set_verbosity(level)
        yield
    finally:
        transformers_logging.set_verbosity(orig_level)
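
A brief usage sketch (assuming the @contextmanager decorator noted above, and that transformers_logging is the transformers logging module imported under that alias); the try/finally guarantees the original verbosity is restored even if the body raises:

with LoggingLevel(transformers_logging.ERROR):
    ...  # noisy transformers calls are silenced inside the block
# here the previous verbosity is back in effect, even after an exception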
Example 3
def instantiate_trainer(config):
    verbosity = config.get("verbosity", logging.INFO)
    t_logging.set_verbosity(verbosity)
    logger.setLevel(verbosity)

    # debug (see torch.autograd.detect_anomaly)
    set_detect_anomaly(bool(config.get("debug", False)))

    # model
    model_args = dict(name="ViT-B/32",
                      jit=False,
                      training=True,
                      Class="CLIPDecoder")
    model_args.update(config.get("model", {}))
    model_args["Class"] = getattr(clip.model, model_args["Class"])
    logger.info(f"loading model from pre-trained CLIP {model_args}...")
    model, image_preprocess = load(**model_args)

    # data
    train_dataset, eval_dataset = get_datasets(
        image_preprocess=image_preprocess, **config.get("dataset", {}))

    # training
    criterion_args = config.get("criterion", {})
    # get criterion class (e.g. nn.NLLLoss) by name
    CriterionClass = getattr(nn, criterion_args.pop("Class", "NLLLoss"))
    criterion = CriterionClass(**criterion_args)
    learner_args = config.get("learner", {})
    LearnerClass = getattr(sys.modules[__name__],
                           learner_args.pop("Class", "LanguageModel"))
    learner = LearnerClass(model, criterion)
    training_args = Seq2SeqTrainingArguments(**config.get("training", {}))
    trainer = CLIPTrainer(model=learner,
                          args=training_args,
                          data_collator=collate_batch,
                          train_dataset=train_dataset,
                          eval_dataset=eval_dataset,
                          compute_metrics=compute_metrics)
    # training callbacks
    for callback in config.get("callbacks", []):
        CallbackClass = getattr(trainer_callback, callback.pop("Class"))
        trainer.add_callback(CallbackClass(**callback))

    return trainer, training_args, config
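
To make the config-driven construction concrete, here is a hypothetical config dict showing the keys instantiate_trainer reads; the real schema is not part of the snippet, so every name and value below is an illustrative assumption:

# Illustrative only: the actual config schema is not shown in the snippet above.
config = {
    "verbosity": logging.INFO,
    "debug": False,                                  # enables torch.autograd anomaly detection
    "model": {"name": "ViT-B/32", "Class": "CLIPDecoder"},
    "dataset": {},                                   # forwarded to get_datasets()
    "criterion": {"Class": "CrossEntropyLoss"},      # resolved via getattr(nn, ...)
    "learner": {"Class": "LanguageModel"},           # resolved in the defining module
    "training": {"output_dir": "runs/clip"},         # Seq2SeqTrainingArguments kwargs
    "callbacks": [{"Class": "PrinterCallback"}],     # resolved via transformers.trainer_callback
}
trainer, training_args, config = instantiate_trainer(config)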
Example 4
    def test_set_level(self):
        logger = logging.get_logger()

        # the current default level is logging.WARNING
        level_origin = logging.get_verbosity()

        logging.set_verbosity_error()
        self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

        logging.set_verbosity_warning()
        self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

        logging.set_verbosity_info()
        self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

        logging.set_verbosity_debug()
        self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

        # restore to the original level
        logging.set_verbosity(level_origin)
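
The set_verbosity_* helpers exercised above map directly onto the standard logging levels, so the equivalence being asserted can be sketched as:

from transformers import logging

logging.set_verbosity_info()
assert logging.get_verbosity() == logging.INFO    # 20
logging.set_verbosity_error()
assert logging.get_verbosity() == logging.ERROR   # 40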
Example 5
                    help='directory with .wav files for queries')
parser.add_argument('--references_dir',
                    default='references',
                    help='directory with .wav files for references')

parser.add_argument('--model', default='wav2vec2-large-xlsr-53')
parser.add_argument(
    '--hft_logging',
    type=int,
    default=40,
    help='HuggingFace Transformers verbosity level (40 = errors, 30 = warnings, 20 = info, 10 = debug)'
)

args = parser.parse_args()

logging.set_verbosity(args.hft_logging)


def load_wav2vec2_featurizer(model, layer=None):
    """
    Loads the Wav2Vec2 featurization pipeline and returns it as a function.
    The featurizer returns a list with all hidden-layer representations if the "layer" argument is None.
    Otherwise, it returns only the specified layer's representations.
    """

    model_spec = KNOWN_MODELS.get(model, model)
    model_kwargs = {}
    if layer is not None:
        model_kwargs["num_hidden_layers"] = layer if layer > 0 else 0

    if type(model_spec) is dict:
Example 6
from transformers import BertForMaskedLM, BertTokenizer, logging
from gensim.models import KeyedVectors
from nltk.stem import PorterStemmer

import nltk
import torch
import math

from . import config

# Word embeddings.
embeddings = None

# Suppress warnings.
logging.set_verbosity(logging.ERROR)

# Load BERT model and tokenizer.
print("\nLoading BERT model...")
tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-uncased")
model = BertForMaskedLM.from_pretrained("bert-base-multilingual-uncased")
print("\nBERT model loaded!")

# Create stemmer object.
stemmer = PorterStemmer()

def load_embeddings(language):
    """Load word embeddings for selected language."""
    try:
        print("\nAttempting to load embeddings...")
        wv_path = "simplifier/embeddings/" + config.lang + ".kv"
        wv_model = KeyedVectors.load(wv_path, mmap='r')
Example 7
import os
import torch
import soundfile as sf
import pandas as pd
import numpy as np
from tqdm import tqdm

from argparse import ArgumentParser

from transformers import logging
from transformers.models.wav2vec2 import Wav2Vec2Model

logging.set_verbosity(40)  # 40 == logging.ERROR: only report errors

parser = ArgumentParser(
    prog='Wav2Vec2 Featurizer',
    description='Extract features aggregated across intervals specified in input csv file',
)

parser.add_argument('--model', default='wav2vec2-large-xlsr-53')
parser.add_argument('--intervals_csv', help='CSV file of intervals')
parser.add_argument('--features_csv', help='Name of output CSV file')

args = parser.parse_args()

KNOWN_MODELS = {
    # Pre-trained
    'wav2vec2-base': 'facebook/wav2vec2-base',
    'wav2vec2-large': {'name': 'facebook/wav2vec2-large', 'revision': '85c73b1a7c1ee154fd7b06634ca7f42321db94db'},
    # March 11, 2021 version: https://huggingface.co/facebook/wav2vec2-large/commit/85c73b1a7c1ee154fd7b06634ca7f42321db94db
    'wav2vec2-large-lv60': 'facebook/wav2vec2-large-lv60',