예제 #1
0
    def __init__(
        self,
        pretrained_path,
        save_path,
        output_norm=True,
        freeze=True,
        pretrain=True,
    ):
        """Fetch a fairseq checkpoint and keep its first model on the instance.

        Arguments
        ---------
        pretrained_path : str
            Checkpoint source handed to ``download_file``.
        save_path : str
            Destination of the download; fairseq loads the checkpoint from it.
        output_norm : bool
            Stored unchanged as ``self.output_norm``.
        freeze : bool
            Stored as ``self.freeze``; when true the model is put in eval mode.
        pretrain : bool
            When false, ``self.reset_layer`` is called on the loaded model.
        """
        super().__init__()

        # Bring the checkpoint to save_path before asking fairseq to read it.
        download_file(pretrained_path, save_path)

        loaded = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [save_path]
        )
        ensemble, cfg, _task = loaded

        # Remember whether the checkpoint's config asks for normalized input.
        self.normalize = cfg.normalize

        # Only the first entry of the returned ensemble is used.
        wav2vec = ensemble[0]
        self.model = wav2vec
        self.freeze = freeze
        self.output_norm = output_norm
        if self.freeze:
            wav2vec.eval()

        # Without pretraining the layers are re-initialized.
        if not pretrain:
            self.reset_layer(self.model)
예제 #2
0
def download_to_dir(url, directory):
    """Download ``url`` into ``directory`` and return the resulting path.

    The file name is the basename of the path component of ``url``. The
    directory is created when it does not exist yet.
    """
    print("called download_to_dir")
    os.makedirs(directory, exist_ok=True)
    url_path = urllib.parse.urlparse(url).path
    target = os.path.join(directory, os.path.basename(url_path))
    download_file(url, target)
    return target
예제 #3
0
    def __init__(
        self,
        pretrained_path,
        save_path,
        output_norm=True,
        freeze=True,
        pretrain=True,
    ):
        """Fetch a fairseq wav2vec1 checkpoint and keep its first model.

        Arguments
        ---------
        pretrained_path : str
            Checkpoint source handed to ``download_file``.
        save_path : str
            Destination of the download; the checkpoint is loaded from it.
        output_norm : bool
            Stored unchanged as ``self.output_norm``.
        freeze : bool
            Stored as ``self.freeze``; when true the model is put in eval mode.
        pretrain : bool
            When false, ``self.reset_layer`` is called on the loaded model.
        """
        super().__init__()
        self.freeze = freeze
        self.output_norm = output_norm

        # Download the pretrained wav2vec1 model. It can be local or online.
        download_file(pretrained_path, save_path)

        # Load from save_path (the file download_file was asked to produce)
        # rather than from pretrained_path, which may not be a readable
        # local file.
        (
            ensemble,
            _cfg,
            _task,
        ) = fairseq.checkpoint_utils.load_model_ensemble_and_task([save_path])

        # The loader result is indexed to pick one model, so eval() has to be
        # called on that model and not on the ensemble container.
        self.model = ensemble[0]
        if self.freeze:
            self.model.eval()

        # Randomly initialized layers if pretrain is False
        if not pretrain:
            self.reset_layer(self.model)
예제 #4
0
def create_lexicon(lexicon_save_filepath):
    """
    Creates the lexicon object, downloading if it hasn't been done yet.

    For every word only the first pronunciation found in the file is kept.
    Entries whose word contains punctuation are stored under the
    punctuation-free spelling, but only if no punctuation-free entry claimed
    that spelling. Blank lines in the file are ignored.

    Arguments
    ---------
    lexicon_save_filepath : str
        Path to save the lexicon when downloading

    Returns
    -------
    The lexicon mapping from word to a space-separated phone string with the
    digits 0, 1 and 2 stripped from both ends of every phone.
    """
    if not os.path.isfile(lexicon_save_filepath):
        download_file(LEXICON_URL, lexicon_save_filepath)

    # NOTE(review): this aliases the module-level MISSING_LEXICON object, so
    # entries are added to it in place -- confirm that this is intended.
    lexicon = MISSING_LEXICON
    delayed_words = {}

    # The context manager guarantees the file is closed, also on errors.
    with open(lexicon_save_filepath) as lexicon_file:
        for line in lexicon_file:
            fields = line.split()
            if not fields:
                # A blank line carries no word; skip it instead of failing.
                continue
            word = fields[0]
            phns = " ".join(p.strip("012") for p in fields[1:])

            # Don't add words with punctuation until we can be sure they
            # won't overwrite words without punctuation.
            clean_word = remove_punctuation(word)
            if clean_word != word:
                if clean_word not in delayed_words:
                    delayed_words[clean_word] = phns
            elif clean_word not in lexicon:
                lexicon[clean_word] = phns

    # Add words with punctuation if they won't overwrite non-punctuated words
    for word, phns in delayed_words.items():
        if word not in lexicon:
            lexicon[word] = phns

    return lexicon
예제 #5
0
def download_and_pretrain():
    """Fetch the embedding checkpoint and load it into the embedding model.

    The checkpoint is stored as ``emb.ckpt`` under ``params["model_dir"]``
    and loaded with strict key matching.
    """
    ckpt_path = params["model_dir"] + "/emb.ckpt"
    download_file(params["embedding_file"], ckpt_path)
    weights = torch.load(ckpt_path)
    params["embedding_model"].load_state_dict(weights, strict=True)
예제 #6
0
def _prepare_openrir(folder, reverb_csv, noise_csv, max_noise_len):
    """Download the openrir data and write the reverb and noise csv files.

    Arguments
    ---------
    folder : str
        The location of the folder containing the dataset.
    reverb_csv : str
        Filename for storing the prepared reverb csv.
    noise_csv : str
        Filename for storing the prepared noise csv.
    max_noise_len : float
        The maximum noise length in seconds. Noises longer
        than this will be cut into pieces.
    """
    archive_path = os.path.join(folder, "rirs_noises.zip")
    dataset_dir = os.path.join(folder, "RIRS_NOISES")

    # Unpacking is only requested while the extracted folder is missing.
    if os.path.isdir(dataset_dir):
        download_file(OPENRIR_URL, archive_path)
    else:
        download_file(OPENRIR_URL, archive_path, unpack=True)

    # Each csv is generated only when it is not on disk yet.
    if not os.path.isfile(reverb_csv):
        rir_list = os.path.join(
            folder, "RIRS_NOISES", "real_rirs_isotropic_noises", "rir_list"
        )
        _prepare_csv(folder, rir_list, reverb_csv)

    if not os.path.isfile(noise_csv):
        noise_list = os.path.join(
            folder, "RIRS_NOISES", "pointsource_noises", "noise_list"
        )
        _prepare_csv(folder, noise_list, noise_csv, max_noise_len)
예제 #7
0
    def load_lm(self):
        """Fetch the LM checkpoint named in the hparams and load it.

        The checkpoint is stored as ``lm.ckpt`` in ``self.save_folder`` and
        loaded into ``self.model`` with strict key matching.
        """
        ckpt_path = os.path.join(self.save_folder, "lm.ckpt")
        download_file(self.hparams["lm_ckpt_file"], ckpt_path)

        # Place the stored tensors on this object's device while loading.
        weights = torch.load(ckpt_path, map_location=self.device)
        self.model.load_state_dict(weights, strict=True)
예제 #8
0
 def load_lm(self):
     """Fetch the LM checkpoint, load it strictly and switch to eval mode.

     The checkpoint is stored as ``save/lm_model.ckpt`` below the output
     folder given in the hparams.
     """
     ckpt_path = os.path.join(
         self.hparams.output_folder, "save", "lm_model.ckpt"
     )
     download_file(self.hparams.lm_ckpt_file, ckpt_path)
     weights = torch.load(ckpt_path)
     lm = self.modules.lm_model
     lm.load_state_dict(weights, strict=True)
     lm.eval()
예제 #9
0
    def load_lm(self):
        """Fetch the LM checkpoint and load it into the ``lm_model`` module.

        The checkpoint is stored as ``save/lm_model.ckpt`` below the output
        folder given in the hparams.
        """
        ckpt_path = os.path.join(
            self.hparams.output_folder, "save", "lm_model.ckpt"
        )
        download_file(self.hparams.lm_ckpt_file, ckpt_path)

        weights = torch.load(ckpt_path)
        lm_model = self.hparams.modules["lm_model"]
        lm_model.load_state_dict(weights)
def download_and_pretrain():
    """Load the embedding checkpoint, downloading it first when needed.

    When ``params["embedding_file"]`` contains ``"http"`` it is downloaded
    to ``save/embedding_model.ckpt`` under the output folder; otherwise the
    value itself is used as the checkpoint path.
    """
    source = params["embedding_file"]
    if "http" in source:
        ckpt_path = params["output_folder"] + "/save/embedding_model.ckpt"
        download_file(source, ckpt_path)
    else:
        ckpt_path = source
    weights = torch.load(ckpt_path)
    params["embedding_model"].load_state_dict(weights, strict=True)
예제 #11
0
    def load_asr(self):
        """Load the acoustic model weights named in the hparams.

        The weights are always read from ``asr_model.ckpt`` in the save
        folder; a download to that path happens only when the configured
        checkpoint location contains ``"http"``.
        """
        hp = self.hparams
        ckpt_path = os.path.join(hp["save_folder"], "asr_model.ckpt")
        source = hp["asr_ckpt_file"]
        # NOTE(review): a non-http source is never copied to ckpt_path here,
        # so the file must already be present -- confirm this is intended.
        if "http" in source:
            download_file(source, ckpt_path)

        weights = torch.load(ckpt_path, map_location=self.device)
        self.mod.asr_model.load_state_dict(weights, strict=True)
예제 #12
0
    def __init__(
        self,
        hparams_file="https://www.dropbox.com/s/ct72as3hapy8kb5/ecapa_big.yaml?dl=1",
        overrides={},
        freeze_params=True,
        norm_emb=True,
        save_folder="emb_model",
    ):
        """Downloads the pretrained modules specified in the yaml

        Arguments
        ---------
        hparams_file : str
            Location of the hyperparameter yaml, handed to ``download_file``.
        overrides : dict
            Overrides applied when loading the yaml. The mapping is copied,
            so neither the caller's dict nor the shared default is modified.
        freeze_params : bool
            When true the embedding model is put in eval mode and its
            parameters stop requiring gradients.
        norm_emb : bool
            Stored as ``self.norm_emb``.
        save_folder : str
            Folder that receives the yaml; also injected into the overrides
            under the ``"save_folder"`` key.
        """
        super().__init__()
        self.norm_emb = norm_emb

        save_model_path = os.path.join(save_folder, "embedding.yaml")
        download_file(hparams_file, save_model_path)
        hparams_file = save_model_path

        # Copy before adding the save folder: writing into the argument would
        # leak state into the mutable default and into the caller's dict.
        overrides = dict(overrides)
        overrides["save_folder"] = save_folder

        # Loading modules defined in the yaml file
        with open(hparams_file) as fin:
            self.hparams = load_hyperpyyaml(fin, overrides)

        # putting modules on the right device
        # We need to check if DDP has been initialised
        # in order to give the right device
        if torch.distributed.is_initialized():
            self.device = ":".join([
                self.hparams["device"].split(":")[0], os.environ["LOCAL_RANK"]
            ])
        else:
            self.device = self.hparams["device"]

        # Creating directory where pre-trained models are stored.
        # A relative save folder is resolved against this file's directory.
        if not os.path.isabs(self.hparams["save_folder"]):
            dirname = os.path.dirname(__file__)
            self.hparams["save_folder"] = os.path.join(
                dirname, self.hparams["save_folder"])
        # exist_ok avoids the race between checking and creating the folder.
        os.makedirs(self.hparams["save_folder"], exist_ok=True)

        # putting modules on the right device
        self.embedding_model = self.hparams["embedding_model"].to(self.device)
        self.mean_var_norm = self.hparams["mean_var_norm"].to(self.device)
        self.mean_var_norm_emb = self.hparams["mean_var_norm_emb"].to(
            self.device)
        self.similarity = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)

        # Load pretrained modules
        self.load_model()

        # If we don't want to backprop, freeze the pretrained parameters
        if freeze_params:
            self.embedding_model.eval()
            for p in self.embedding_model.parameters():
                p.requires_grad = False
예제 #13
0
    def load_lm(self):
        """Load the LM checkpoint, downloading it only when it is missing.

        The checkpoint lives at ``save/lm_model.ckpt`` below the output
        folder. After strict loading the LM is switched to eval mode.
        """
        hp = self.hparams
        ckpt_path = os.path.join(hp.output_folder, "save", "lm_model.ckpt")
        if not os.path.isfile(ckpt_path):
            download_file(hp.language_model_file, ckpt_path)

        # Place the stored tensors on this object's device while loading.
        weights = torch.load(ckpt_path, map_location=self.device)
        lm = hp.lm_model
        lm.load_state_dict(weights, strict=True)
        lm.eval()
예제 #14
0
 def download_and_pretrain(self):
     """Load the embedding checkpoint from the output folder.

     The weights are always read from ``save/embedding_model.ckpt`` below
     ``hparams["output_folder"]``; a download to that path happens only
     when ``hparams["embedding_file"]`` contains ``"http"``.
     """
     ckpt_path = hparams["output_folder"] + "/save/embedding_model.ckpt"
     source = hparams["embedding_file"]
     if "http" in source:
         download_file(source, ckpt_path)
     weights = torch.load(ckpt_path)
     hparams["embedding_model"].load_state_dict(weights, strict=True)
예제 #15
0
    def __init__(self, tokenizer_file, save_folder="model_checkpoints"):
        """Fetch a SentencePiece model file and load it into ``self.spm``.

        Arguments
        ---------
        tokenizer_file : str
            Source of the tokenizer model, handed to ``download_file``.
        save_folder : str
            Folder in which the model is stored as ``tok.model``.
        """
        super().__init__()

        model_path = os.path.join(save_folder, "tok.model")
        download_file(
            source=tokenizer_file,
            dest=model_path,
            replace_existing=False,
        )

        # Create the processor, then load the fetched model into it.
        self.spm = spm.SentencePieceProcessor()
        self.spm.load(model_path)
def download_mini_librispeech(destination):
    """Fetch the training archive and extract it in place.

    Arguments
    ---------
    destination : str
        Folder that receives ``train-clean-5.tar.gz`` and its contents.
    """
    archive = os.path.join(destination, "train-clean-5.tar.gz")
    download_file(MINILIBRI_TRAIN_URL, archive)
    shutil.unpack_archive(archive, destination)
예제 #17
0
    def load_lm(self):
        """Fetch the LM checkpoint, strip key prefixes and load it strictly.

        The checkpoint is stored as ``save/lm_model.ckpt`` below the output
        folder. Afterwards the LM is switched to eval mode.
        """
        ckpt_path = os.path.join(
            self.hparams.output_folder, "save", "lm_model.ckpt"
        )
        download_file(self.hparams.language_model_file, ckpt_path)

        raw_state = torch.load(
            ckpt_path, map_location=torch.device(self.device)
        )

        # Drop everything up to and including the first "." of every key.
        stripped_state = {}
        for key, value in raw_state.items():
            stripped_state[key.split(".", 1)[1]] = value

        lm = self.hparams.lm_model
        lm.load_state_dict(stripped_state, strict=True)
        lm.eval()
        logger.info("loaded LM from {}".format(ckpt_path))
예제 #18
0
    def __init__(self, tokenizer_file):
        """Load a SentencePiece model from a local path or a remote source.

        Arguments
        ---------
        tokenizer_file : str
            Location of the tokenizer model. When it contains ``"http"`` it
            is first downloaded into a temporary directory; otherwise it is
            loaded directly.
        """
        super().__init__()

        # Defining tokenizer and loading it
        self.spm = spm.SentencePieceProcessor()

        if "http" in tokenizer_file:
            # The context manager removes the temporary directory explicitly
            # instead of relying on finalizer cleanup. The model is loaded
            # while the directory still exists.
            with tempfile.TemporaryDirectory() as temp_dir:
                save_file = os.path.join(temp_dir, "tok.model")
                download_file(
                    source=tokenizer_file,
                    dest=save_file,
                    replace_existing=True,
                )
                self.spm.load(save_file)
        else:
            self.spm.load(tokenizer_file)
예제 #19
0
    def __init__(
        self,
        hparams_file="https://www.dropbox.com/s/54vmm04g3gezwz3/pretrained_ASR_BPE1000.yaml?dl=1",
        save_folder="asr_model",
        overrides={},
        freeze_params=True,
    ):
        """Downloads the pretrained modules specified in the yaml

        Arguments
        ---------
        hparams_file : str
            Location of the hyperparameter yaml, handed to ``download_file``.
        save_folder : str
            Folder that receives the yaml; also injected into the overrides
            under the ``"save_folder"`` key.
        overrides : dict
            Overrides applied when loading the yaml. The mapping is copied,
            so neither the caller's dict nor the shared default is modified.
        freeze_params : bool
            When true the ASR and LM modules are put in eval mode and their
            parameters stop requiring gradients.
        """
        super().__init__()

        save_model_path = os.path.join(save_folder, "ASR.yaml")
        download_file(hparams_file, save_model_path)
        hparams_file = save_model_path

        # Copy before adding the save folder: writing into the argument would
        # leak state into the mutable default and into the caller's dict.
        overrides = dict(overrides)
        overrides["save_folder"] = save_folder

        # Loading modules defined in the yaml file
        with open(hparams_file) as fin:
            self.hparams = load_hyperpyyaml(fin, overrides)

        # putting modules on the right device
        # We need to check if DDP has been initialised
        # in order to give the right device
        if torch.distributed.is_initialized():
            self.device = ":".join([
                self.hparams["device"].split(":")[0], os.environ["LOCAL_RANK"]
            ])
        else:
            self.device = self.hparams["device"]

        # Creating directory where pre-trained models are stored.
        # exist_ok avoids the race between checking and creating the folder.
        os.makedirs(self.hparams["save_folder"], exist_ok=True)

        # putting modules on the right device
        self.mod = torch.nn.ModuleDict(self.hparams["modules"]).to(self.device)

        # Load pretrained modules
        self.load_asr()

        # The tokenizer is the one used by the LM
        self.tokenizer = self.hparams["lm_model"].tokenizer

        # If we don't want to backprop, freeze the pretrained parameters
        if freeze_params:
            self.mod.asr_model.eval()
            for p in self.mod.asr_model.parameters():
                p.requires_grad = False
            self.mod.lm_model.eval()
            for p in self.mod.lm_model.parameters():
                p.requires_grad = False
예제 #20
0
    def __init__(
        self,
        pretrained_path,
        save_path,
        input_norm=None,
        output_norm=True,
        freeze=True,
        freeze_feature_extractor=False,
        pretrain=True,
    ):
        """Fetch a fairseq checkpoint and keep its first model on the instance.

        Arguments
        ---------
        pretrained_path : str
            Checkpoint source handed to ``download_file``.
        save_path : str
            Destination of the download; fairseq loads the checkpoint from it.
        input_norm : bool or None
            Explicit value for ``self.normalize``. When None the value is
            taken from the checkpoint config, falling back to False.
        output_norm : bool
            Stored unchanged as ``self.output_norm``.
        freeze : bool
            Stored as ``self.freeze``; when true the model is put in eval mode.
        freeze_feature_extractor : bool
            Only consulted when ``freeze`` is false: when true, gradients are
            disabled for the parameters of ``model.feature_extractor``.
        pretrain : bool
            When false, ``self.reset_layer`` is called on the loaded model.
        """
        super().__init__()

        # Bring the checkpoint to save_path before asking fairseq to read it.
        download_file(pretrained_path, save_path)

        loaded = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [save_path]
        )
        ensemble, cfg, _task = loaded

        # An explicit input_norm wins; otherwise look in the task config,
        # then in the top-level config, and default to False.
        if input_norm is not None:
            self.normalize = input_norm
        elif hasattr(cfg["task"], "normalize"):
            self.normalize = cfg["task"].normalize
        elif hasattr(cfg, "normalize"):
            self.normalize = cfg.normalize
        else:
            self.normalize = False

        # Only the first entry of the returned ensemble is used.
        wav2vec = ensemble[0]
        self.model = wav2vec
        self.freeze = freeze
        self.output_norm = output_norm
        self.freeze_feature_extractor = freeze_feature_extractor

        if self.freeze:
            wav2vec.eval()
        elif self.freeze_feature_extractor:
            # Only the feature extractor stops receiving gradients.
            for weight in self.model.feature_extractor.parameters():
                weight.requires_grad = False

        # Without pretraining the layers are re-initialized.
        if not pretrain:
            self.reset_layer(self.model)
예제 #21
0
    def load_model(self):
        """Fetch and load the embedding model and, optionally, its norm stats.

        Both files are stored in the save folder from the hparams. The
        normalization file is only handled when ``self.norm_emb`` is true.
        """
        folder = self.hparams["save_folder"]

        # Embedding model weights, loaded with strict key matching.
        emb_ckpt = os.path.join(folder, "embedding_model.ckpt")
        download_file(self.hparams["embedding_model_file"], emb_ckpt)
        weights = torch.load(emb_ckpt, map_location=self.device)
        self.embedding_model.load_state_dict(weights, strict=True)

        if not self.norm_emb:
            return

        # Embedding normalization statistics.
        norm_ckpt = os.path.join(folder, "mean_var_norm_emb.ckpt")
        download_file(self.hparams["embedding_norm_file"], norm_ckpt)
        self.mean_var_norm_emb._load(norm_ckpt, 0, self.device)
예제 #22
0
    def __init__(
        self,
        pretrained_path,
        save_path,
        input_norm=None,
        output_norm=True,
        freeze=True,
        pretrain=True,
    ):
        """Fetch a fairseq checkpoint and keep its first model on the instance.

        Arguments
        ---------
        pretrained_path : str
            Checkpoint source handed to ``download_file``.
        save_path : str
            Destination of the download; fairseq loads the checkpoint from it.
        input_norm : bool or None
            Explicit value for ``self.normalize``. When None the value must
            be present in the checkpoint config.
        output_norm : bool
            Stored unchanged as ``self.output_norm``.
        freeze : bool
            Stored as ``self.freeze``; when true the model is put in eval mode.
        pretrain : bool
            When false, ``self.reset_layer`` is called on the loaded model.

        Raises
        ------
        ValueError
            If ``input_norm`` is None and the checkpoint config has no
            ``normalize`` attribute.
        """
        super().__init__()

        # Bring the checkpoint to save_path before asking fairseq to read it.
        download_file(pretrained_path, save_path)

        loaded = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [save_path]
        )
        ensemble, cfg, _task = loaded

        # An explicit input_norm wins; otherwise the checkpoint has to say
        # whether the waveform is normalized, else the user must decide.
        if input_norm is not None:
            self.normalize = input_norm
        elif hasattr(cfg, "normalize"):
            self.normalize = cfg.normalize
        else:
            raise ValueError(
                "The normalize flag is not set in the loaded fairseq checkpoint. "
                "Please set it to True or False. True = waveform will be "
                "normalized. False, it won't. This is dependent on the model."
                " !!! it has to match the pretraining of the wav2vec 2.0 !!!"
            )

        # Only the first entry of the returned ensemble is used.
        wav2vec = ensemble[0]
        self.model = wav2vec
        self.freeze = freeze
        self.output_norm = output_norm
        if self.freeze:
            wav2vec.eval()

        # Without pretraining the layers are re-initialized.
        if not pretrain:
            self.reset_layer(self.model)
예제 #23
0
    def __init__(
        self,
        hparams_file="hparams/pretrained.yaml",
        save_folder="lm_TAS",
        overrides={},
        freeze_params=True,
    ):
        """Downloads the pretrained modules specified in the yaml

        Arguments
        ---------
        hparams_file : str
            Location of the hyperparameter yaml, handed to ``download_file``.
        save_folder : str
            Folder that receives the yaml; also injected into the overrides
            under the ``"save_folder"`` key.
        overrides : dict
            Overrides applied when loading the yaml. The mapping is copied,
            so neither the caller's dict nor the shared default is modified.
        freeze_params : bool
            When true the network is put in eval mode and its parameters
            stop requiring gradients.
        """
        super().__init__()

        self.save_folder = save_folder

        # Download yaml file from the web
        save_file = os.path.join(save_folder, "LM_TAS.yaml")
        download_file(hparams_file, save_file)
        hparams_file = save_file

        # Copy before adding the save folder: writing into the argument would
        # leak state into the mutable default and into the caller's dict.
        overrides = dict(overrides)
        overrides["save_folder"] = save_folder

        # Loading modules defined in the yaml file
        with open(hparams_file) as fin:
            self.hparams = load_hyperpyyaml(fin, overrides)

        # exist_ok avoids the race between checking and creating the folder.
        os.makedirs(self.hparams["save_folder"], exist_ok=True)

        # putting modules on the right device
        # We need to check if DDP has been initialised
        # in order to give the right device
        if torch.distributed.is_initialized():
            self.device = ":".join(
                [self.hparams["device"].split(":")[0], os.environ["LOCAL_RANK"]]
            )
        else:
            self.device = self.hparams["device"]

        self.net = self.hparams["net"].to(self.device)

        # Load pretrained modules
        self.load_lm()

        # Load tokenizer
        self.tokenizer = self.hparams["tokenizer"].spm

        # If we don't want to backprop, freeze the pretrained parameters
        if freeze_params:
            self.net.eval()
            for p in self.net.parameters():
                p.requires_grad = False
예제 #24
0
def load_pretrained(hparams):
    """Fetch pre-trained parameters and load them into the user's model.

    The checkpoint is stored as ``pretrained_model.ckpt`` in the save folder
    and loaded with strict key matching.

    Arguments
    ---------
    hparams : dict
        Hyperparameter dictionary. The keys "save_folder", "model" and
        "pretrain_model" are read.
    """
    ckpt_path = os.path.join(hparams["save_folder"], "pretrained_model.ckpt")
    download_file(hparams["pretrain_model"], ckpt_path)
    weights = torch.load(ckpt_path)
    hparams["model"].load_state_dict(weights, strict=True)
예제 #25
0
    def load_tokenizer(self):
        """Fetch the SentencePiece model from the hparams and load it.

        The model file is named ``<output_neurons>_unigram.model`` and is
        stored in the save folder.
        """
        hp = self.hparams
        model_name = str(hp["output_neurons"]) + "_unigram.model"
        model_path = os.path.join(hp["save_folder"], model_name)

        download_file(source=hp["tok_mdl_file"], dest=model_path)

        # Build the SentencePiece wrapper, then load the fetched model.
        self.mod.tokenizer = SentencePiece(
            model_dir=hp["save_folder"],
            vocab_size=hp["output_neurons"],
        )
        self.mod.tokenizer.sp.load(model_path)
예제 #26
0
def create_lexicon(lexicon_save_filepath):
    """
    Creates the lexicon object, downloading if it hasn't been done yet.

    Every line contributes one entry keyed by the punctuation-free word;
    a later line with the same key overwrites an earlier one. Blank lines
    are ignored.

    Arguments
    ---------
    lexicon_save_filepath : str
        Path to save the lexicon when downloading

    Returns
    -------
    The lexicon mapping from word to a space-separated phone string with the
    digits 0, 1 and 2 stripped from both ends of every phone.
    """
    if not os.path.isfile(lexicon_save_filepath):
        download_file(LEXICON_URL, lexicon_save_filepath)

    # NOTE(review): this aliases the module-level MISSING_LEXICON object, so
    # entries are added to it in place -- confirm that this is intended.
    lexicon = MISSING_LEXICON

    # The context manager guarantees the file is closed, also on errors.
    with open(lexicon_save_filepath) as lexicon_file:
        for line in lexicon_file:
            fields = line.split()
            if not fields:
                # A blank line carries no word; skip it instead of failing.
                continue
            phns = " ".join(p.strip("012") for p in fields[1:])
            lexicon[remove_punctuation(fields[0])] = phns

    return lexicon
예제 #27
0
    def load_tokenizer(self):
        """Fetch the tokenizer model and vocabulary named in the hparams.

        Each file is handled only when the matching hparams attribute
        exists. Existing copies are replaced, and only the model file is
        loaded into the tokenizer.
        """
        hp = self.hparams
        model_path = hp.save_folder + "/tok_unigram.model"
        vocab_path = hp.save_folder + "/tok_unigram.vocab"

        if hasattr(hp, "tok_mdl_file"):
            download_file(
                source=hp.tok_mdl_file,
                dest=model_path,
                replace_existing=True,
            )
            hp.tokenizer.sp.load(model_path)

        if hasattr(hp, "tok_voc_file"):
            download_file(
                source=hp.tok_voc_file,
                dest=vocab_path,
                replace_existing=True,
            )
예제 #28
0
Data download (dataset available at https://osf.io/etdkz/).
Reference to ERPCore: E. S. Kappenman et al., Neuroimage 2021 (https://doi.org/10.1016/j.neuroimage.2020.117465).

Author
------
Davide Borra, 2021
"""

import argparse
import os
from speechbrain.utils.data_utils import download_file


# Zip archive of the ERPCore P3 dataset.
ERPCORE_P3_URL = "https://files.osf.io/v1/resources/etdkz/providers/osfstorage/60077b04ba010908a78927e9/?zip="

# Command line: the target directory is the only, mandatory, option.
parser = argparse.ArgumentParser(
    "Python script to download required recipe data"
)
parser.add_argument(
    "--data_folder",
    type=str,
    required=True,
    help="Target data directory",
)
args = parser.parse_args()

# Make sure the target directory exists, then fetch and unpack the archive.
os.makedirs(args.data_folder, exist_ok=True)
print("Downloading ERPCore P3 dataset...")
_archive_path = os.path.join(args.data_folder, "ERPCore_P3.zip")
download_file(ERPCORE_P3_URL, _archive_path, unpack=True)
print("Successfully downloaded and unpacked in {0}".format(args.data_folder))
def create_lexicon_and_oov_csv(all_texts, data_folder, save_folder):
    """
    Creates lexicon csv files useful for training and testing a
    grapheme-to-phoneme (G2P) model.

    The LibriSpeech lexicon is downloaded into ``save_folder`` when it is
    missing. Every non-blank line becomes one row of ``lexicon.csv``, and
    the csv is finally handed to ``split_lexicon`` with the ratios
    ``[98, 1, 1]``.

    Arguments
    ---------
    all_texts : dict
        Dictionary containing text from the librispeech transcriptions.
        Not read by this function.
    data_folder : str
        Path to the folder where the original LibriSpeech dataset is stored.
        Not read by this function.
    save_folder : str
        The directory where to store the csv files.

    Returns
    -------
    None
    """
    # If the lexicon file does not exist, download it
    lexicon_url = "http://www.openslr.org/resources/11/librispeech-lexicon.txt"
    lexicon_path = os.path.join(save_folder, "librispeech-lexicon.txt")

    if not os.path.isfile(lexicon_path):
        logger.info("Lexicon file not found. Downloading from %s.",
                    lexicon_url)
        download_file(lexicon_url, lexicon_path)

    # Collect (word, pronunciation tokens) pairs from the lexicon.
    entries = []
    with open(lexicon_path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # A blank line carries no word; skip it instead of failing.
                continue
            entries.append((fields[0], fields[1:]))

    # Create lexicon.csv
    header = "ID,duration,char,char_format, char_opts,phn,phn_format,phn_opts\n"
    lexicon_csv_path = os.path.join(save_folder, "lexicon.csv")
    with open(lexicon_csv_path, "w") as f:
        f.write(header)
        for idx, (word, pronunciation) in enumerate(entries):
            # "duration" is the number of characters in the word.
            graphemes = " ".join(word)
            # Digits are removed from both ends of every phone.
            phonemes = " ".join(
                p.strip("0123456789") for p in pronunciation
            )
            row = [
                str(idx),
                str(len(word)),
                graphemes,
                "string",
                "",
                phonemes,
                "string",
                "",
            ]
            f.write(",".join(row) + "\n")
    logger.info("Lexicon written to %s.", lexicon_csv_path)

    # Split lexicon.csv in train, validation, and test splits
    split_lexicon(save_folder, [98, 1, 1])
if __name__ == "__main__":

    # Logger setup
    logger = logging.getLogger(__name__)
    current_dir = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(os.path.dirname(current_dir))

    # Load hyperparameters file with command-line overrides
    params_file, run_opts, overrides = sb.core.parse_arguments(sys.argv[1:])
    with open(params_file) as fin:
        params = load_hyperpyyaml(fin, overrides)

    # Download verification list (to exlude verification sentences from train)
    veri_file_path = os.path.join(
        params["save_folder"], os.path.basename(params["verification_file"]))
    download_file(params["verification_file"], veri_file_path)

    from voxceleb_prepare import prepare_voxceleb  # noqa E402

    # Create experiment directory
    sb.core.create_experiment_directory(
        experiment_directory=params["output_folder"],
        hyperparams_to_save=params_file,
        overrides=overrides,
    )

    # Prepare data from dev of Voxceleb1
    logger.info("Data preparation")
    prepare_voxceleb(
        data_folder=params["data_folder"],
        save_folder=params["save_folder"],