def __init__(
    self,
    pretrained_path,
    save_path,
    output_norm=True,
    freeze=True,
    pretrain=True,
):
    """Fetch a fairseq wav2vec2 checkpoint and wrap its first model.

    Arguments
    ---------
    pretrained_path : str
        Source of the checkpoint, handed to ``download_file``.
    save_path : str
        Destination handed to ``download_file``; the checkpoint is then
        loaded from this path.
    output_norm : bool
        Stored as ``self.output_norm``.
    freeze : bool
        Stored as ``self.freeze``; when true the model is put in eval mode.
    pretrain : bool
        When false, ``self.reset_layer`` is applied to the loaded model.
    """
    super().__init__()

    # Fetch the checkpoint, then let fairseq rebuild the model from it.
    download_file(pretrained_path, save_path)
    loaded = fairseq.checkpoint_utils.load_model_ensemble_and_task([save_path])
    ensemble, cfg, _task = loaded

    # The checkpoint config tells us whether the model expects a
    # normalized input waveform.
    self.normalize = cfg.normalize

    # Only the first model of the returned ensemble is used.
    self.model = ensemble[0]
    self.freeze = freeze
    self.output_norm = output_norm

    if self.freeze:
        self.model.eval()

    # Re-initialize the layers when pretrained weights are not wanted.
    if not pretrain:
        self.reset_layer(self.model)
def download_to_dir(url, directory):
    """Download ``url`` into ``directory`` under the file name found in the URL.

    Arguments
    ---------
    url : str
        Address of the file; the base name of its path component is used as
        the local file name.
    directory : str
        Target directory, created if it does not exist.

    Returns
    -------
    str
        ``directory`` joined with the file name.
    """
    print("called download_to_dir")
    os.makedirs(directory, exist_ok=True)

    url_path = urllib.parse.urlparse(url).path
    target = os.path.join(directory, os.path.basename(url_path))
    download_file(url, target)
    return target
def __init__(
    self,
    pretrained_path,
    save_path,
    output_norm=True,
    freeze=True,
    pretrain=True,
):
    """Fetch a fairseq wav2vec checkpoint and wrap its first model.

    Arguments
    ---------
    pretrained_path : str
        Source of the checkpoint, handed to ``download_file``.
    save_path : str
        Destination handed to ``download_file``; the checkpoint is then
        loaded from this path.
    output_norm : bool
        Stored as ``self.output_norm``.
    freeze : bool
        Stored as ``self.freeze``; when true the model is put in eval mode.
    pretrain : bool
        When false, ``self.reset_layer`` is applied to the loaded model.
    """
    super().__init__()
    self.freeze = freeze
    self.output_norm = output_norm

    # Download the pretrained wav2vec1 model. It can be local or online.
    download_file(pretrained_path, save_path)

    # Load from the downloaded copy: ``pretrained_path`` may be a URL,
    # which the checkpoint loader is handed as if it were a file name.
    ensemble, _cfg, _task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
        [save_path]
    )

    # The loader returns a list of models; only the first one is used.
    self.model = ensemble[0]

    # ``eval`` must be called on the selected model, not on the list.
    if self.freeze:
        self.model.eval()

    # Randomly initialized layers if pretrain is False
    if not pretrain:
        self.reset_layer(self.model)
def create_lexicon(lexicon_save_filepath):
    """
    Creates the lexicon object, downloading if it hasn't been done yet.

    Only the first pronunciation found for a word is kept. Entries whose
    word contains punctuation are added last, and only when the cleaned
    word is not already present, so they never replace a punctuation-free
    entry.

    Arguments
    ---------
    lexicon_save_filepath : str
        Path to save the lexicon when downloading

    Returns
    -------
    The lexicon mapping from word to a space-separated phoneme string.
    """
    if not os.path.isfile(lexicon_save_filepath):
        download_file(LEXICON_URL, lexicon_save_filepath)

    # NOTE(review): this binds the module-level MISSING_LEXICON object
    # itself, so the entries added below persist on it - confirm intended.
    lexicon = MISSING_LEXICON
    delayed_words = {}

    # A context manager guarantees the file is closed even on error.
    with open(lexicon_save_filepath) as lexicon_file:
        for line in lexicon_file:
            fields = line.split()
            if not fields:
                # Blank line: there is no word to index.
                continue
            word, *pronunciation = fields
            phns = " ".join(p.strip("012") for p in pronunciation)

            # Don't add words with punctuation until we can be sure they
            # won't overwrite words without punctuation.
            clean_word = remove_punctuation(word)
            if clean_word != word:
                if clean_word not in delayed_words:
                    delayed_words[clean_word] = phns
            elif clean_word not in lexicon:
                lexicon[clean_word] = phns

    # Add words with punctuation if they won't overwrite non-punctuated words
    for word, phns in delayed_words.items():
        if word not in lexicon:
            lexicon[word] = phns

    return lexicon
def download_and_pretrain():
    """Fetch the embedding checkpoint named in ``params`` and load it.

    The checkpoint is stored as ``emb.ckpt`` under ``params["model_dir"]``
    and loaded into ``params["embedding_model"]`` with strict key matching.
    """
    checkpoint_path = params["model_dir"] + "/emb.ckpt"
    download_file(params["embedding_file"], checkpoint_path)

    embedding_model = params["embedding_model"]
    embedding_model.load_state_dict(torch.load(checkpoint_path), strict=True)
def _prepare_openrir(folder, reverb_csv, noise_csv, max_noise_len):
    """Prepare the openrir dataset for adding reverb and noises.

    Arguments
    ---------
    folder : str
        The location of the folder containing the dataset.
    reverb_csv : str
        Filename for storing the prepared reverb csv.
    noise_csv : str
        Filename for storing the prepared noise csv.
    max_noise_len : float
        The maximum noise length in seconds. Noises longer
        than this will be cut into pieces.
    """
    archive_path = os.path.join(folder, "rirs_noises.zip")
    dataset_root = os.path.join(folder, "RIRS_NOISES")

    # Unpacking is only requested while the extracted folder is missing.
    if os.path.isdir(dataset_root):
        download_file(OPENRIR_URL, archive_path)
    else:
        download_file(OPENRIR_URL, archive_path, unpack=True)

    # Build the reverb csv unless it already exists.
    if not os.path.isfile(reverb_csv):
        rir_filelist = os.path.join(
            dataset_root, "real_rirs_isotropic_noises", "rir_list"
        )
        _prepare_csv(folder, rir_filelist, reverb_csv)

    # Build the noise csv unless it already exists.
    if not os.path.isfile(noise_csv):
        noise_filelist = os.path.join(
            dataset_root, "pointsource_noises", "noise_list"
        )
        _prepare_csv(folder, noise_filelist, noise_csv, max_noise_len)
def load_lm(self):
    """Loads the LM specified in the yaml file.

    The checkpoint is stored as ``lm.ckpt`` in ``self.save_folder`` and
    loaded into ``self.model`` with strict key matching.
    """
    ckpt_path = os.path.join(self.save_folder, "lm.ckpt")
    download_file(self.hparams["lm_ckpt_file"], ckpt_path)

    # Map the stored tensors onto this object's device while loading.
    lm_weights = torch.load(ckpt_path, map_location=self.device)
    self.model.load_state_dict(lm_weights, strict=True)
def load_lm(self):
    """Loads the LM specified in the yaml file.

    The checkpoint is stored as ``save/lm_model.ckpt`` under the output
    folder, loaded into ``self.modules.lm_model`` with strict key matching,
    and the model is then switched to eval mode.
    """
    ckpt_path = os.path.join(
        self.hparams.output_folder, "save", "lm_model.ckpt"
    )
    download_file(self.hparams.lm_ckpt_file, ckpt_path)

    lm_weights = torch.load(ckpt_path)
    lm = self.modules.lm_model
    lm.load_state_dict(lm_weights, strict=True)
    lm.eval()
def load_lm(self):
    """Loads the LM specified in the yaml file.

    The checkpoint is stored as ``save/lm_model.ckpt`` under the output
    folder and loaded into ``self.hparams.modules["lm_model"]``.
    """
    ckpt_path = os.path.join(
        self.hparams.output_folder, "save", "lm_model.ckpt"
    )
    download_file(self.hparams.lm_ckpt_file, ckpt_path)

    lm_weights = torch.load(ckpt_path)
    lm = self.hparams.modules["lm_model"]
    lm.load_state_dict(lm_weights)
def download_and_pretrain():
    """Load the embedding checkpoint named in ``params``.

    When ``params["embedding_file"]`` contains ``"http"`` it is first
    downloaded to ``save/embedding_model.ckpt`` under the output folder;
    otherwise it is used directly as the checkpoint path.
    """
    source = params["embedding_file"]
    if "http" not in source:
        checkpoint_path = source
    else:
        checkpoint_path = params["output_folder"] + "/save/embedding_model.ckpt"
        download_file(source, checkpoint_path)

    embedding_model = params["embedding_model"]
    embedding_model.load_state_dict(torch.load(checkpoint_path), strict=True)
def load_asr(self):
    """Loads the AM specified in the yaml file.

    The checkpoint is read from ``asr_model.ckpt`` in the save folder; it
    is downloaded there first only when ``asr_ckpt_file`` contains
    ``"http"``.
    """
    ckpt_path = os.path.join(self.hparams["save_folder"], "asr_model.ckpt")

    source = self.hparams["asr_ckpt_file"]
    if "http" in source:
        download_file(source, ckpt_path)

    asr_weights = torch.load(ckpt_path, map_location=self.device)
    self.mod.asr_model.load_state_dict(asr_weights, strict=True)
def __init__(
    self,
    hparams_file="https://www.dropbox.com/s/ct72as3hapy8kb5/ecapa_big.yaml?dl=1",
    overrides=None,
    freeze_params=True,
    norm_emb=True,
    save_folder="emb_model",
):
    """Downloads the pretrained modules specified in the yaml.

    Arguments
    ---------
    hparams_file : str
        Source of the yaml file, handed to ``download_file``.
    overrides : dict, optional
        Overrides passed to ``load_hyperpyyaml``. The mapping is copied, so
        the caller's object is never modified; ``"save_folder"`` is always
        set to ``save_folder`` in the copy.
    freeze_params : bool
        When true, the embedding model is put in eval mode and its
        parameters stop requiring gradients.
    norm_emb : bool
        Stored as ``self.norm_emb``.
    save_folder : str
        Folder in which the yaml file is stored.
    """
    super().__init__()
    self.norm_emb = norm_emb

    save_model_path = os.path.join(save_folder, "embedding.yaml")
    download_file(hparams_file, save_model_path)

    # Work on a private copy: a shared default dict (or the caller's dict)
    # must not pick up the "save_folder" entry written below.
    overrides = dict(overrides) if overrides else {}
    overrides["save_folder"] = save_folder

    # Loading modules defined in the yaml file
    with open(save_model_path) as fin:
        self.hparams = load_hyperpyyaml(fin, overrides)

    # We need to check if DDP has been initialised in order to give the
    # right device: keep the device type and append the local rank.
    if torch.distributed.is_initialized():
        device_type = self.hparams["device"].split(":")[0]
        self.device = ":".join([device_type, os.environ["LOCAL_RANK"]])
    else:
        self.device = self.hparams["device"]

    # Creating directory where pre-trained models are stored. A relative
    # save folder is resolved against this module's directory.
    if not os.path.isabs(self.hparams["save_folder"]):
        dirname = os.path.dirname(__file__)
        self.hparams["save_folder"] = os.path.join(
            dirname, self.hparams["save_folder"]
        )
    os.makedirs(self.hparams["save_folder"], exist_ok=True)

    # putting modules on the right device
    self.embedding_model = self.hparams["embedding_model"].to(self.device)
    self.mean_var_norm = self.hparams["mean_var_norm"].to(self.device)
    self.mean_var_norm_emb = self.hparams["mean_var_norm_emb"].to(
        self.device
    )
    self.similarity = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)

    # Load pretrained modules
    self.load_model()

    # If we don't want to backprop, freeze the pretrained parameters
    if freeze_params:
        self.embedding_model.eval()
        for p in self.embedding_model.parameters():
            p.requires_grad = False
def load_lm(self):
    """Loads the LM specified in the yaml file.

    The checkpoint lives at ``save/lm_model.ckpt`` under the output folder
    and is downloaded only when that file is missing. It is loaded into
    ``self.hparams.lm_model`` with strict key matching, and the model is
    then switched to eval mode.
    """
    ckpt_path = os.path.join(
        self.hparams.output_folder, "save", "lm_model.ckpt"
    )
    already_present = os.path.isfile(ckpt_path)
    if not already_present:
        download_file(self.hparams.language_model_file, ckpt_path)

    lm_weights = torch.load(ckpt_path, map_location=self.device)
    lm = self.hparams.lm_model
    lm.load_state_dict(lm_weights, strict=True)
    lm.eval()
def download_and_pretrain(self):
    """Load the embedding checkpoint named in ``hparams``.

    The checkpoint is read from ``save/embedding_model.ckpt`` under the
    output folder; it is downloaded there first only when
    ``hparams["embedding_file"]`` contains ``"http"``.
    """
    checkpoint_path = hparams["output_folder"] + "/save/embedding_model.ckpt"

    source = hparams["embedding_file"]
    if "http" in source:
        download_file(source, checkpoint_path)

    embedding_model = hparams["embedding_model"]
    embedding_model.load_state_dict(torch.load(checkpoint_path), strict=True)
def __init__(self, tokenizer_file, save_folder="model_checkpoints"):
    """Fetch a SentencePiece model file and load it into ``self.spm``.

    Arguments
    ---------
    tokenizer_file : str
        Source of the tokenizer model, handed to ``download_file``.
    save_folder : str
        Folder in which the model is stored as ``tok.model``.
    """
    super().__init__()

    model_path = os.path.join(save_folder, "tok.model")
    download_file(
        source=tokenizer_file, dest=model_path, replace_existing=False
    )

    # Build the processor, then load the fetched model into it.
    self.spm = spm.SentencePieceProcessor()
    self.spm.load(model_path)
def download_mini_librispeech(destination):
    """Download dataset and unpack it.

    Arguments
    ---------
    destination : str
        Place to put dataset.
    """
    archive_name = "train-clean-5.tar.gz"
    archive_path = os.path.join(destination, archive_name)

    download_file(MINILIBRI_TRAIN_URL, archive_path)
    # Extract next to the archive, inside ``destination``.
    shutil.unpack_archive(archive_path, destination)
def load_lm(self):
    """Loads the LM specified in the yaml file.

    The checkpoint is stored as ``save/lm_model.ckpt`` under the output
    folder. Everything up to and including the first ``"."`` is dropped
    from each state-dict key before the weights are loaded into
    ``self.hparams.lm_model`` with strict key matching. The model is then
    switched to eval mode.
    """
    ckpt_path = os.path.join(
        self.hparams.output_folder, "save", "lm_model.ckpt"
    )
    download_file(self.hparams.language_model_file, ckpt_path)

    raw_state = torch.load(ckpt_path, map_location=torch.device(self.device))

    # Strip the leading "<prefix>." component from every key.
    stripped_state = {}
    for key, value in raw_state.items():
        stripped_state[key.split(".", 1)[1]] = value

    lm = self.hparams.lm_model
    lm.load_state_dict(stripped_state, strict=True)
    lm.eval()
    logger.info("loaded LM from {}".format(ckpt_path))
def __init__(self, tokenizer_file):
    """Load a SentencePiece model into ``self.spm``.

    Arguments
    ---------
    tokenizer_file : str
        Location of the tokenizer model. When it contains ``"http"`` the
        file is first downloaded into a temporary directory, which is
        removed once the model has been loaded; otherwise it is loaded
        directly from this path.
    """
    super().__init__()

    # Defining tokenizer and loading it
    self.spm = spm.SentencePieceProcessor()

    if "http" in tokenizer_file:
        # The context manager removes the temporary directory
        # deterministically instead of relying on the object's finalizer.
        with tempfile.TemporaryDirectory() as temp_dir:
            save_file = os.path.join(temp_dir, "tok.model")
            download_file(
                source=tokenizer_file, dest=save_file, replace_existing=True,
            )
            # Load while the downloaded file still exists.
            self.spm.load(save_file)
    else:
        self.spm.load(tokenizer_file)
def __init__(
    self,
    hparams_file="https://www.dropbox.com/s/54vmm04g3gezwz3/pretrained_ASR_BPE1000.yaml?dl=1",
    save_folder="asr_model",
    overrides=None,
    freeze_params=True,
):
    """Downloads the pretrained modules specified in the yaml.

    Arguments
    ---------
    hparams_file : str
        Source of the yaml file, handed to ``download_file``.
    save_folder : str
        Folder in which the yaml file is stored as ``ASR.yaml``.
    overrides : dict, optional
        Overrides passed to ``load_hyperpyyaml``. The mapping is copied, so
        the caller's object is never modified; ``"save_folder"`` is always
        set to ``save_folder`` in the copy.
    freeze_params : bool
        When true, the ASR and LM modules are put in eval mode and their
        parameters stop requiring gradients.
    """
    super().__init__()

    save_model_path = os.path.join(save_folder, "ASR.yaml")
    download_file(hparams_file, save_model_path)

    # Work on a private copy: a shared default dict (or the caller's dict)
    # must not pick up the "save_folder" entry written below.
    overrides = dict(overrides) if overrides else {}
    overrides["save_folder"] = save_folder

    # Loading modules defined in the yaml file
    with open(save_model_path) as fin:
        self.hparams = load_hyperpyyaml(fin, overrides)

    # We need to check if DDP has been initialised in order to give the
    # right device: keep the device type and append the local rank.
    if torch.distributed.is_initialized():
        device_type = self.hparams["device"].split(":")[0]
        self.device = ":".join([device_type, os.environ["LOCAL_RANK"]])
    else:
        self.device = self.hparams["device"]

    # Creating directory where pre-trained models are stored
    os.makedirs(self.hparams["save_folder"], exist_ok=True)

    # putting modules on the right device
    self.mod = torch.nn.ModuleDict(self.hparams["modules"]).to(self.device)

    # Load pretrained modules
    self.load_asr()

    # The tokenizer is the one used by the LM
    self.tokenizer = self.hparams["lm_model"].tokenizer

    # If we don't want to backprop, freeze the pretrained parameters
    if freeze_params:
        for module in (self.mod.asr_model, self.mod.lm_model):
            module.eval()
            for p in module.parameters():
                p.requires_grad = False
def __init__(
    self,
    pretrained_path,
    save_path,
    input_norm=None,
    output_norm=True,
    freeze=True,
    freeze_feature_extractor=False,
    pretrain=True,
):
    """Fetch a fairseq wav2vec2 checkpoint and wrap its first model.

    Arguments
    ---------
    pretrained_path : str
        Source of the checkpoint, handed to ``download_file``.
    save_path : str
        Destination handed to ``download_file``; the checkpoint is then
        loaded from this path.
    input_norm : bool, optional
        Value for ``self.normalize``. When None, it is read from the
        checkpoint config (``cfg["task"].normalize``, then
        ``cfg.normalize``) and falls back to False.
    output_norm : bool
        Stored as ``self.output_norm``.
    freeze : bool
        Stored as ``self.freeze``; when true the model is put in eval mode.
    freeze_feature_extractor : bool
        Only considered when ``freeze`` is false: the parameters of the
        model's ``feature_extractor`` then stop requiring gradients.
    pretrain : bool
        When false, ``self.reset_layer`` is applied to the loaded model.
    """
    super().__init__()

    # Fetch the checkpoint, then let fairseq rebuild the model from it.
    download_file(pretrained_path, save_path)
    loaded = fairseq.checkpoint_utils.load_model_ensemble_and_task([save_path])
    ensemble, cfg, _task = loaded

    # An explicit user choice wins; otherwise look the flag up in the
    # checkpoint config and default to False when it is absent.
    if input_norm is not None:
        self.normalize = input_norm
    elif hasattr(cfg["task"], "normalize"):
        self.normalize = cfg["task"].normalize
    elif hasattr(cfg, "normalize"):
        self.normalize = cfg.normalize
    else:
        self.normalize = False

    # Only the first model of the returned ensemble is used.
    self.model = ensemble[0]
    self.freeze = freeze
    self.output_norm = output_norm
    self.freeze_feature_extractor = freeze_feature_extractor

    if self.freeze:
        self.model.eval()
    elif self.freeze_feature_extractor:
        for param in self.model.feature_extractor.parameters():
            param.requires_grad = False

    # Re-initialize the layers when pretrained weights are not wanted.
    if not pretrain:
        self.reset_layer(self.model)
def load_model(self):
    """Loads the models specified in the yaml file.

    The embedding checkpoint is always fetched and loaded with strict key
    matching. The embedding normalization statistics are fetched and
    loaded only when ``self.norm_emb`` is true.
    """
    save_folder = self.hparams["save_folder"]

    # Embedding model
    embedding_ckpt = os.path.join(save_folder, "embedding_model.ckpt")
    download_file(self.hparams["embedding_model_file"], embedding_ckpt)
    embedding_weights = torch.load(embedding_ckpt, map_location=self.device)
    self.embedding_model.load_state_dict(embedding_weights, strict=True)

    # Normalization is optional.
    if not self.norm_emb:
        return

    norm_ckpt = os.path.join(save_folder, "mean_var_norm_emb.ckpt")
    download_file(self.hparams["embedding_norm_file"], norm_ckpt)
    self.mean_var_norm_emb._load(norm_ckpt, 0, self.device)
def __init__(
    self,
    pretrained_path,
    save_path,
    input_norm=None,
    output_norm=True,
    freeze=True,
    pretrain=True,
):
    """Fetch a fairseq wav2vec2 checkpoint and wrap its first model.

    Arguments
    ---------
    pretrained_path : str
        Source of the checkpoint, handed to ``download_file``.
    save_path : str
        Destination handed to ``download_file``; the checkpoint is then
        loaded from this path.
    input_norm : bool, optional
        Value for ``self.normalize``. When None, it is read from
        ``cfg.normalize`` of the checkpoint config.
    output_norm : bool
        Stored as ``self.output_norm``.
    freeze : bool
        Stored as ``self.freeze``; when true the model is put in eval mode.
    pretrain : bool
        When false, ``self.reset_layer`` is applied to the loaded model.

    Raises
    ------
    ValueError
        If ``input_norm`` is None and the checkpoint config has no
        ``normalize`` attribute.
    """
    super().__init__()

    # Fetch the checkpoint, then let fairseq rebuild the model from it.
    download_file(pretrained_path, save_path)
    loaded = fairseq.checkpoint_utils.load_model_ensemble_and_task([save_path])
    ensemble, cfg, _task = loaded

    # An explicit user choice wins; otherwise the flag must be present in
    # the checkpoint config, because there is no safe default.
    if input_norm is not None:
        self.normalize = input_norm
    elif hasattr(cfg, "normalize"):
        self.normalize = cfg.normalize
    else:
        raise ValueError(
            "The normalize flag is not set in the loaded fairseq checkpoint. "
            "Please set it to True or False. True = waveform will be "
            "normalized. False, it won't. This is dependent on the model."
            " !!! it has to match the pretraining of the wav2vec 2.0 !!!"
        )

    # Only the first model of the returned ensemble is used.
    self.model = ensemble[0]
    self.freeze = freeze
    self.output_norm = output_norm

    if self.freeze:
        self.model.eval()

    # Re-initialize the layers when pretrained weights are not wanted.
    if not pretrain:
        self.reset_layer(self.model)
def __init__(
    self,
    hparams_file="hparams/pretrained.yaml",
    save_folder="lm_TAS",
    overrides=None,
    freeze_params=True,
):
    """Downloads the pretrained modules specified in the yaml.

    Arguments
    ---------
    hparams_file : str
        Source of the yaml file, handed to ``download_file``.
    save_folder : str
        Folder in which the yaml file is stored as ``LM_TAS.yaml``.
    overrides : dict, optional
        Overrides passed to ``load_hyperpyyaml``. The mapping is copied, so
        the caller's object is never modified; ``"save_folder"`` is always
        set to ``save_folder`` in the copy.
    freeze_params : bool
        When true, the network is put in eval mode and its parameters stop
        requiring gradients.
    """
    super().__init__()
    self.save_folder = save_folder

    # Download yaml file from the web
    save_file = os.path.join(save_folder, "LM_TAS.yaml")
    download_file(hparams_file, save_file)

    # Work on a private copy: a shared default dict (or the caller's dict)
    # must not pick up the "save_folder" entry written below.
    overrides = dict(overrides) if overrides else {}
    overrides["save_folder"] = save_folder

    # Loading modules defined in the yaml file
    with open(save_file) as fin:
        self.hparams = load_hyperpyyaml(fin, overrides)

    os.makedirs(self.hparams["save_folder"], exist_ok=True)

    # We need to check if DDP has been initialised in order to give the
    # right device: keep the device type and append the local rank.
    if torch.distributed.is_initialized():
        device_type = self.hparams["device"].split(":")[0]
        self.device = ":".join([device_type, os.environ["LOCAL_RANK"]])
    else:
        self.device = self.hparams["device"]

    self.net = self.hparams["net"].to(self.device)

    # Load pretrained modules
    self.load_lm()

    # Load tokenizer
    self.tokenizer = self.hparams["tokenizer"].spm

    # If we don't want to backprop, freeze the pretrained parameters
    if freeze_params:
        self.net.eval()
        for p in self.net.parameters():
            p.requires_grad = False
def load_pretrained(hparams):
    """Load pre-trained ASR parameters into the user-defined model.

    The source given by ``hparams["pretrain_model"]`` is handed to
    ``download_file`` and stored as ``pretrained_model.ckpt`` in the save
    folder before being loaded with strict key matching.

    Arguments
    ---------
    hparams : dict
        This dictionary is loaded from the `train.yaml` file, and it
        includes all the hyperparameters needed for dataset construction
        and loading. Expects the dict to have "save_folder" and "model"
        and "pretrain_model"
    """
    checkpoint_path = os.path.join(
        hparams["save_folder"], "pretrained_model.ckpt"
    )
    download_file(hparams["pretrain_model"], checkpoint_path)

    model = hparams["model"]
    model.load_state_dict(torch.load(checkpoint_path), strict=True)
def load_tokenizer(self):
    """Loads the sentence piece tokenizer specified in the yaml file.

    The model file is stored in the save folder under a name built from
    ``output_neurons`` (``<output_neurons>_unigram.model``). A
    ``SentencePiece`` object is then created as ``self.mod.tokenizer`` and
    the fetched model is loaded into it.
    """
    save_folder = self.hparams["save_folder"]
    vocab_size = self.hparams["output_neurons"]
    model_path = os.path.join(save_folder, str(vocab_size) + "_unigram.model")

    # Fetch the tokenizer model.
    download_file(source=self.hparams["tok_mdl_file"], dest=model_path)

    # Build the tokenizer wrapper, then load the fetched model into it.
    self.mod.tokenizer = SentencePiece(
        model_dir=save_folder, vocab_size=vocab_size
    )
    self.mod.tokenizer.sp.load(model_path)
def create_lexicon(lexicon_save_filepath):
    """
    Creates the lexicon object, downloading if it hasn't been done yet.

    Each lexicon line contributes one entry keyed by its punctuation-free
    word; a later line with the same key replaces an earlier one.

    Arguments
    ---------
    lexicon_save_filepath : str
        Path to save the lexicon when downloading

    Returns
    -------
    The lexicon mapping from word to a space-separated phoneme string.
    """
    if not os.path.isfile(lexicon_save_filepath):
        download_file(LEXICON_URL, lexicon_save_filepath)

    # NOTE(review): this binds the module-level MISSING_LEXICON object
    # itself, so the entries added below persist on it - confirm intended.
    lexicon = MISSING_LEXICON

    # A context manager guarantees the file is closed even on error.
    with open(lexicon_save_filepath) as lexicon_file:
        for line in lexicon_file:
            fields = line.split()
            if not fields:
                # Blank line: there is no word to index.
                continue
            word, *pronunciation = fields
            phns = " ".join(p.strip("012") for p in pronunciation)
            lexicon[remove_punctuation(word)] = phns

    return lexicon
def load_tokenizer(self):
    """Loads the sentence piece tokenizer specified in the yaml file.

    The model file is fetched and loaded only when the hyperparameters
    define ``tok_mdl_file``; the vocabulary file is fetched only when they
    define ``tok_voc_file``. Both are stored in the save folder, replacing
    any existing copy.
    """
    folder = self.hparams.save_folder
    model_path = folder + "/tok_unigram.model"
    vocab_path = folder + "/tok_unigram.vocab"

    if hasattr(self.hparams, "tok_mdl_file"):
        download_file(
            source=self.hparams.tok_mdl_file,
            dest=model_path,
            replace_existing=True,
        )
        self.hparams.tokenizer.sp.load(model_path)

    if hasattr(self.hparams, "tok_voc_file"):
        download_file(
            source=self.hparams.tok_voc_file,
            dest=vocab_path,
            replace_existing=True,
        )
Data download (dataset available at https://osf.io/etdkz/). Reference to ERPCore: E. S. Kappenman et al., Neuroimage 2021 (https://doi.org/10.1016/j.neuroimage.2020.117465). Author ------ Davide Borra, 2021 """ import argparse import os from speechbrain.utils.data_utils import download_file ERPCORE_P3_URL = "https://files.osf.io/v1/resources/etdkz/providers/osfstorage/60077b04ba010908a78927e9/?zip=" parser = argparse.ArgumentParser( "Python script to download required recipe data" ) parser.add_argument( "--data_folder", type=str, required=True, help="Target data directory" ) args = parser.parse_args() os.makedirs(args.data_folder, exist_ok=True) print("Downloading ERPCore P3 dataset...") download_file( ERPCORE_P3_URL, os.path.join(args.data_folder, "ERPCore_P3.zip"), unpack=True, ) print("Successfully downloaded and unpacked in {0}".format(args.data_folder))
def create_lexicon_and_oov_csv(all_texts, data_folder, save_folder):
    """
    Creates lexicon csv files useful for training and testing a
    grapheme-to-phoneme (G2P) model.

    The LibriSpeech lexicon is downloaded into ``save_folder`` when it is
    missing, converted to ``lexicon.csv`` (one row per lexicon line, with
    the word split into characters and digits stripped from both ends of
    each phoneme), and finally handed to ``split_lexicon``.

    Arguments
    ---------
    all_texts : dict
        Dictionary containing text from the librispeech transcriptions.
        Not read by this function; kept for interface compatibility.
    data_folder : str
        Path to the folder where the original LibriSpeech dataset is stored.
        Not read by this function; kept for interface compatibility.
    save_folder : str
        The directory where to store the csv files.

    Returns
    -------
    None
    """
    # If the lexicon file does not exist, download it
    lexicon_url = "http://www.openslr.org/resources/11/librispeech-lexicon.txt"
    lexicon_path = os.path.join(save_folder, "librispeech-lexicon.txt")
    if not os.path.isfile(lexicon_path):
        logger.info("Lexicon file not found. Downloading from %s.", lexicon_url)
        download_file(lexicon_url, lexicon_path)

    # NOTE(review): despite the function name, no OOV csv is produced here.

    # Collect (word, pronunciation) pairs from the lexicon.
    entries = []
    with open(lexicon_path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line: there is no word to index.
                continue
            entries.append((fields[0], fields[1:]))

    # Create lexicon.csv
    header = "ID,duration,char,char_format, char_opts,phn,phn_format,phn_opts\n"
    lexicon_csv_path = os.path.join(save_folder, "lexicon.csv")
    with open(lexicon_csv_path, "w") as f:
        f.write(header)
        for idx, (word, pronunciation) in enumerate(entries):
            # The "duration" column holds the number of characters.
            graphemes = " ".join(word)
            phonemes = " ".join(p.strip("0123456789") for p in pronunciation)
            row = [
                str(idx),
                str(len(word)),
                graphemes,
                "string",
                "",
                phonemes,
                "string",
                "",
            ]
            f.write(",".join(row) + "\n")
    logger.info("Lexicon written to %s.", lexicon_csv_path)

    # Split lexicon.csv in train, validation, and test splits
    split_lexicon(save_folder, [98, 1, 1])
if __name__ == "__main__": # Logger setup logger = logging.getLogger(__name__) current_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.dirname(current_dir)) # Load hyperparameters file with command-line overrides params_file, run_opts, overrides = sb.core.parse_arguments(sys.argv[1:]) with open(params_file) as fin: params = load_hyperpyyaml(fin, overrides) # Download verification list (to exlude verification sentences from train) veri_file_path = os.path.join( params["save_folder"], os.path.basename(params["verification_file"])) download_file(params["verification_file"], veri_file_path) from voxceleb_prepare import prepare_voxceleb # noqa E402 # Create experiment directory sb.core.create_experiment_directory( experiment_directory=params["output_folder"], hyperparams_to_save=params_file, overrides=overrides, ) # Prepare data from dev of Voxceleb1 logger.info("Data preparation") prepare_voxceleb( data_folder=params["data_folder"], save_folder=params["save_folder"],