def __init__(self, task, wer_args, zero_infinity, sentence_avg, remove_bpe):
    """Store symbol indices and, when requested, build a KenLM decoder.

    Args:
        task: object exposing ``target_dictionary`` (with ``bos()``,
            ``pad()``, ``eos()`` and ``len()``); also forwarded to the
            parent constructor.
        wer_args: ``None`` to skip decoder construction, otherwise a string
            that evaluates to the 4-tuple
            ``(kenlm_model, lexicon, lm_weight, word_score)``.
        zero_infinity: stored unchanged on the instance.
        sentence_avg: stored unchanged on the instance.
        remove_bpe: post-processing mode; any falsy value is replaced by
            ``"letter"``.
    """
    super().__init__(task)

    # The blank index is read from the dictionary's bos() entry.
    self.blank_idx = task.target_dictionary.bos()
    self.pad_idx = task.target_dictionary.pad()
    self.eos_idx = task.target_dictionary.eos()
    self.post_process = remove_bpe or "letter"

    if wer_args is None:
        self.w2l_decoder = None
    else:
        from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

        # NOTE(review): eval() executes arbitrary code; this is only safe
        # while wer_args comes from a trusted command line / config.
        kenlm_path, lexicon_path, lm_weight, word_score = eval(wer_args)

        vocab = task.target_dictionary
        # Both the token beam and the beam threshold are capped at 50.
        capped = min(50, len(vocab))
        decoder_opts = Namespace(
            nbest=1,
            criterion="ctc",
            kenlm_model=kenlm_path,
            lexicon=lexicon_path,
            beam=50,
            beam_size_token=capped,
            beam_threshold=capped,
            lm_weight=lm_weight,
            word_score=word_score,
            unk_weight=-math.inf,
            sil_weight=0,
        )
        self.w2l_decoder = W2lKenLMDecoder(decoder_opts, vocab)

    self.zero_infinity = zero_infinity
    self.sentence_avg = sentence_avg
def build_generator(cfg: UnsupGenerateConfig):
    """Instantiate the decoder selected by ``cfg.w2l_decoder``.

    The decoder classes are imported lazily so that only the backend that is
    actually requested has to be installed. ``task`` is taken from the
    enclosing scope.

    Args:
        cfg: generation config; ``cfg.w2l_decoder`` is compared against the
            ``DecoderType`` members, and for the Kaldi backend
            ``cfg.kaldi_decoder_config`` and ``cfg.beam`` are read as well.

    Returns:
        The constructed decoder object.

    Raises:
        AssertionError: if the Kaldi backend is selected without
            ``cfg.kaldi_decoder_config``.
        NotImplementedError: if ``cfg.w2l_decoder`` matches none of the
            handled decoder types.
    """
    w2l_decoder = cfg.w2l_decoder

    if w2l_decoder == DecoderType.VITERBI:
        from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder

        return W2lViterbiDecoder(cfg, task.target_dictionary)
    elif w2l_decoder == DecoderType.KENLM:
        from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

        return W2lKenLMDecoder(cfg, task.target_dictionary)
    elif w2l_decoder == DecoderType.FAIRSEQ:
        from examples.speech_recognition.w2l_decoder import W2lFairseqLMDecoder

        return W2lFairseqLMDecoder(cfg, task.target_dictionary)
    elif w2l_decoder == DecoderType.KALDI:
        from examples.speech_recognition.kaldi.kaldi_decoder import KaldiDecoder

        assert (
            cfg.kaldi_decoder_config is not None
        ), "kaldi_decoder_config must be set when the kaldi decoder is selected"

        return KaldiDecoder(
            cfg.kaldi_decoder_config,
            cfg.beam,
        )
    else:
        # The message lists every option handled above, including kaldi.
        raise NotImplementedError(
            "only wav2letter decoders with (viterbi, kenlm, fairseqlm, kaldi) options are supported at the moment but found "
            + str(w2l_decoder)
        )
def build_generator(self, args):
    """Return a wav2letter decoder when one is requested, else defer to the parent.

    Args:
        args: options object; its optional ``w2l_decoder`` attribute selects
            the decoder (``"viterbi"`` or ``"kenlm"``).

    Returns:
        A decoder built from ``args`` and ``self.target_dictionary`` for the
        two recognised names; otherwise whatever the parent class's
        ``build_generator`` returns.
    """
    choice = getattr(args, "w2l_decoder", None)

    # Decoder classes are imported lazily, only for the branch that is taken.
    if choice == "viterbi":
        from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder

        return W2lViterbiDecoder(args, self.target_dictionary)

    if choice == "kenlm":
        from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

        return W2lKenLMDecoder(args, self.target_dictionary)

    return super().build_generator(args)
def get_decoder(decoder_args_dict, dictionary):
    """Build a decoder from a plain dict of options.

    Args:
        decoder_args_dict: mapping of option names to values; it must contain
            ``decoder_type`` and, for ``"kenlm"``, ``unk_weight``.
        dictionary: passed to the decoder; for ``"kenlm"`` its length is also
            used as ``beam_size_token``.

    Returns:
        A Viterbi or KenLM decoder for the matching ``decoder_type``, or
        ``None`` for any other value.
    """
    decoder_args = Namespace(**decoder_args_dict)

    if decoder_args.decoder_type == "viterbi":
        from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder

        return W2lViterbiDecoder(decoder_args, dictionary)

    if decoder_args.decoder_type != "kenlm":
        # Unknown decoder types are not an error here; the caller gets None.
        return None

    from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

    decoder_args.beam_size_token = len(dictionary)
    if isinstance(decoder_args.unk_weight, str):
        # NOTE(review): eval() runs arbitrary code; safe only while the
        # option string comes from a trusted source.
        decoder_args.unk_weight = eval(decoder_args.unk_weight)
    return W2lKenLMDecoder(decoder_args, dictionary)
def build_generator(args):
    """Instantiate the wav2letter decoder named by ``args.w2l_decoder``.

    ``task`` is taken from the enclosing scope. Decoder classes are imported
    lazily so only the requested backend has to be importable.

    Args:
        args: options object; its optional ``w2l_decoder`` attribute must be
            one of ``"viterbi"``, ``"kenlm"`` or ``"fairseqlm"``.

    Returns:
        The constructed decoder.

    Raises:
        NotImplementedError: if ``w2l_decoder`` is missing or not one of the
            supported names.
    """
    w2l_decoder = getattr(args, "w2l_decoder", None)

    if w2l_decoder == "viterbi":
        from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder

        return W2lViterbiDecoder(args, task.target_dictionary)

    if w2l_decoder == "kenlm":
        from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

        # NOTE(review): looks like leftover debugging output — confirm
        # whether dumping the symbol table is still wanted.
        print(task.target_dictionary.symbols)
        return W2lKenLMDecoder(args, task.target_dictionary)

    if w2l_decoder == "fairseqlm":
        from examples.speech_recognition.w2l_decoder import W2lFairseqLMDecoder

        print(task.target_dictionary.symbols)
        return W2lFairseqLMDecoder(args, task.target_dictionary)

    # This function has no instance as its first argument, so the previous
    # zero-argument super() fallback could never reach a parent method and
    # failed with an unrelated error. Report the real problem instead.
    raise NotImplementedError(
        "only wav2letter decoders with (viterbi, kenlm, fairseqlm) options are supported at the moment but found "
        + str(w2l_decoder)
    )
def build_generator(args):
    """Instantiate the wav2letter decoder named by ``args.w2l_decoder``.

    ``task`` is taken from the enclosing scope. Decoder classes are imported
    lazily, only for the branch that is taken.

    Args:
        args: options object; its optional ``w2l_decoder`` attribute selects
            ``"viterbi"``, ``"kenlm"`` or ``"fairseqlm"``.

    Returns:
        The constructed decoder, or ``None`` (after printing a notice) when
        the requested name is missing or unsupported.
    """
    selected = getattr(args, "w2l_decoder", None)

    if selected == "viterbi":
        from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder

        return W2lViterbiDecoder(args, task.target_dictionary)

    if selected == "kenlm":
        from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

        return W2lKenLMDecoder(args, task.target_dictionary)

    if selected == "fairseqlm":
        from examples.speech_recognition.w2l_decoder import W2lFairseqLMDecoder

        return W2lFairseqLMDecoder(args, task.target_dictionary)

    # NOTE(review): an unsupported choice is only reported on stdout and the
    # caller receives None — callers must be prepared for that.
    print('only wav2letter decoders with (viterbi, kenlm, fairseqlm) options are supported at the moment')
    return None
def __init__(self, cfg: CtcCriterionConfig, task: FairseqTask):
    """Store symbol indices and, when configured, build a KenLM decoder.

    Args:
        cfg: criterion config. Reads ``post_process``, ``wer_args``,
            ``zero_infinity`` and ``sentence_avg``; the ``wer_*`` fields are
            overwritten from ``wer_args`` when that is not ``None``.
        task: object exposing ``target_dictionary`` and optionally
            ``blank_symbol``; also forwarded to the parent constructor.
    """
    super().__init__(task)

    # Use the task's blank symbol when it defines one, otherwise index 0.
    if hasattr(task, "blank_symbol"):
        self.blank_idx = task.target_dictionary.index(task.blank_symbol)
    else:
        self.blank_idx = 0
    self.pad_idx = task.target_dictionary.pad()
    self.eos_idx = task.target_dictionary.eos()
    self.post_process = cfg.post_process

    if cfg.wer_args is not None:
        # NOTE(review): eval() executes arbitrary code; this is only safe
        # while cfg.wer_args comes from a trusted command line / config.
        (
            cfg.wer_kenlm_model,
            cfg.wer_lexicon,
            cfg.wer_lm_weight,
            cfg.wer_word_score,
        ) = eval(cfg.wer_args)

    kenlm_model = cfg.wer_kenlm_model
    if kenlm_model is None or kenlm_model == "":
        self.w2l_decoder = None
    else:
        from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

        vocab = task.target_dictionary
        # Both the token beam and the beam threshold are capped at 50.
        capped = min(50, len(vocab))
        decoder_opts = Namespace(
            nbest=1,
            criterion="ctc",
            kenlm_model=kenlm_model,
            lexicon=cfg.wer_lexicon,
            beam=50,
            beam_size_token=capped,
            beam_threshold=capped,
            lm_weight=cfg.wer_lm_weight,
            word_score=cfg.wer_word_score,
            unk_weight=-math.inf,
            sil_weight=0,
        )
        self.w2l_decoder = W2lKenLMDecoder(decoder_opts, vocab)

    self.zero_infinity = cfg.zero_infinity
    self.sentence_avg = cfg.sentence_avg
def build_generator(args):
    """Instantiate the decoder named by ``args.w2l_decoder``.

    ``task`` is taken from the enclosing scope. Decoder classes are imported
    lazily so only the requested backend has to be importable.

    Args:
        args: options object; its optional ``w2l_decoder`` attribute must be
            one of ``"viterbi"``, ``"kenlm"``, ``"fairseqlm"``,
            ``"ctc_decoder"``, ``"cif_decoder"``, ``"cif_lm_decoder"``,
            ``"cif_bert_decoder"``, ``"seq2seq_decoder"`` or
            ``"seq2seq_lm_decoder"``.

    Returns:
        The constructed decoder.

    Raises:
        NotImplementedError: if ``w2l_decoder`` is missing or not one of the
            supported names.
    """
    w2l_decoder = getattr(args, "w2l_decoder", None)

    if w2l_decoder == "viterbi":
        from examples.speech_recognition.w2l_decoder import W2lViterbiDecoder

        return W2lViterbiDecoder(args, task.target_dictionary)

    if w2l_decoder == "kenlm":
        from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder

        return W2lKenLMDecoder(args, task.target_dictionary)

    if w2l_decoder == "fairseqlm":
        from examples.speech_recognition.w2l_decoder import W2lFairseqLMDecoder

        return W2lFairseqLMDecoder(args, task.target_dictionary)

    if w2l_decoder == "ctc_decoder":
        from examples.speech_recognition.ctc_decoder import CTCDecoder

        return CTCDecoder(args, task.target_dictionary)

    if w2l_decoder in ("cif_decoder", "cif_lm_decoder"):
        from examples.speech_recognition.cif_decoder import CIFDecoder

        # The "_lm" variant is given a pair of empty dicts, the plain
        # variant a single empty dict.
        extra = ({}, {}) if w2l_decoder == "cif_lm_decoder" else {}
        return CIFDecoder(args, task.target_dictionary, extra)

    if w2l_decoder == "cif_bert_decoder":
        from examples.speech_recognition.cif_bert_decoder import CIF_BERT_Decoder

        return CIF_BERT_Decoder(args, task.target_dictionary)

    if w2l_decoder in ("seq2seq_decoder", "seq2seq_lm_decoder"):
        from examples.speech_recognition.seq2seq_decoder import Seq2seqDecoder

        # Same convention as the CIF decoders above.
        extra = ({}, {}) if w2l_decoder == "seq2seq_lm_decoder" else {}
        return Seq2seqDecoder(args, task.target_dictionary, extra)

    # This function has no instance as its first argument, so the previous
    # zero-argument super() fallback could never reach a parent method and
    # failed with an unrelated error. Report the real problem instead.
    raise NotImplementedError(
        "only decoders with (viterbi, kenlm, fairseqlm, ctc_decoder, "
        "cif_decoder, cif_lm_decoder, cif_bert_decoder, seq2seq_decoder, "
        "seq2seq_lm_decoder) options are supported at the moment but found "
        + str(w2l_decoder)
    )