def __init__(self, cfg: DecoderConfig, tgt_dict: Dictionary) -> None:
    """Initialise decoder state for the criterion selected in ``cfg``.

    Args:
        cfg: Decoder configuration. ``nbest``, ``unitlm`` and ``criterion``
            are always read; ``asgtransitions`` and ``maxreplabel`` are
            read only when ``criterion`` is ``"asg_loss"``.
        tgt_dict: Target dictionary. It must support ``len()``, ``index()``,
            ``bos()`` and ``eos()``, and expose an ``indices`` container
            that supports ``in`` tests on symbol strings.

    Raises:
        ValueError: If ``criterion`` is ``"asg_loss"`` and
            ``cfg.asgtransitions`` does not contain ``vocab_size ** 2``
            entries.
        RuntimeError: If ``criterion`` is neither ``"ctc"`` nor
            ``"asg_loss"``.
    """
    self.tgt_dict = tgt_dict
    self.vocab_size = len(tgt_dict)
    self.nbest = cfg.nbest
    self.unitlm = cfg.unitlm

    if cfg.criterion == "ctc":
        self.criterion_type = CriterionType.CTC

        # Use the dedicated CTC blank symbol when the dictionary has one;
        # otherwise fall back to the BOS index.
        self.blank = (
            tgt_dict.index("<ctc_blank>")
            if "<ctc_blank>" in tgt_dict.indices
            else tgt_dict.bos()
        )

        # Silence symbol, in order of preference: "<sep>", "|", then EOS.
        if "<sep>" in tgt_dict.indices:
            self.silence = tgt_dict.index("<sep>")
        elif "|" in tgt_dict.indices:
            self.silence = tgt_dict.index("|")
        else:
            self.silence = tgt_dict.eos()

        self.asgtransitions = None
        # NOTE: ``maxreplabel`` is only assigned in the ASG branch below.
    elif cfg.criterion == "asg_loss":
        self.criterion_type = CriterionType.ASG
        # This branch sets both indices to -1 instead of looking them up.
        self.blank = -1
        self.silence = -1
        self.asgtransitions = cfg.asgtransitions
        self.maxreplabel = cfg.maxreplabel

        # Validate with an explicit raise rather than ``assert`` so the
        # check still runs when Python is started with -O.
        expected_size = self.vocab_size**2
        actual_size = len(self.asgtransitions)
        if actual_size != expected_size:
            raise ValueError(
                "asgtransitions must contain vocab_size**2 = "
                f"{expected_size} entries, got {actual_size}"
            )
    else:
        raise RuntimeError(f"unknown criterion: {cfg.criterion}")
def __init__(self, bpe, dictionary: Dictionary):
    """Store the BPE object and build a ``Vocabulary`` from ``dictionary``.

    Args:
        bpe: Stored unchanged as ``self.bpe``.
        dictionary: Source dictionary. It must expose ``symbols``, the
            ``pad()``, ``bos()`` and ``eos()`` index accessors, and support
            indexing by those indices to obtain the matching symbols.
    """
    self.bpe = bpe

    # Resolve each special symbol through its index and coerce it to ``str``
    # before handing it to ``Vocabulary``.
    all_symbols = dictionary.symbols
    pad_symbol = str(dictionary[dictionary.pad()])
    bos_symbol = str(dictionary[dictionary.bos()])
    eos_symbol = str(dictionary[dictionary.eos()])

    self.vocab = Vocabulary(
        all_symbols,
        pad_token=pad_symbol,
        bos_token=bos_symbol,
        eos_token=eos_symbol,
    )

    # Mirror the BOS/EOS tokens as exposed by the constructed vocabulary.
    self.bos = self.vocab.bos_token
    self.eos = self.vocab.eos_token
def __init__(self, tgt_dict: Dictionary) -> None:
    """Record the target dictionary and derive blank and silence indices.

    Args:
        tgt_dict: Target dictionary. It must support ``len()``, ``index()``,
            ``bos()`` and ``eos()``, and expose an ``indices`` container
            that supports ``in`` tests on symbol strings.
    """
    self.tgt_dict = tgt_dict
    self.vocab_size = len(tgt_dict)

    known_symbols = tgt_dict.indices

    # Blank: the "<ctc_blank>" symbol if present, otherwise the BOS index.
    if "<ctc_blank>" in known_symbols:
        self.blank = tgt_dict.index("<ctc_blank>")
    else:
        self.blank = tgt_dict.bos()

    # Silence: first of "<sep>" or "|" found in the dictionary, else EOS.
    for candidate in ("<sep>", "|"):
        if candidate in known_symbols:
            self.silence = tgt_dict.index(candidate)
            break
    else:
        self.silence = tgt_dict.eos()