Esempio n. 1
0
    def __init__(self,
                 mix_scp: str = "",
                 doa_scp: Union[str, List[str]] = "",
                 emb_scp: str = "",
                 ref_scp: Union[str, List[str]] = "",
                 sr: int = 16000) -> None:
        """
        Create the readers for each configured input

        Args:
            mix_scp: script handed to AudioReader, kept as self.mix
            doa_scp: one script or a list of scripts, each opened with
                BaseReader and its values converted by np.float32; a false
                value (e.g. "") leaves self.doa as None
            emb_scp: script handed to NumpyReader; a false value leaves
                self.emb as None
            ref_scp: one script or a list of scripts, each opened with
                AudioReader; a false value leaves self.ref as None
            sr: forwarded as the sr argument of every AudioReader
        """

        def to_float32(x):
            return np.float32(x)

        self.mix = AudioReader(mix_scp, sr=sr)

        # self.ref: list of readers / single reader / None, num_ref counts them
        if isinstance(ref_scp, list):
            self.ref = [AudioReader(scp, sr=sr) for scp in ref_scp]
            self.num_ref = len(self.ref)
        elif not ref_scp:
            self.ref, self.num_ref = None, 0
        else:
            self.ref, self.num_ref = AudioReader(ref_scp, sr=sr), 1

        # self.doa follows the same three-way layout as self.ref
        if isinstance(doa_scp, list):
            self.doa = [
                BaseReader(scp, value_processor=to_float32) for scp in doa_scp
            ]
            self.num_doa = len(self.doa)
        elif doa_scp:
            self.doa = BaseReader(doa_scp, value_processor=to_float32)
            self.num_doa = 1
        else:
            self.doa = None
            self.num_doa = 0

        self.emb = None
        if emb_scp:
            self.emb = NumpyReader(emb_scp)
Esempio n. 2
0
 def _pre_process(self,
                  text: str,
                  utt2dur: str,
                  max_token_num: int = 400,
                  min_token_num: int = 2,
                  skip_utts: str = "",
                  max_dur: float = 3000,
                  min_dur: float = 40) -> List[Dict]:
     """
     Preprocess function to filter the utterances

     Args:
         text: file opened with BaseReader, iterated as (key, tokens) pairs.
             When self.vocab_dict is false the tokens are converted to int
         utt2dur: file opened with BaseReader, values parsed with float
         max_token_num: utterances with more tokens are dropped
         min_token_num: utterances with fewer tokens are dropped
         skip_utts: optional file with one utterance key per line; those
             keys are left out and not counted as dropped
         max_dur: utterances with a larger utt2dur value are dropped
         min_dur: utterances with a smaller utt2dur value are dropped
     Return:
         list of dicts with the keys "key", "dur", "len" and "tok",
         sorted by "dur" from largest to smallest
     """
     if skip_utts:
         with open(skip_utts, "r") as skip_fd:
             # a set keeps the membership test below O(1) per utterance
             skip_keys = {line.strip() for line in skip_fd}
     else:
         skip_keys = set()
     # keep the path parameter intact instead of rebinding it to the reader
     dur_reader = BaseReader(utt2dur, value_processor=float)
     if self.vocab_dict:
         text_reader = BaseReader(text, num_tokens=-1, restrict=False)
     else:
         text_reader = BaseReader(
             text,
             value_processor=lambda tok: list(map(int, tok)),
             num_tokens=-1,
             restrict=False)
     token_set = []
     drop_utts = 0
     for key, tokens in text_reader:
         num_toks = len(tokens)
         if num_toks > max_token_num or num_toks < min_token_num:
             drop_utts += 1
             continue
         if key not in dur_reader:
             drop_utts += 1
             continue
         # skipped utterances are excluded silently (not part of drop_utts)
         if key in skip_keys:
             continue
         num_frames = dur_reader[key]
         if num_frames < min_dur or num_frames > max_dur:
             drop_utts += 1
             continue
         token_set.append({
             "key": key,
             "dur": num_frames,
             "len": num_toks,
             "tok": tokens
         })
     # long -> short
     token_set = sorted(token_set, key=lambda d: d["dur"], reverse=True)
     if drop_utts:
         warnings.warn(f"Drop {drop_utts} utterances")
     return token_set
Esempio n. 3
0
def run(args):
    """
    Pack the audio of every entry in args.wav_scp into the single archive
    file args.wav_ark

    Each entry is written as "<key> " followed by the raw audio bytes. An
    entry whose value ends with "|" is run as a shell command (without the
    trailing "|") and its stdout is archived; any other value is opened as a
    file and its content is archived. When args.scp is set, a line
    "<key>\\t<args.wav_ark>:<offset>" is written for every entry, where
    offset is the archive position right after the "<key> " prefix.

    Args:
        args: object providing the attributes wav_scp, wav_ark and scp
    Raises:
        RuntimeError: if a piped command exits with a non-zero return code
    """
    scp_out = open(args.scp, "w") if args.scp else None
    # try/finally so the scp file is closed even if an entry fails
    try:
        with open(args.wav_ark, "wb") as wav_ark:
            reader = BaseReader(args.wav_scp, num_tokens=2, restrict=True)
            done = 0
            for key, value in reader:
                wav_ark.write((key + " ").encode())
                # offset of the audio payload, i.e. just after "<key> "
                offset = wav_ark.tell()
                if value[-1] == "|":
                    # NOTE: the command comes straight from the scp file and
                    # is executed through the shell, so only use trusted
                    # scripts
                    p = subprocess.Popen(value[:-1],
                                         shell=True,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
                    stdout, stderr = p.communicate()
                    if p.returncode != 0:
                        stderr = bytes.decode(stderr)
                        raise RuntimeError(
                            f"Running command: \"{value[:-1]}\" failed: {stderr}"
                        )
                    wav_ark.write(stdout)
                else:
                    with open(value, "rb") as wav:
                        wav_ark.write(wav.read())
                if scp_out:
                    scp_out.write(f"{key}\t{args.wav_ark}:{offset}\n")
                done += 1
                if done % 200 == 0:
                    print(f"Processed {done} utterances...", flush=True)
            print(f"Archive {len(reader)} utterances to {args.wav_ark}")
    finally:
        if scp_out:
            scp_out.close()
Esempio n. 4
0
 def __init__(self,
              spk2class: Optional[str] = None,
              name: str = "UNK",
              unit: str = "UNK") -> None:
     """
     Initialize the value table and the optional spk2class reader

     Args:
         spk2class: when true, opened with BaseReader and kept as self.s2c,
             otherwise self.s2c is None
         name: stored as self.name
         unit: stored as self.unit
     """
     if spk2class:
         s2c = BaseReader(spk2class)
     else:
         s2c = None
     self.s2c = s2c
     # missing keys start from 0.0
     self.val = defaultdict(float)
     self.name, self.unit = name, unit
Esempio n. 5
0
 def __init__(self,
              simu_cfg: str,
              return_in_egs: List[str] = ["mix"]) -> None:
     """
     Open the simulation configuration and build the argument parser

     Args:
         simu_cfg: file opened with BaseReader (num_tokens=-1)
         return_in_egs: list of names kept as self.return_in_egs
             (default: ["mix"])
     """
     self.simu_cfg = BaseReader(simu_cfg, num_tokens=-1)
     self.parser = make_argparse()
     # Store a copy: the default ["mix"] is one list object shared by every
     # call, so keeping it directly would let a change made through one
     # instance show up in all others (and in later default constructions)
     self.return_in_egs = list(return_in_egs)
Esempio n. 6
0
 def __init__(self, text_descriptor, cer=False):
     """
     Open one BaseReader for each comma-separated entry of text_descriptor

     Args:
         text_descriptor: string of entries separated by ","
         cer: stored as self.cer
     """
     readers = []
     for text in text_descriptor.split(","):
         readers.append(BaseReader(text, num_tokens=-1, restrict=False))
     self.readers = readers
     self.cer = cer