def get_zip_manifest(
    zip_path: Path, zip_root: Optional[Path] = None, is_audio=False
):
    """Index every member of a zip archive by raw byte range and length.

    Args:
        zip_path: Path of the archive. It is joined onto ``zip_root`` to
            open the file, and its POSIX form is what gets written into the
            returned manifest entries.
        zip_root: Optional directory that ``zip_path`` is relative to.
        is_audio: When true, each member is validated with
            ``is_sf_audio_data`` and measured with ``sf.info(...).frames``;
            otherwise it is validated with ``is_npy_data`` and measured as
            the first dimension of the array returned by ``np.load``.

    Returns:
        A ``(paths, lengths)`` pair of dicts keyed by the stem of each
        member's filename. ``paths`` values have the form
        ``"<zip_path>:<offset>:<file_size>"``; ``lengths`` values are the
        frame count or first array dimension described above.

    Raises:
        AssertionError: If a member's payload is at most one byte long or
            fails the format check for the selected mode.
    """
    _zip_path = Path.joinpath(zip_root or Path(""), zip_path)
    with zipfile.ZipFile(_zip_path, mode="r") as archive:
        info = archive.infolist()

    paths, lengths = {}, {}
    # A single raw handle serves the whole scan; reopening the archive for
    # every member is needless work on large archives.
    with open(_zip_path, "rb") as raw:
        for i in tqdm(info):
            utt_id = Path(i.filename).stem

            # The payload starts after the 30-byte fixed local header, the
            # encoded filename and the extra field. Both lengths are read
            # from the local header itself (2 bytes each, little-endian, at
            # offset 26): len(i.filename) counts characters rather than
            # encoded bytes, and the extra field may be non-empty.
            raw.seek(i.header_offset + 26)
            length_fields = raw.read(4)
            name_len = int.from_bytes(length_fields[:2], "little")
            extra_len = int.from_bytes(length_fields[2:4], "little")
            offset = i.header_offset + 30 + name_len + extra_len
            file_size = i.file_size
            # NOTE(review): file_size is the uncompressed size, so this byte
            # range presumably relies on members being stored without
            # compression - confirm against the code that writes the zip.
            paths[utt_id] = f"{zip_path.as_posix()}:{offset}:{file_size}"

            raw.seek(offset)
            byte_data = raw.read(file_size)
            assert len(byte_data) > 1
            if is_audio:
                assert is_sf_audio_data(byte_data), i
            else:
                assert is_npy_data(byte_data), i

            byte_data_fp = io.BytesIO(byte_data)
            if is_audio:
                lengths[utt_id] = sf.info(byte_data_fp).frames
            else:
                lengths[utt_id] = np.load(byte_data_fp).shape[0]
    return paths, lengths
def __getitem__(self, index):
    """Load item ``index`` as a tensor, applying optional augmentation.

    The entry is resolved under ``self.root_dir``. If ``parse_path`` yields
    a two-element slice pointer, the bytes are read from a stored zip and
    wrapped in an in-memory file. Noise/RIR mixing, speed perturbation and
    volume perturbation are each applied only when their random draw falls
    below the configured probability and ``self.is_training`` is truthy.
    When ``self.is_save`` is set, the result is also written out through
    ``self.save_to_wav``.

    Returns:
        dict: ``{"id": index, "source": feats}``.
    """
    import soundfile as sf

    audio_source = os.path.join(self.root_dir, str(self.fnames[index]))
    _path, slice_ptr = parse_path(audio_source)
    if len(slice_ptr) == 2:
        raw_bytes = read_from_stored_zip(_path, slice_ptr[0], slice_ptr[1])
        assert is_sf_audio_data(raw_bytes)
        audio_source = io.BytesIO(raw_bytes)

    # Each random draw is taken before is_training is consulted, so the RNG
    # stream advances identically whether or not the dataset is training.
    use_noise_rir = random.random() < self.noise_rir_prob and self.is_training
    if use_noise_rir:
        wav = self.noise_rir_dataset.add_noise_rir(audio_source)
        curr_sample_rate = self.sample_rate
    else:
        wav, curr_sample_rate = sf.read(audio_source, dtype="float32")

    feats = self.postprocess(torch.from_numpy(wav).float(), curr_sample_rate)

    use_speed_perturb = (
        random.random() < self.speed_perturb_prob and self.is_training
    )
    if use_speed_perturb:
        feats = self.sp(feats)

    use_volume_perturb = (
        random.random() < self.volume_perturb_prob and self.is_training
    )
    if use_volume_perturb:
        feats = volume_perturb(feats)

    if self.is_save:
        # Output name: last path component up to its first dot, plus suffix.
        base_name = _path.split('/')[-1].split('.')[0]
        save_path = os.path.join(self.is_save_path, base_name) + '_augtment.wav'
        self.save_to_wav(feats, save_path)

    return {"id": index, "source": feats}
def get_features_or_waveform_from_stored_zip(
    path, byte_offset, byte_size, need_waveform=False
):
    """Decode one byte range of a zip archive into features or a waveform.

    Args:
        path: Archive path; must end with ``".zip"``.
        byte_offset: Passed through to ``read_from_stored_zip``.
        byte_size: Passed through to ``read_from_stored_zip``.
        need_waveform: For audio payloads, return the first element of
            ``get_waveform(...)`` instead of the result of ``get_fbank``.

    Returns:
        The array from ``np.load`` for npy payloads; for audio payloads
        either the waveform or the filterbank output as selected by
        ``need_waveform``.

    Raises:
        AssertionError: If ``path`` does not end with ``".zip"``.
        ValueError: If the payload is neither npy nor recognised audio data.
    """
    assert path.endswith(".zip")
    payload = read_from_stored_zip(path, byte_offset, byte_size)
    stream = io.BytesIO(payload)

    if is_npy_data(payload):
        return np.load(stream)

    if is_sf_audio_data(payload):
        if need_waveform:
            return get_waveform(stream, always_2d=False)[0]
        return get_fbank(stream)

    raise ValueError(f'Unknown file format for "{path}"')
def __getitem__(self, index):
    """Load item ``index`` and return it as a post-processed tensor.

    The entry is resolved under ``self.root_dir``. If ``parse_path`` yields
    a two-element slice pointer, the bytes are read from a stored zip,
    checked with ``is_sf_audio_data`` and decoded from memory; otherwise
    the joined path is handed to ``soundfile`` directly.

    Returns:
        dict: ``{"id": index, "source": feats}``.
    """
    import soundfile as sf

    audio_source = os.path.join(self.root_dir, str(self.fnames[index]))
    _path, slice_ptr = parse_path(audio_source)

    if len(slice_ptr) == 2:
        raw_bytes = read_from_stored_zip(_path, slice_ptr[0], slice_ptr[1])
        assert is_sf_audio_data(raw_bytes)
        audio_source = io.BytesIO(raw_bytes)

    wav, curr_sample_rate = sf.read(audio_source, dtype="float32")
    tensor = torch.from_numpy(wav).float()
    return {"id": index, "source": self.postprocess(tensor, curr_sample_rate)}