def __iter__(self):
    """Iterate over the archive, yielding ``(key, (rate, array))`` pairs.

    Two modes, selected by ``self.ark_or_scp``:

    * ``"scp"``: ``self.filepath`` is a text file whose lines look like
      ``uttid filepath.h5:key``. Each referenced HDF5 file is opened once
      and reused for the following lines.
    * otherwise: ``self.filepath`` is the HDF5 file itself, or ``"-"`` to
      read the whole archive from standard input.

    When ``self.return_shape`` is true, the array is replaced by its
    ``shape``.

    Every file opened here is closed when iteration finishes, including
    when it finishes early because of an error or because the consumer
    stopped iterating.

    Raises:
        RuntimeError: If the value of an scp line contains no ``:``.
    """
    if self.ark_or_scp == "scp":
        # path -> opened file, so that each file is opened only once
        hdf5_dict = {}
        try:
            with open(self.filepath, "r", encoding="utf-8") as f:
                for line in f:
                    key, value = line.rstrip().split(None, 1)

                    if ":" not in value:
                        raise RuntimeError(
                            "scp file for hdf5 should be like: "
                            '"uttid filepath.h5:key": {}({})'.format(
                                line, self.filepath))
                    path, h5_key = value.split(":", 1)

                    hdf5_file = hdf5_dict.get(path)
                    if hdf5_file is None:
                        try:
                            hdf5_file = SoundHDF5File(path, "r")
                        except Exception:
                            logging.error(
                                "Error when loading {}".format(path))
                            raise
                        hdf5_dict[path] = hdf5_file

                    try:
                        data = hdf5_file[h5_key]
                    except Exception:
                        logging.error(
                            "Error when loading {} with key={}".format(
                                path, h5_key))
                        raise

                    # Change Tuple[ndarray, int] -> Tuple[int, ndarray]
                    # (soundfile style -> scipy style)
                    array, rate = data
                    if self.return_shape:
                        array = array.shape
                    yield key, (rate, array)
        finally:
            # Closing all files, also on error or early generator exit.
            # Closing is best effort: a failure is reported, not raised.
            for path, hdf5_file in hdf5_dict.items():
                try:
                    hdf5_file.close()
                except Exception:
                    logging.warning(
                        "Error when closing {}".format(path))

    else:
        if self.filepath == "-":
            # Required h5py>=2.9
            filepath = io.BytesIO(sys.stdin.buffer.read())
        else:
            filepath = self.filepath

        hdf5_file = SoundHDF5File(filepath, "r")
        try:
            for key, (a, r) in hdf5_file.items():
                if self.return_shape:
                    a = a.shape
                yield key, (r, a)
        finally:
            hdf5_file.close()
def __init__(self, utt2noise, utt2snr, filetype='list'):
    """Load the per-utterance SNR table and the noise signals.

    Args:
        utt2noise: With ``filetype='list'``, path to a text file of
            ``uttid filename`` lines; every file is read into memory with
            ``soundfile.read(filename, dtype='int16')``. With
            ``filetype='sound.hdf5'``, path passed to ``SoundHDF5File``.
        utt2snr: Path to a text file of ``uttid snr`` lines; the second
            field is parsed with ``float``.
        filetype: ``'list'`` or ``'sound.hdf5'``.

    Raises:
        ValueError: If ``filetype`` is neither of the supported values.
        RuntimeError: If the utterance ids of the two tables differ.
    """
    self.utt2noise_file = utt2noise
    self.utt2snr_file = utt2snr
    self.filetype = filetype

    self.utt2snr = {}
    # The SNR table lives in utt2snr; reading utt2noise here would try to
    # parse noise file names as floats.
    with open(utt2snr, 'r') as f:
        for line in f:
            utt, snr = line.rstrip().split(None, 1)
            self.utt2snr[utt] = float(snr)

    self.utt2noise = {}
    if filetype == 'list':
        with open(utt2noise, 'r') as f:
            for line in f:
                utt, filename = line.rstrip().split(None, 1)
                signal, rate = soundfile.read(filename, dtype='int16')
                self.utt2noise[utt] = (signal, rate)
    elif filetype == 'sound.hdf5':
        self.utt2noise = SoundHDF5File(utt2noise, 'r')
    else:
        raise ValueError(filetype)

    if set(self.utt2snr) != set(self.utt2noise):
        raise RuntimeError('The uttids mismatch between {} and {}'.format(
            utt2snr, utt2noise))
def __init__(self, wspecifier, filetype='mat',
             write_num_frames=None, compress=False, compression_method=2):
    """Open the writer(s) selected by ``filetype`` and ``wspecifier``.

    Args:
        wspecifier: For ``'mat'`` it is handed to ``kaldiio.WriteHelper``
            unchanged. For ``'hdf5'`` / ``'sound.hdf5'`` it must look like
            ``ark:out.ark`` or ``ark,scp:out.ark,out.scp``.
        filetype: ``'mat'``, ``'hdf5'`` or ``'sound.hdf5'``.
        write_num_frames: ``None`` or ``ark,t:<file>``; the file is opened
            for text writing.
        compress: For ``'mat'``, whether to pass ``compression_method``.
        compression_method: Forwarded to ``kaldiio.WriteHelper`` when
            ``compress`` is true.

    Raises:
        ValueError: On an unsupported ``filetype``, a malformed
            ``wspecifier`` or a malformed ``write_num_frames``.
    """
    self.writer_scp = None  # Only opened when the specifier names an scp
    self.filename = None
    self.filetype = filetype
    self.kwargs = {}

    if filetype == 'mat':
        helper_options = {}
        if compress:
            helper_options['compression_method'] = compression_method
        self.writer = kaldiio.WriteHelper(wspecifier, **helper_options)
    elif filetype not in ('hdf5', 'sound.hdf5'):
        raise ValueError('Not supporting: filetype={}'.format(filetype))
    else:
        # ark,scp:out.ark,out.scp -> {'ark': 'out.ark', 'scp': 'out.scp'}
        spec_types, spec_paths = wspecifier.split(':', 1)
        if spec_types not in ('ark', 'scp,ark', 'ark,scp'):
            raise ValueError(
                '{} is not allowed: {}'.format(spec_types, wspecifier))
        type_list = spec_types.split(',')
        path_list = spec_paths.split(',')
        if len(type_list) != len(path_list):
            raise ValueError(
                'Mismatch: {} and {}'.format(spec_types, spec_paths))
        spec_dict = {kind: path for kind, path in zip(type_list, path_list)}

        ark_path = spec_dict['ark']
        if filetype == 'sound.hdf5':
            self.writer = SoundHDF5File(ark_path, 'w')
        else:
            self.writer = h5py.File(ark_path, 'w')
        self.filename = ark_path

        if 'scp' in spec_dict:
            self.writer_scp = io.open(
                spec_dict['scp'], 'w', encoding='utf-8')

    if write_num_frames is None:
        self.writer_nframe = None
        return

    if ':' not in write_num_frames:
        raise ValueError('Must include ":", write_num_frames={}'
                         .format(write_num_frames))
    nframes_type, nframes_file = write_num_frames.split(':', 1)
    if nframes_type != 'ark,t':
        raise ValueError(
            'Only supporting text mode. '
            'e.g. --write-num-frames=ark,t:foo.txt :'
            '{}'.format(nframes_type))
    self.writer_nframe = io.open(nframes_file, 'w', encoding='utf-8')
def __init__(
    self,
    utt2noise=None,
    lower=-20,
    upper=-5,
    utt2ratio=None,
    filetype="list",
    dbunit=True,
    seed=None,
):
    """Load the optional noise signals and the optional ratio schedule.

    Args:
        utt2noise: ``None``, or the noise source. With
            ``filetype="list"`` it is a text file of ``uttid filename``
            lines and every file is read into memory with
            ``soundfile.read(filename, dtype="int16")``. With
            ``filetype="sound.hdf5"`` it is passed to ``SoundHDF5File``.
        lower: Stored on the instance as ``self.lower``.
        upper: Stored on the instance as ``self.upper``.
        utt2ratio: ``None``, or a text file of ``uttid ratio`` lines
            whose second field is parsed with ``float``.
        filetype: ``"list"`` or ``"sound.hdf5"``.
        dbunit: Stored on the instance as ``self.dbunit``.
        seed: Seed for ``numpy.random.RandomState``.

    Raises:
        ValueError: If ``utt2noise`` is given with an unsupported
            ``filetype``.
        RuntimeError: If both tables are given and their utterance ids
            differ.
    """
    self.utt2noise_file = utt2noise
    self.utt2ratio_file = utt2ratio
    self.filetype = filetype
    self.dbunit = dbunit
    self.lower = lower
    self.upper = upper
    self.state = numpy.random.RandomState(seed)

    if utt2ratio is not None:
        # Use the scheduled ratio for each utterances
        self.utt2ratio = {}
        # The schedule lives in utt2ratio; reading utt2noise here would
        # fail when utt2noise is None and otherwise parse the wrong file.
        with open(utt2ratio, "r") as f:
            for line in f:
                utt, snr = line.rstrip().split(None, 1)
                self.utt2ratio[utt] = float(snr)
    else:
        # The ratio is given on runtime randomly
        self.utt2ratio = None

    if utt2noise is not None:
        self.utt2noise = {}
        if filetype == "list":
            with open(utt2noise, "r") as f:
                for line in f:
                    utt, filename = line.rstrip().split(None, 1)
                    signal, rate = soundfile.read(filename, dtype="int16")
                    # Load all files in memory
                    self.utt2noise[utt] = (signal, rate)
        elif filetype == "sound.hdf5":
            self.utt2noise = SoundHDF5File(utt2noise, "r")
        else:
            raise ValueError(filetype)
    else:
        self.utt2noise = None

    if utt2noise is not None and utt2ratio is not None:
        if set(self.utt2ratio) != set(self.utt2noise):
            raise RuntimeError(
                "The uttids mismatch between {} and {}".format(
                    utt2ratio, utt2noise))
def __init__(self, wspecifier, write_num_frames=None, pcm_format="wav"):
    """Open a ``SoundHDF5File`` writer plus the optional side files.

    Args:
        wspecifier: Parsed by ``parse_wspecifier``; the result must have
            an ``"ark"`` entry and may have an ``"scp"`` entry.
        write_num_frames: ``None``, or a specifier handed to
            ``get_num_frames_writer``.
        pcm_format: Passed to ``SoundHDF5File`` as ``format``.
    """
    self.pcm_format = pcm_format
    parsed = parse_wspecifier(wspecifier)

    self.filename = parsed["ark"]
    self.writer = SoundHDF5File(parsed["ark"], "w", format=self.pcm_format)

    # The scp file is optional
    self.writer_scp = (
        open(parsed["scp"], "w", encoding="utf-8")
        if "scp" in parsed
        else None
    )

    # The frame-count file is optional as well
    self.writer_nframe = (
        None
        if write_num_frames is None
        else get_num_frames_writer(write_num_frames)
    )
def test_sound_hdf5_file(tmpdir, fmt):
    """Write int16 signals to a ``SoundHDF5File`` and read them back."""
    sample_rate = 8000
    expected = {
        'a': np.random.randint(-100, 100, 25, dtype=np.int16),
        'b': np.random.randint(-1000, 1000, 100, dtype=np.int16),
    }

    # Note: Specify the file format by extension
    path = tmpdir.join('test.{}.h5'.format(fmt)).strpath
    sound_file = SoundHDF5File(path, 'a')

    for key, signal in expected.items():
        sound_file[key] = (signal, sample_rate)

    for key, signal in expected.items():
        loaded, rate = sound_file[key]
        assert rate == 8000
        np.testing.assert_array_equal(loaded, signal)
def __init__(self, wspecifier, write_num_frames=None, pcm_format='wav'):
    """Set up the sound-HDF5 writer and its optional companion files.

    Args:
        wspecifier: Parsed by ``parse_wspecifier``; the ``'ark'`` entry of
            the result is the output file, the ``'scp'`` entry (if any) is
            opened for text writing.
        write_num_frames: ``None``, or a specifier passed to
            ``get_num_frames_writer``.
        pcm_format: Forwarded to ``SoundHDF5File`` as ``format``.
    """
    self.pcm_format = pcm_format

    targets = parse_wspecifier(wspecifier)
    self.filename = targets['ark']
    self.writer = SoundHDF5File(targets['ark'], 'w',
                                format=self.pcm_format)

    scp_handle = None
    if 'scp' in targets:
        scp_handle = open(targets['scp'], 'w', encoding='utf-8')
    self.writer_scp = scp_handle

    nframe_handle = None
    if write_num_frames is not None:
        nframe_handle = get_num_frames_writer(write_num_frames)
    self.writer_nframe = nframe_handle
def __init__(self, utt2rir, filetype="list"):
    """Load the table that maps utterance ids to RIR signals.

    Args:
        utt2rir: With ``filetype="list"``, a text file of
            ``uttid filename`` lines; each file is read with
            ``soundfile.read(filename, dtype="int16")`` and kept in
            memory. With ``filetype="sound.hdf5"``, a path passed to
            ``SoundHDF5File``.
        filetype: ``"list"`` or ``"sound.hdf5"``.

    Raises:
        NotImplementedError: For any other ``filetype``.
    """
    self.utt2rir_file = utt2rir
    self.filetype = filetype
    self.utt2rir = {}

    if filetype == "sound.hdf5":
        self.utt2rir = SoundHDF5File(utt2rir, "r")
        return

    if filetype != "list":
        raise NotImplementedError(filetype)

    with open(utt2rir, "r") as table:
        for entry in table:
            utt, filename = entry.rstrip().split(None, 1)
            # Stored as (signal, rate), the order soundfile.read returns
            self.utt2rir[utt] = tuple(
                soundfile.read(filename, dtype="int16"))
def test_sound_hdf5_file(self):
    """Round-trip int16 signals through ``SoundHDF5File`` per format."""
    workdir = Path(tempfile.mkdtemp())
    sample_rate = 8000

    for fmt in ('flac', 'wav'):
        expected = {
            'a': np.random.randint(-100, 100, 25, dtype=np.int16),
            'b': np.random.randint(-1000, 1000, 100, dtype=np.int16),
        }

        # Note: Specify the file format by extension
        target = workdir / 'test.{}.h5'.format(fmt)
        target.touch(exist_ok=True)
        sound_file = SoundHDF5File(str(target.resolve()), 'a')

        for key, signal in expected.items():
            sound_file[key] = (signal, sample_rate)

        for key, signal in expected.items():
            loaded, rate = sound_file[key]
            self.assertEqual(rate, 8000)
            self.assertAllEqual(loaded, signal)
def __iter__(self):
    """Yield ``(key, value)`` pairs read from ``self.rspecifier``.

    What is yielded depends on ``self.filetype``:

    * ``'mat'``: arrays from ``kaldiio.ReadHelper(self.rspecifier)``.
    * ``'sound'``: ``(rate, array)``; ``rspecifier`` must be
      ``scp:<file>`` where the file has ``key path`` lines, and each path
      is read with ``soundfile.read(path, dtype='int16')``.
    * ``'hdf5'``: datasets of an ``h5py.File``.
    * ``'sound.hdf5'``: ``(rate, array)`` read from a ``SoundHDF5File``.

    For the two HDF5 types ``rspecifier`` is ``ark:<file>`` (``-`` reads
    the archive from standard input) or ``scp:<file>`` where the file has
    ``uttid filepath.h5:key`` lines.

    When ``self.return_shape`` is true, each array is replaced by its
    ``shape``.

    Files opened here are closed when iteration finishes, including when
    it finishes early because of an error or because the consumer stopped
    iterating.

    Raises:
        ValueError: On an unsupported ``filetype`` or a malformed
            ``rspecifier``.
        RuntimeError: If the value of an HDF5 scp line contains no ``:``.
    """
    if self.filetype == 'mat':
        with kaldiio.ReadHelper(self.rspecifier) as reader:
            for key, array in reader:
                if self.return_shape:
                    array = array.shape
                yield key, array

    elif self.filetype == 'sound':
        if ':' not in self.rspecifier:
            raise ValueError('Give "rspecifier" such as "scp:some.scp: {}"'
                             .format(self.rspecifier))
        ark_or_scp, filepath = self.rspecifier.split(':', 1)
        if ark_or_scp != 'scp':
            raise ValueError('Only supporting "scp" for sound file: {}'
                             .format(ark_or_scp))
        with io.open(filepath, 'r', encoding='utf-8') as f:
            for line in f:
                key, sound_file_path = line.rstrip().split(None, 1)
                # Assume PCM16
                array, rate = soundfile.read(sound_file_path,
                                             dtype='int16')
                # Change Tuple[ndarray, int] -> Tuple[int, ndarray]
                # (soundfile style -> scipy style)
                if self.return_shape:
                    array = array.shape
                yield key, (rate, array)

    elif self.filetype in ['hdf5', 'sound.hdf5']:
        if ':' not in self.rspecifier:
            raise ValueError('Give "rspecifier" such as "ark:some.ark: {}"'
                             .format(self.rspecifier))
        ark_or_scp, filepath = self.rspecifier.split(':', 1)
        if ark_or_scp not in ['ark', 'scp']:
            raise ValueError('Must be scp or ark: {}'.format(ark_or_scp))

        if ark_or_scp == 'scp':
            # path -> opened file, so that each file is opened only once
            hdf5_dict = {}
            try:
                with io.open(filepath, 'r', encoding='utf-8') as f:
                    for line in f:
                        key, value = line.rstrip().split(None, 1)

                        if ':' not in value:
                            raise RuntimeError(
                                'scp file for hdf5 should be like: '
                                '"uttid filepath.h5:key": {}({})'
                                .format(line, filepath))
                        path, h5_key = value.split(':', 1)

                        hdf5_file = hdf5_dict.get(path)
                        if hdf5_file is None:
                            if self.filetype == 'sound.hdf5':
                                hdf5_file = SoundHDF5File(path, 'r')
                            else:
                                hdf5_file = h5py.File(path, 'r')
                            hdf5_dict[path] = hdf5_file

                        if self.filetype == 'sound.hdf5':
                            # Change Tuple[ndarray, int]
                            # -> Tuple[int, ndarray]
                            # (soundfile style -> scipy style)
                            array, rate = hdf5_file[h5_key]
                            if self.return_shape:
                                array = array.shape
                            yield key, (rate, array)
                        else:
                            if self.return_shape:
                                yield key, hdf5_file[h5_key].shape
                            else:
                                yield key, hdf5_file[h5_key][()]
            finally:
                # Closing all files, also on error or early generator exit
                for k in hdf5_dict:
                    hdf5_dict[k].close()

        else:
            if filepath == '-':
                # Required h5py>=2.9
                if PY2:
                    filepath = io.BytesIO(sys.stdin.read())
                else:
                    filepath = io.BytesIO(sys.stdin.buffer.read())

            if self.filetype == 'sound.hdf5':
                sound_file = SoundHDF5File(filepath, 'r')
                try:
                    # Items are unpacked as (array, rate), the same order
                    # the scp branch above gets from item access, so both
                    # modes yield (rate, array).
                    for key, (array, rate) in sound_file.items():
                        if self.return_shape:
                            array = array.shape
                        yield key, (rate, array)
                finally:
                    sound_file.close()
            else:
                with h5py.File(filepath, 'r') as f:
                    for key in f:
                        if self.return_shape:
                            yield key, f[key].shape
                        else:
                            yield key, f[key][()]
    else:
        raise ValueError(
            'Not supporting: filetype={}'.format(self.filetype))
def __init__(self, wspecifier, filetype='mat',
             write_num_frames=None, compress=False, compression_method=2,
             pcm_format='wav'):
    """Prepare the output target(s) for the requested ``filetype``.

    Args:
        wspecifier: For ``'mat'`` it is passed to ``kaldiio.WriteHelper``
            as is. For the other types it must look like ``ark:out.ark``
            or ``ark,scp:out.ark,out.scp``; for ``'sound'`` the ``ark``
            value is a directory, created if it does not exist.
        filetype: ``'mat'``, ``'hdf5'``, ``'sound.hdf5'`` or ``'sound'``.
        write_num_frames: ``None`` or ``ark,t:<file>``; the file is opened
            for text writing.
        compress: For ``'mat'``, whether to pass ``compression_method``.
        compression_method: Forwarded to ``kaldiio.WriteHelper`` when
            ``compress`` is true.
        pcm_format: Stored as ``self.pcm_format`` and passed to
            ``SoundHDF5File`` as ``format``.

    Raises:
        ValueError: On an unsupported ``filetype``, a malformed
            ``wspecifier`` or a malformed ``write_num_frames``.
    """
    self.writer_scp = None  # Stays None unless an scp file is requested
    self.filename = None
    self.filetype = filetype
    # Used for filetype='sound' or 'sound.hdf5'
    self.pcm_format = pcm_format
    self.kwargs = {}

    if filetype == 'mat':
        if not compress:
            self.writer = kaldiio.WriteHelper(wspecifier)
        else:
            self.writer = kaldiio.WriteHelper(
                wspecifier, compression_method=compression_method)

    elif filetype not in ('hdf5', 'sound.hdf5', 'sound'):
        raise ValueError('Not supporting: filetype={}'.format(filetype))

    else:
        # 1. Create spec_dict
        # e.g.
        #   ark,scp:out.ark,out.scp -> {'ark': 'out.ark', 'scp': 'out.scp'}
        kinds, paths = wspecifier.split(':', 1)
        if kinds not in ('ark', 'scp,ark', 'ark,scp'):
            raise ValueError(
                '{} is not allowed: {}'.format(kinds, wspecifier))
        kind_list = kinds.split(',')
        path_list = paths.split(',')
        if len(kind_list) != len(path_list):
            raise ValueError(
                'Mismatch: {} and {}'.format(kinds, paths))
        spec_dict = dict(zip(kind_list, path_list))

        # 2. Set writer
        self.filename = spec_dict['ark']
        if filetype == 'sound':
            # Use "ark" value as directory to save wav files
            # e.g. ark,scp:dirname,wav.scp
            # -> The wave files are found in dirname/*.wav
            wavdir = spec_dict['ark']
            if not os.path.exists(wavdir):
                os.makedirs(wavdir)
            self.writer = None
        elif filetype == 'hdf5':
            self.writer = h5py.File(spec_dict['ark'], 'w')
        elif filetype == 'sound.hdf5':
            self.writer = SoundHDF5File(spec_dict['ark'], 'w',
                                        format=self.pcm_format)
        else:
            # Cannot reach
            raise RuntimeError

        # 3. Set writer_scp
        if 'scp' in spec_dict:
            self.writer_scp = io.open(
                spec_dict['scp'], 'w', encoding='utf-8')

    if write_num_frames is None:
        self.writer_nframe = None
        return

    if ':' not in write_num_frames:
        raise ValueError('Must include ":", write_num_frames={}'
                         .format(write_num_frames))
    nframes_type, nframes_file = write_num_frames.split(':', 1)
    if nframes_type != 'ark,t':
        raise ValueError(
            'Only supporting text mode. '
            'e.g. --write-num-frames=ark,t:foo.txt :'
            '{}'.format(nframes_type))
    self.writer_nframe = io.open(nframes_file, 'w', encoding='utf-8')