def __init__(self, path, kaldi_dtype):
    '''Open a writer for `path` handling values of type `kaldi_dtype`

    The concrete writer class is looked up in ``self._dtype_to_cls`` by the
    value of the data type.

    Raises
    ------
    IOError
        If the writer's ``Open`` call reports failure
    '''
    super(_KaldiSimpleWriter, self).__init__(path, kaldi_dtype)
    dtype_key = KaldiDataType(kaldi_dtype).value
    writer = self._dtype_to_cls[dtype_key]()
    if writer.Open(path):
        self._internal = writer
        # The table is only binary if the opened stream says so as well
        self.binary &= self._internal.IsBinary()
    else:
        raise IOError('Unable to open for write')
def __init__(self, path, kaldi_dtype, utt2spk=''):
    '''Open a random access reader for `path`

    The concrete reader class is looked up in ``self._dtype_to_cls`` by the
    value of the data type. `utt2spk` is forwarded both to the parent
    initializer and to the reader's ``Open`` call.

    Raises
    ------
    IOError
        If the reader's ``Open`` call reports failure
    '''
    super(_KaldiRandomAccessSimpleReader, self).__init__(
        path, kaldi_dtype, utt2spk=utt2spk)
    dtype_key = KaldiDataType(kaldi_dtype).value
    reader = self._dtype_to_cls[dtype_key]()
    if reader.Open(path, utt2spk):
        self._internal = reader
        # The table is only binary if the opened stream says so as well
        self.binary &= self._internal.IsBinary()
    else:
        raise IOError('Unable to open for random access read')
def __init__(self, path, kaldi_dtype):
    '''Open a sequential reader for `path`

    The concrete reader class is looked up in ``self._dtype_to_cls`` by the
    value of the data type. When ``self.background`` is truthy the reader is
    opened with ``OpenThreaded``; otherwise with ``Open``.

    Raises
    ------
    IOError
        If the chosen open call reports failure
    '''
    super(_KaldiSequentialSimpleReader, self).__init__(path, kaldi_dtype)
    dtype_key = KaldiDataType(kaldi_dtype).value
    reader = self._dtype_to_cls[dtype_key]()
    # Pick the open method first, then call it exactly once
    open_method = reader.OpenThreaded if self.background else reader.Open
    if not open_method(path):
        raise IOError('Unable to open for sequential read')
    self._internal = reader
    # The table is only binary if the opened stream says so as well
    self.binary &= self._internal.IsBinary()
def open_table_stream(
        path, kaldi_dtype, mode='r', error_on_str=True, utt2spk='',
        value_style='b', cache=False):
    '''Factory function to open a kaldi table

    The table class is chosen from the combination of `kaldi_dtype` and
    `mode`. Some combinations accept extra keyword arguments, summarized
    below

    +------+-------------+---------------------+
    | mode | kaldi_dtype | additional kwargs   |
    +======+=============+=====================+
    |`'r'` | `'wm'`      | `value_style='b'`   |
    +------+-------------+---------------------+
    |`'r+'`| *           | `utt2spk=''`        |
    +------+-------------+---------------------+
    |`'r+'`| `'wm'`      | `value_style='b'`   |
    +------+-------------+---------------------+
    |`'w'` | `'tv'`      | `error_on_str=True` |
    +------+-------------+---------------------+

    Parameters
    ----------
    path : str
        The specifier used by kaldi to open the script. Generally these
        will take the form ``"{ark|scp}:<path_to_file>"``, though they can
        take much more interesting forms (like pipes). More information
        can be found on the `Kaldi website
        <http://kaldi-asr.org/doc2/io.html>`_.
    kaldi_dtype : KaldiDataType
        The type of data the table is expected to handle.
    mode : {'r', 'r+', 'w'}, optional
        Specifies the type of access to be performed: read sequential,
        read random, or write. They are implemented by subclasses of
        `KaldiSequentialReader`, `KaldiRandomAccessReader`, or
        `KaldiWriter`, resp. ``'w+'`` is handled the same way as ``'w'``.
    error_on_str : bool, optional
        Token vectors (`'tv'`) accept sequences of whitespace-free
        ASCII/UTF strings. A `str` is also a sequence of characters,
        which may satisfy the token requirements. If
        `error_on_str=True`, a `ValueError` is raised when writing a
        `str` as a token vector. Otherwise a `str` can be written.
    utt2spk : str, optional
        If set, the reader uses `utt2spk` as a map from utterance ids to
        speaker ids. The data in `path`, which are assumed to be
        referenced by speaker ids, can then be referenced by utterance.
        If `utt2spk` is unspecified, the keys in `path` are used to query
        for data.
    value_style : str of {'b', 's', 'd'}, optional
        `wm` readers can provide not only the audio buffer (`'b'`) of a
        wave file, but its sampling rate (`'s'`), and/or duration (in
        sec, `'d'`). Setting `value_style` to some combination of `'b'`,
        `'s'`, and/or `'d'` will cause the reader to return a tuple of
        that information. If `value_style` is only one character, the
        result will not be contained in a tuple.
    cache : bool
        Whether to cache all values in a dict as they are retrieved. Only
        applicable to random access readers. This can be very expensive
        for large tables and redundant if reading from an archive
        directly (as opposed to a script).

    Returns
    -------
    KaldiTable
        A table, opened.

    Raises
    ------
    IOError
        On failure to open
    SystemError
        Kaldi errors are thrown as `SystemError`s.
    ValueError
        If `mode` is not one of the recognized modes
    '''
    kaldi_dtype = KaldiDataType(kaldi_dtype)
    dtype_code = kaldi_dtype.value

    # Sequential readers
    if mode == 'r':
        if dtype_code == 'wm':
            return _KaldiSequentialWaveReader(
                path, kaldi_dtype, value_style=value_style)
        return _KaldiSequentialSimpleReader(path, kaldi_dtype)

    # Random access readers, optionally wrapped by the memoizing decorator
    if mode == 'r+':
        reader_kwargs = {'utt2spk': utt2spk}
        if dtype_code == 'wm':
            reader_cls = _KaldiRandomAccessWaveReader
            reader_kwargs['value_style'] = value_style
        else:
            reader_cls = _KaldiRandomAccessSimpleReader
        if cache:
            reader_cls = _random_access_reader_memoize(reader_cls)
        return reader_cls(path, kaldi_dtype, **reader_kwargs)

    # Writers
    if mode in ('w', 'w+'):
        if dtype_code == 't':
            return _KaldiTokenWriter(path, kaldi_dtype)
        if dtype_code == 'tv':
            return _KaldiTokenVectorWriter(
                path, kaldi_dtype, error_on_str=error_on_str)
        return _KaldiSimpleWriter(path, kaldi_dtype)

    raise ValueError(
        'Invalid Kaldi I/O mode "{}" (should be one of "r","r+","w")'.format(
            mode))
def __init__(self, path, kaldi_dtype):
    '''Record the table's data type, then initialize the parent with `path`

    Parameters
    ----------
    path : str
        Forwarded unchanged to the parent initializer
    kaldi_dtype : KaldiDataType
        Coerced with ``KaldiDataType(...)`` and stored as ``kaldi_dtype``
    '''
    # The data type is stored before delegating upward, so the attribute
    # already exists on the instance while the parent initializer runs
    dtype = KaldiDataType(kaldi_dtype)
    self.kaldi_dtype = dtype
    super(KaldiTable, self).__init__(path)
def write(
        self, obj, kaldi_dtype=None, error_on_str=True, write_binary=True):
    '''Write one object to the stream

    Parameters
    ----------
    obj
        The object to write
    kaldi_dtype : pydrobert.kaldi.enums.KaldiDataType, optional
        The type of object to write. If unset (``None``), the type is
        inferred from `obj` with `infer_kaldi_data_type`
    error_on_str : bool
        Token vectors (`'tv'`) accept sequences of whitespace-free
        ASCII/UTF strings. A `str` is also a sequence of characters,
        which may satisfy the token requirements. If
        `error_on_str=True`, a `ValueError` is raised when writing a
        `str` as a token vector. Otherwise a `str` can be written
    write_binary : bool, optional
        The object will be written as binary (True) or text (False).
        Not passed along for wave data or token vectors

    Raises
    ------
    ValueError
        If unable to determine a proper data type, or if a string is
        passed as a token vector while `error_on_str` is set
    IOError
        If the stream is closed, or if the underlying write raises a
        `RuntimeError`

    See Also
    --------
    pydrobert.kaldi.io.util.infer_kaldi_data_type
        Illustrates how different inputs are mapped to data types.
    '''
    if self.closed:
        raise IOError('I/O operation on closed file.')
    if kaldi_dtype is None:
        kaldi_dtype = infer_kaldi_data_type(obj)
        if kaldi_dtype is None:
            raise ValueError(
                'Unable to find kaldi data type for {}'.format(obj))
    else:
        kaldi_dtype = KaldiDataType(kaldi_dtype)
    try:
        if kaldi_dtype == KaldiDataType.WaveMatrix:
            # obj is indexed as a pair; the second entry is coerced to float
            self._internal.WriteWaveData(obj[0], float(obj[1]))
        elif kaldi_dtype == KaldiDataType.Token:
            # Objects exposing tolist() are converted before writing
            try:
                obj = obj.tolist()
            except AttributeError:
                pass
            self._internal.WriteToken(write_binary, obj)
        elif kaldi_dtype == KaldiDataType.TokenVector:
            try:
                obj = obj.tolist()
            except AttributeError:
                pass
            if error_on_str and isinstance(obj, (str, text)):
                raise ValueError(
                    'Expected list of tokens, got string. If you want '
                    'to treat strings as lists of character-wide tokens, '
                    'set error_on_str to False when opening')
            self._internal.WriteTokenVector(obj)
        elif kaldi_dtype.is_basic:
            if kaldi_dtype == KaldiDataType.Int32:
                self._internal.WriteInt32(write_binary, obj)
            elif kaldi_dtype == KaldiDataType.Int32Vector:
                self._internal.WriteInt32Vector(write_binary, obj)
            elif kaldi_dtype == KaldiDataType.Int32VectorVector:
                self._internal.WriteInt32VectorVector(write_binary, obj)
            elif kaldi_dtype == KaldiDataType.Int32PairVector:
                self._internal.WriteInt32PairVector(write_binary, obj)
            elif kaldi_dtype == KaldiDataType.Double:
                self._internal.WriteDouble(write_binary, obj)
            elif kaldi_dtype == KaldiDataType.Base:
                self._internal.WriteBaseFloat(write_binary, obj)
            elif kaldi_dtype == KaldiDataType.BasePairVector:
                self._internal.WriteBaseFloatPairVector(write_binary, obj)
            else:
                # Any remaining basic type is written as a boolean
                self._internal.WriteBool(write_binary, obj)
        elif kaldi_dtype.is_num_vector:
            if kaldi_dtype.is_double:
                self._internal.WriteVectorDouble(write_binary, obj)
            else:
                self._internal.WriteVectorFloat(write_binary, obj)
        else:
            # Everything left over is written as a matrix
            if kaldi_dtype.is_double:
                self._internal.WriteMatrixDouble(write_binary, obj)
            else:
                self._internal.WriteMatrixFloat(write_binary, obj)
    except RuntimeError as err:
        # Surface backend failures as IOError, keeping the original cause
        raise_from(IOError('Unable to write data'), err)
def read(self, kaldi_dtype, value_style='b', read_binary=None):
    '''Read in one object from the stream

    Parameters
    ----------
    kaldi_dtype : pydrobert.kaldi.enums.KaldiDataType
        The type of object to read
    value_style : str of {'b', 's', 'd'}, optional
        `wm` readers can provide not only the audio buffer (`'b'`) of a
        wave file, but its sampling rate (`'s'`), and/or duration (in
        sec, `'d'`). Setting `value_style` to some combination of `'b'`,
        `'s'`, and/or `'d'` will cause the reader to return a tuple of
        that information. If `value_style` is only one character, the
        result will not be contained in a tuple
    read_binary : bool, optional
        If set, the object will be read as either binary (True) or text
        (False). The default behaviour is to read according to the
        ``binary`` attribute. Ignored if there's only one way to read the
        data

    Raises
    ------
    IOError
        If the stream is closed, or if the underlying read raises a
        `RuntimeError`
    ValueError
        If reading wave data and `value_style` contains a code other than
        `'b'`, `'s'`, or `'d'`
    '''
    if self.closed:
        raise IOError('I/O operation on closed file.')
    kaldi_dtype = KaldiDataType(kaldi_dtype)
    as_binary = self.binary if read_binary is None else read_binary
    try:
        if kaldi_dtype == KaldiDataType.WaveMatrix:
            if not all(code in 'bsd' for code in value_style):
                raise ValueError(
                    'value_style must be a combination of "b", "s",'
                    ' and "d"')
            wave = self._internal.ReadWaveData()  # (data, samp_freq)
            # Each requested code is resolved lazily, in the order given
            pieces = [
                wave[0] if code == 'b'
                else wave[1] if code == 's'
                else wave[0].shape[1] / wave[1]
                for code in value_style
            ]
            return pieces[0] if len(pieces) == 1 else tuple(pieces)
        if kaldi_dtype == KaldiDataType.Token:
            return self._internal.ReadToken(as_binary)
        if kaldi_dtype == KaldiDataType.TokenVector:
            return self._internal.ReadTokenVector()
        if kaldi_dtype.is_basic:
            if kaldi_dtype == KaldiDataType.Int32:
                return self._internal.ReadInt32()
            if kaldi_dtype == KaldiDataType.Int32Vector:
                return self._internal.ReadInt32Vector()
            if kaldi_dtype == KaldiDataType.Int32VectorVector:
                return self._internal.ReadInt32VectorVector()
            if kaldi_dtype == KaldiDataType.Int32PairVector:
                return self._internal.ReadInt32PairVector()
            if kaldi_dtype == KaldiDataType.Double:
                return self._internal.ReadDouble()
            if kaldi_dtype == KaldiDataType.Base:
                return self._internal.ReadBaseFloat()
            if kaldi_dtype == KaldiDataType.BasePairVector:
                return self._internal.ReadBaseFloatPairVector()
            # Any remaining basic type is read as a boolean
            return self._internal.ReadBool()
        if kaldi_dtype.is_num_vector:
            if kaldi_dtype.is_double:
                return self._internal.ReadVectorDouble(as_binary)
            return self._internal.ReadVectorFloat(as_binary)
        # Everything left over is read as a matrix
        if kaldi_dtype.is_double:
            return self._internal.ReadMatrixDouble(as_binary)
        return self._internal.ReadMatrixFloat(as_binary)
    except RuntimeError as err:
        # Surface backend failures as IOError, keeping the original cause
        raise_from(IOError('Unable to read data'), err)