def __init__(self, table, *additional_tables, **kwargs):
    """Initialize the key order, random state, and table handles

    Parameters
    ----------
    table, additional_tables
        Forwarded unchanged to the parent initializer
    key_list : iterable, optional
        Keyword argument. Stored as a tuple in ``self.key_list``. When
        omitted, the keys are read from the first entry of
        ``self.table_specifiers``
    rng : np.random.RandomState or seed, optional
        Keyword argument. A ``RandomState`` instance is stored as-is;
        any other value is passed to ``np.random.RandomState`` to build
        one
    kwargs
        Remaining keyword arguments are forwarded to the parent
        initializer

    Raises
    ------
    IOError
        If the key list has to be inferred and the first rspecifier is
        reported as invalid or as standard input
    TypeError
        If `key_list` is neither ``None`` nor iterable
    """
    key_list = kwargs.pop('key_list', None)
    rng = kwargs.pop('rng', None)
    super(ShuffledData, self).__init__(table, *additional_tables, **kwargs)
    # materialize one-shot iterables right away; None (or any other
    # non-iterable) is left untouched and handled below
    try:
        key_list = tuple(key_list)
    except TypeError:
        pass
    if key_list is None:
        # no keys supplied: read them from the first table, which needs
        # a source that can be opened again later for the handles
        _, rx_fn, rx_type, _ = parse_kaldi_input_path(
            self.table_specifiers[0][0])
        if rx_type == RxfilenameType.InvalidInput:
            raise IOError('Invalid rspecifier {}'.format(rx_fn))
        elif rx_type == RxfilenameType.StandardInput:
            raise IOError(
                'Cannot infer key list from stdin (cannot reopen)')
        with io_open(*self.table_specifiers[0][:2]) as reader:
            self.key_list = tuple(reader.keys())
    else:
        self.key_list = tuple(key_list)
    if self.ignore_missing:
        self._num_samples = None
    else:
        self._num_samples = len(self.key_list)
    if isinstance(rng, np.random.RandomState):
        self.rng = rng
    else:
        self.rng = np.random.RandomState(rng)
    # open the tables one at a time so that a failure part-way through
    # does not leave the already-opened handles dangling
    # NOTE(review): assumes the opened handles expose close() -- confirm
    handles = []
    try:
        for rspecifier, kdtype, o_kwargs in self.table_specifiers:
            handles.append(
                io_open(rspecifier, kdtype, mode='r+', **o_kwargs))
    except Exception:
        for handle in handles:
            handle.close()
        raise
    self.table_handles = tuple(handles)
def __init__(self, table, *additional_tables, **kwargs):
    """Initialize the key order, random state, and table handles

    Parameters
    ----------
    table, additional_tables
        Forwarded unchanged to the parent initializer
    key_list : iterable, optional
        Keyword argument. Stored as a tuple in ``self.key_list``. When
        omitted, the keys are read from the first entry of
        ``self.table_specifiers``
    rng : np.random.RandomState or seed, optional
        Keyword argument. A ``RandomState`` instance is stored as-is;
        any other value is passed to ``np.random.RandomState`` to build
        one
    kwargs
        Remaining keyword arguments are forwarded to the parent
        initializer

    Raises
    ------
    IOError
        If the key list has to be inferred and the first rspecifier is
        reported as invalid or as standard input
    TypeError
        If `key_list` is neither ``None`` nor iterable
    """
    key_list = kwargs.pop('key_list', None)
    rng = kwargs.pop('rng', None)
    super(ShuffledData, self).__init__(table, *additional_tables, **kwargs)
    # materialize one-shot iterables right away; None (or any other
    # non-iterable) is left untouched and handled below
    try:
        key_list = tuple(key_list)
    except TypeError:
        pass
    if key_list is None:
        # no keys supplied: read them from the first table, which needs
        # a source that can be opened again later for the handles
        _, rx_fn, rx_type, _ = parse_kaldi_input_path(
            self.table_specifiers[0][0])
        if rx_type == RxfilenameType.InvalidInput:
            raise IOError('Invalid rspecifier {}'.format(rx_fn))
        elif rx_type == RxfilenameType.StandardInput:
            raise IOError(
                'Cannot infer key list from stdin (cannot reopen)')
        with io_open(*self.table_specifiers[0][:2]) as reader:
            self.key_list = tuple(reader.keys())
    else:
        self.key_list = tuple(key_list)
    if self.ignore_missing:
        self._num_samples = None
    else:
        self._num_samples = len(self.key_list)
    if isinstance(rng, np.random.RandomState):
        self.rng = rng
    else:
        self.rng = np.random.RandomState(rng)
    # open the tables one at a time so that a failure part-way through
    # does not leave the already-opened handles dangling
    # NOTE(review): assumes the opened handles expose close() -- confirm
    handles = []
    try:
        for rspecifier, kdtype, o_kwargs in self.table_specifiers:
            handles.append(
                io_open(rspecifier, kdtype, mode='r+', **o_kwargs))
    except Exception:
        for handle in handles:
            handle.close()
        raise
    self.table_handles = tuple(handles)
def kaldi_rxfilename_arg_type(string):
    '''argparse type check: accept only a valid extended readable file name

    The string is parsed as a kaldi input path. It is returned unchanged
    unless the parse reports a table or an invalid rxfilename, in which
    case ``argparse.ArgumentTypeError`` is raised.
    '''
    parsed = kaldi_io_util.parse_kaldi_input_path(string)
    table_type, _fname, rxfilename_type, _options = parsed
    # anything parsed as a table is an rspecifier, not a plain rxfilename
    if table_type != kaldi_io_enums.TableType.NotATable:
        message = (
            'Expected an extended file name, got an rspecifier (starts with '
            "'ark:' or 'scp:')"
        )
        raise argparse.ArgumentTypeError(message)
    if rxfilename_type == kaldi_io_enums.RxfilenameType.InvalidInput:
        raise argparse.ArgumentTypeError('Not a valid rxfilename')
    return string
def kaldi_rxfilename_arg_type(string):
    '''argparse type check: accept only a valid extended readable file name

    The string is parsed as a kaldi input path. It is returned unchanged
    unless the parse reports a table or an invalid rxfilename, in which
    case ``argparse.ArgumentTypeError`` is raised.
    '''
    parsed = kaldi_io_util.parse_kaldi_input_path(string)
    table_type, _fname, rxfilename_type, _options = parsed
    # anything parsed as a table is an rspecifier, not a plain rxfilename
    if table_type != kaldi_io_enums.TableType.NotATable:
        message = (
            'Expected an extended file name, got an rspecifier (starts with '
            "'ark:' or 'scp:')"
        )
        raise argparse.ArgumentTypeError(message)
    if rxfilename_type == kaldi_io_enums.RxfilenameType.InvalidInput:
        raise argparse.ArgumentTypeError('Not a valid rxfilename')
    return string
def __init__(self, path):
    """Record `path` and the results of parsing it on the instance

    `path` is parsed as an input path when ``self.readable()`` is true
    and as an output path otherwise. The first three parse results are
    stored in private attributes; every entry of the fourth (an options
    mapping) is set as an attribute of the same name. ``binary`` is set
    to ``True`` before the options are applied.
    """
    self.path = path
    self.closed = False
    parse_path = (
        parse_kaldi_input_path if self.readable()
        else parse_kaldi_output_path
    )
    (
        self._table_type,
        self._xfilenames,
        self._xtypes,
        parsed_options,
    ) = parse_path(path)
    # set the default first so a parsed option of the same name wins
    self.binary = True
    for option_name, option_value in parsed_options.items():
        setattr(self, option_name, option_value)
    super(KaldiIOBase, self).__init__()
def __init__(self, path):
    """Record `path` and the results of parsing it on the instance

    `path` is parsed as an input path when ``self.readable()`` is true
    and as an output path otherwise. The first three parse results are
    stored in private attributes; every entry of the fourth (an options
    mapping) is set as an attribute of the same name. ``binary`` is set
    to ``True`` before the options are applied.
    """
    # imported at call time rather than at module import
    from pydrobert.kaldi.io.util import (
        parse_kaldi_input_path,
        parse_kaldi_output_path,
    )
    self.path = path
    self.closed = False
    parse_path = (
        parse_kaldi_input_path if self.readable()
        else parse_kaldi_output_path
    )
    (
        self._table_type,
        self._xfilenames,
        self._xtypes,
        parsed_options,
    ) = parse_path(path)
    # set the default first so a parsed option of the same name wins
    self.binary = True
    for option_name, option_value in parsed_options.items():
        setattr(self, option_name, option_value)
    super(KaldiIOBase, self).__init__()
def __init__(self, table, *additional_tables, **kwargs):
    """Initialize sequential iteration over the tables

    All arguments are forwarded to the parent initializer. Afterwards
    every rspecifier in ``self.table_specifiers`` is parsed; if any of
    them lacks the ``sorted`` option, a warning naming the first such
    rspecifier is issued. Finally the per-epoch sample generator is
    chosen based on ``self.ignore_missing`` and the number of tables.
    """
    super(SequentialData, self).__init__(
        table, *additional_tables, **kwargs)
    self._num_samples = None
    sorted_flags = [
        parse_kaldi_input_path(specifier[0])[3]['sorted']
        for specifier in self.table_specifiers
    ]
    if not all(sorted_flags):
        first_unsorted = sorted_flags.index(False)
        unsorted_rspec = self.table_specifiers[first_unsorted][0]
        # suggest the same rspecifier with ',s' appended to whatever
        # precedes the first colon
        head, colon, tail = unsorted_rspec.partition(':')
        suggested_rspec = head + ',s' + colon + tail
        message = (
            'SequentialData assumes data are sorted, and "{}" does '
            'not promise to be sorted. To supress this warning, '
            'check that this table is sorted, then add the sorted '
            'flag to this rspecifier ("{}")'
        ).format(unsorted_rspec, suggested_rspec)
        warn(message)
    self._sample_generator_for_epoch = (
        self._ignore_epoch
        if self.ignore_missing and len(self.table_specifiers) > 1
        else self._no_ignore_epoch
    )
def open(path, kaldi_dtype=None, mode='r', error_on_str=True, utt2spk='',
         value_style='b', header=True, cache=False):
    """Factory function for initializing and opening kaldi streams

    A single entry point for both kinds of kaldi stream: basic streams,
    which read or write kaldi objects directly, and tables, which are
    key-value readers and writers. `path` is parsed as an input path
    when `mode` contains ``'r'`` and as an output path otherwise. If the
    parse reports a table (`path` starts with ``ark:`` or ``scp:``,
    possibly with modifiers before the colon), a table is opened.
    Otherwise, a basic stream is opened.

    See also
    --------
    pydrobert.kaldi.io.table_streams.open_table_stream
        For information on opening tables
    pydrobert.kaldi.io.basic.open_duck_stream
        For information on opening basic streams
    """
    parse_path = (
        parse_kaldi_input_path if 'r' in mode else parse_kaldi_output_path
    )
    if parse_path(path)[0] == TableType.NotATable:
        return open_duck_stream(path, mode=mode, header=header)
    return open_table_stream(
        path,
        kaldi_dtype,
        mode=mode,
        error_on_str=error_on_str,
        utt2spk=utt2spk,
        value_style=value_style,
        cache=cache,
    )
def __init__(self, table, *additional_tables, **kwargs):
    """Initialize sequential iteration over the tables

    All arguments are forwarded to the parent initializer. Afterwards
    every rspecifier in ``self.table_specifiers`` is parsed; if any of
    them lacks the ``sorted`` option, a warning naming the first such
    rspecifier is issued. Finally the per-epoch sample generator is
    chosen based on ``self.ignore_missing`` and the number of tables.
    """
    super(SequentialData, self).__init__(
        table, *additional_tables, **kwargs)
    self._num_samples = None
    sorted_flags = [
        parse_kaldi_input_path(specifier[0])[3]['sorted']
        for specifier in self.table_specifiers
    ]
    if not all(sorted_flags):
        first_unsorted = sorted_flags.index(False)
        unsorted_rspec = self.table_specifiers[first_unsorted][0]
        # suggest the same rspecifier with ',s' appended to whatever
        # precedes the first colon
        head, colon, tail = unsorted_rspec.partition(':')
        suggested_rspec = head + ',s' + colon + tail
        message = (
            'SequentialData assumes data are sorted, and "{}" does '
            'not promise to be sorted. To supress this warning, '
            'check that this table is sorted, then add the sorted '
            'flag to this rspecifier ("{}")'
        ).format(unsorted_rspec, suggested_rspec)
        warn(message)
    self._sample_generator_for_epoch = (
        self._ignore_epoch
        if self.ignore_missing and len(self.table_specifiers) > 1
        else self._no_ignore_epoch
    )
def open(path, kaldi_dtype=None, mode='r', error_on_str=True, utt2spk='',
         value_style='b', header=True, cache=False):
    """Factory function for initializing and opening kaldi streams

    A single entry point for both kinds of kaldi stream: basic streams,
    which read or write kaldi objects directly, and tables, which are
    key-value readers and writers. `path` is parsed as an input path
    when `mode` contains ``'r'`` and as an output path otherwise. If the
    parse reports a table (`path` starts with ``'ark:'`` or ``'scp:'``,
    possibly with modifiers before the colon), a table is opened.
    Otherwise, a basic stream is opened.

    See also
    --------
    pydrobert.kaldi.io.table_streams.open_table_stream
        For information on opening tables
    pydrobert.kaldi.io.basic.open_duck_stream
        For information on opening basic streams
    """
    # imported at call time rather than at module import
    from pydrobert.kaldi.io.enums import TableType
    from pydrobert.kaldi.io.util import (
        parse_kaldi_input_path,
        parse_kaldi_output_path,
    )
    from pydrobert.kaldi.io.duck_streams import open_duck_stream
    from pydrobert.kaldi.io.table_streams import open_table_stream
    parse_path = (
        parse_kaldi_input_path if 'r' in mode else parse_kaldi_output_path
    )
    if parse_path(path)[0] == TableType.NotATable:
        return open_duck_stream(path, mode=mode, header=header)
    return open_table_stream(
        path,
        kaldi_dtype,
        mode=mode,
        error_on_str=error_on_str,
        utt2spk=utt2spk,
        value_style=value_style,
        cache=cache,
    )
def kaldi_rspecifier_arg_type(string):
    '''argparse type check: accept only strings that parse as a table

    The string is parsed as a kaldi input path and returned unchanged,
    unless the parse reports that it is not a table, in which case
    ``argparse.ArgumentTypeError`` is raised.
    '''
    parsed = kaldi_io_util.parse_kaldi_input_path(string)
    table_type, _fname, _fname_type, _options = parsed
    not_a_table = kaldi_io_enums.TableType.NotATable
    if table_type == not_a_table:
        raise argparse.ArgumentTypeError('Not a valid rspecifier')
    return string
def kaldi_rspecifier_arg_type(string):
    '''argparse type check: accept only strings that parse as a table

    The string is parsed as a kaldi input path and returned unchanged,
    unless the parse reports that it is not a table, in which case
    ``argparse.ArgumentTypeError`` is raised.
    '''
    parsed = kaldi_io_util.parse_kaldi_input_path(string)
    table_type, _fname, _fname_type, _options = parsed
    not_a_table = kaldi_io_enums.TableType.NotATable
    if table_type == not_a_table:
        raise argparse.ArgumentTypeError('Not a valid rspecifier')
    return string