Example #1
0
 def __init__(self, table, *additional_tables, **kwargs):
     """Set up the key list, sample count, RNG and table handles

     Parameters
     ----------
     table, additional_tables, kwargs
         Forwarded to the parent initializer once ``key_list`` and
         ``rng`` have been removed from `kwargs`.
     key_list : iterable, optional
         Keyword-only. Keys to use, stored as a tuple. When omitted
         (``None``), the keys are read from the first table specifier.
     rng : optional
         Keyword-only. A ``numpy.random.RandomState`` is kept as-is;
         any other value is handed to the ``RandomState`` constructor.

     Raises
     ------
     IOError
         If no `key_list` was given and the first rspecifier parses as
         invalid input or as standard input.
     """
     requested_keys = kwargs.pop('key_list', None)
     seed_or_state = kwargs.pop('rng', None)
     super(ShuffledData, self).__init__(table, *additional_tables, **kwargs)
     try:
         # materialize one-shot iterables up front; ``None`` falls through
         requested_keys = tuple(requested_keys)
     except TypeError:
         pass
     if requested_keys is not None:
         self.key_list = tuple(requested_keys)
     else:
         # no keys supplied: recover them by reading the first table
         first_spec = self.table_specifiers[0]
         _, rx_fn, rx_type, _ = parse_kaldi_input_path(first_spec[0])
         if rx_type == RxfilenameType.InvalidInput:
             raise IOError('Invalid rspecifier {}'.format(rx_fn))
         if rx_type == RxfilenameType.StandardInput:
             raise IOError(
                 'Cannot infer key list from stdin (cannot reopen)')
         with io_open(*first_spec[:2]) as reader:
             self.key_list = tuple(reader.keys())
     # the sample count is left unset when missing entries are ignored
     self._num_samples = (
         None if self.ignore_missing else len(self.key_list))
     self.rng = (
         seed_or_state
         if isinstance(seed_or_state, np.random.RandomState)
         else np.random.RandomState(seed_or_state))
     handles = []
     for spec_path, spec_dtype, spec_kwargs in self.table_specifiers:
         handles.append(
             io_open(spec_path, spec_dtype, mode='r+', **spec_kwargs))
     self.table_handles = tuple(handles)
Example #2
0
 def __init__(self, table, *additional_tables, **kwargs):
     """Initialize keys, sample count, random state and open the tables

     ``key_list`` and ``rng`` are popped from `kwargs`; everything else
     goes to the parent initializer unchanged.

     Parameters
     ----------
     key_list : iterable, optional
         Keys to store (as a tuple) in ``self.key_list``. If ``None``,
         the keys are taken from a reader opened on the first table
         specifier.
     rng : optional
         Used directly when it is a ``numpy.random.RandomState``;
         otherwise passed to ``numpy.random.RandomState(...)``.

     Raises
     ------
     IOError
         When keys must be inferred but the first rspecifier parses as
         invalid input or standard input.
     """
     keys = kwargs.pop('key_list', None)
     random_source = kwargs.pop('rng', None)
     super(ShuffledData, self).__init__(table, *additional_tables, **kwargs)
     try:
         keys = tuple(keys)
     except TypeError:
         # not iterable (e.g. ``None``): leave the value as it was
         pass
     if keys is None:
         primary = self.table_specifiers[0]
         parsed = parse_kaldi_input_path(primary[0])
         _, rx_fn, rx_type, _ = parsed
         if rx_type == RxfilenameType.InvalidInput:
             raise IOError('Invalid rspecifier {}'.format(rx_fn))
         elif rx_type == RxfilenameType.StandardInput:
             message = 'Cannot infer key list from stdin (cannot reopen)'
             raise IOError(message)
         with io_open(*primary[:2]) as key_reader:
             self.key_list = tuple(key_reader.keys())
     else:
         self.key_list = tuple(keys)
     if not self.ignore_missing:
         self._num_samples = len(self.key_list)
     else:
         self._num_samples = None
     if not isinstance(random_source, np.random.RandomState):
         random_source = np.random.RandomState(random_source)
     self.rng = random_source
     # one handle per table specifier, each opened with mode 'r+'
     self.table_handles = tuple([
         io_open(rspec, dtype, mode='r+', **open_kwargs)
         for rspec, dtype, open_kwargs in self.table_specifiers
     ])
Example #3
0
def kaldi_rxfilename_arg_type(string):
    """Argparse ``type`` callable accepting only extended readable files

    Parameters
    ----------
    string : str
        The raw command-line value.

    Returns
    -------
    str
        `string`, unchanged, when it passes both checks.

    Raises
    ------
    argparse.ArgumentTypeError
        If `string` parses as a table (an rspecifier), or if its
        rxfilename type is ``InvalidInput``.
    """
    parsed = kaldi_io_util.parse_kaldi_input_path(string)
    kind, _, rx_kind, _ = parsed
    # tables are rejected first: only plain extended file names are allowed
    if kind != kaldi_io_enums.TableType.NotATable:
        message = (
            'Expected an extended file name, got an rspecifier (starts with '
            "'ark:' or 'scp:')")
        raise argparse.ArgumentTypeError(message)
    if rx_kind == kaldi_io_enums.RxfilenameType.InvalidInput:
        raise argparse.ArgumentTypeError('Not a valid rxfilename')
    return string
Example #4
0
def kaldi_rxfilename_arg_type(string):
    """Validate that a command-line value is an extended readable file

    Intended for use as an argparse ``type``. The value is returned
    untouched on success.

    Raises
    ------
    argparse.ArgumentTypeError
        When the value parses as a table rather than a plain file name,
        or when its rxfilename type is ``InvalidInput``.
    """
    table_kind, _, rxfilename_kind, _ = (
        kaldi_io_util.parse_kaldi_input_path(string))
    is_table = table_kind != kaldi_io_enums.TableType.NotATable
    if is_table:
        raise argparse.ArgumentTypeError(
            'Expected an extended file name, got an rspecifier (starts with '
            "'ark:' or 'scp:')"
        )
    invalid = kaldi_io_enums.RxfilenameType.InvalidInput
    if rxfilename_kind == invalid:
        raise argparse.ArgumentTypeError('Not a valid rxfilename')
    return string
Example #5
0
 def __init__(self, path):
     """Record `path` and the attributes parsed out of it

     `path` is parsed as an input path when ``self.readable()`` is
     true and as an output path otherwise. The parsed table type,
     xfilenames and xtypes are stored on private attributes, and every
     parsed option becomes an attribute of the same name.

     Parameters
     ----------
     path : str
         The path or specifier this stream refers to.
     """
     self.path = path
     self.closed = False
     parse_path = (
         parse_kaldi_input_path if self.readable()
         else parse_kaldi_output_path)
     parsed = parse_path(path)
     self._table_type, self._xfilenames, self._xtypes, options = parsed
     # default first: a parsed option of the same name may override it
     self.binary = True
     for option_name, option_value in options.items():
         setattr(self, option_name, option_value)
     super(KaldiIOBase, self).__init__()
Example #6
0
 def __init__(self, path):
     """Store `path` and populate attributes from its parsed form

     The parser is chosen by ``self.readable()``: the input-path
     parser when readable, the output-path parser otherwise. Its four
     results fill ``_table_type``, ``_xfilenames``, ``_xtypes`` and a
     set of options, each of which is set as an attribute.

     Parameters
     ----------
     path : str
         The path or specifier this stream refers to.
     """
     # imported at call time rather than at module level
     from pydrobert.kaldi.io.util import parse_kaldi_input_path
     from pydrobert.kaldi.io.util import parse_kaldi_output_path
     self.path = path
     self.closed = False
     if not self.readable():
         parsed = parse_kaldi_output_path(path)
     else:
         parsed = parse_kaldi_input_path(path)
     self._table_type, self._xfilenames, self._xtypes, options = parsed
     # ``binary`` defaults to True unless the parsed options replace it
     self.binary = True
     for attr_name, attr_value in options.items():
         setattr(self, attr_name, attr_value)
     super(KaldiIOBase, self).__init__()
Example #7
0
 def __init__(self, table, *additional_tables, **kwargs):
     """Initialize the parent, warn on unsorted tables, pick a generator

     All arguments are forwarded to the parent initializer. Afterwards
     each table specifier's rspecifier is parsed for its ``'sorted'``
     option; if any is not flagged as sorted, a warning names the first
     such rspecifier and suggests a version with the ``,s`` flag added.

     The per-epoch sample generator is ``_ignore_epoch`` when
     ``ignore_missing`` is set and there is more than one table, and
     ``_no_ignore_epoch`` otherwise.
     """
     super(SequentialData, self).__init__(
         table, *additional_tables, **kwargs)
     self._num_samples = None
     sorted_flags = tuple(
         parse_kaldi_input_path(specifier[0])[3]['sorted']
         for specifier in self.table_specifiers)
     if not all(sorted_flags):
         first_unsorted = sorted_flags.index(False)
         unsorted_rspec = self.table_specifiers[first_unsorted][0]
         # add ",s" to the part before the first colon
         prefix, colon, remainder = unsorted_rspec.partition(':')
         suggested_rspec = prefix + ',s' + colon + remainder
         message = (
             'SequentialData assumes data are sorted, and "{}" does '
             'not promise to be sorted. To supress this warning, '
             'check that this table is sorted, then add the sorted '
             'flag to this rspecifier ("{}")'
         ).format(unsorted_rspec, suggested_rspec)
         warn(message)
     skip_missing = self.ignore_missing and len(self.table_specifiers) > 1
     self._sample_generator_for_epoch = (
         self._ignore_epoch if skip_missing else self._no_ignore_epoch)
Example #8
0
def open(
        path, kaldi_dtype=None, mode='r', error_on_str=True, utt2spk='',
        value_style='b', header=True, cache=False):
    """Factory function for initializing and opening kaldi streams

    This function provides a general interface for opening kaldi
    streams. Kaldi streams are either simple input/output of kaldi
    objects (the basic stream) or key-value readers and writers
    (tables).

    When `path` starts with ``ark:`` or ``scp:`` (possibly with
    modifiers before the colon), a table is opened. Otherwise, a basic
    stream is opened.

    Parameters
    ----------
    path : str
        Parsed as an input path when `mode` contains ``'r'`` and as an
        output path otherwise.
    kaldi_dtype
        Forwarded to ``open_table_stream``; unused for basic streams.
    mode : str
        Forwarded to whichever opener is selected.
    error_on_str, utt2spk, value_style, cache
        Forwarded to ``open_table_stream`` only.
    header
        Forwarded to ``open_duck_stream`` only.

    Returns
    -------
    The stream produced by ``open_duck_stream`` (basic) or
    ``open_table_stream`` (table).

    See also
    --------
    pydrobert.kaldi.io.table_streams.open_table_stream
        For information on opening tables
    pydrobert.kaldi.io.basic.open_duck_stream
        For information on opening basic streams
    """
    parse_path = (
        parse_kaldi_input_path if 'r' in mode else parse_kaldi_output_path)
    if parse_path(path)[0] == TableType.NotATable:
        # not a table: fall back to the basic stream
        return open_duck_stream(path, mode=mode, header=header)
    table_options = dict(
        mode=mode,
        error_on_str=error_on_str,
        utt2spk=utt2spk,
        value_style=value_style,
        cache=cache,
    )
    return open_table_stream(path, kaldi_dtype, **table_options)
Example #9
0
 def __init__(self, table, *additional_tables, **kwargs):
     """Forward to the parent, check sorted flags, choose the generator

     After the parent initializer runs, the ``'sorted'`` option of
     every table specifier's rspecifier is collected. If any is not
     set, one warning is issued for the first such rspecifier, along
     with the same rspecifier with ``,s`` added ahead of its first
     colon.

     ``_sample_generator_for_epoch`` becomes ``_ignore_epoch`` only
     when ``ignore_missing`` is set and more than one table was given;
     otherwise it is ``_no_ignore_epoch``.
     """
     super(SequentialData, self).__init__(table, *additional_tables,
                                          **kwargs)
     self._num_samples = None
     is_sorted = tuple(
         parse_kaldi_input_path(entry[0])[3]['sorted']
         for entry in self.table_specifiers)
     if not all(is_sorted):
         offender = self.table_specifiers[is_sorted.index(False)][0]
         pieces = offender.split(':', 1)
         pieces[0] = pieces[0] + ',s'
         with_sorted_flag = ':'.join(pieces)
         template = (
             'SequentialData assumes data are sorted, and "{}" does '
             'not promise to be sorted. To supress this warning, '
             'check that this table is sorted, then add the sorted '
             'flag to this rspecifier ("{}")')
         warn(template.format(offender, with_sorted_flag))
     # dropping missing keys only matters with more than one table
     if not (self.ignore_missing and len(self.table_specifiers) > 1):
         self._sample_generator_for_epoch = self._no_ignore_epoch
     else:
         self._sample_generator_for_epoch = self._ignore_epoch
Example #10
0
def open(path,
         kaldi_dtype=None,
         mode='r',
         error_on_str=True,
         utt2spk='',
         value_style='b',
         header=True,
         cache=False):
    """Factory function for initializing and opening kaldi streams

    This function provides a general interface for opening kaldi
    streams. Kaldi streams are either simple input/output of kaldi
    objects (the basic stream) or key-value readers and writers
    (tables).

    When `path` starts with ``'ark:'`` or ``'scp:'`` (possibly with
    modifiers before the colon), a table is opened. Otherwise, a basic
    stream is opened.

    Parameters
    ----------
    path : str
        Parsed with the input-path parser when `mode` contains ``'r'``,
        with the output-path parser otherwise.
    kaldi_dtype
        Passed on to ``open_table_stream``; ignored for basic streams.
    mode : str
        Passed on to the selected opener.
    error_on_str, utt2spk, value_style, cache
        Passed on to ``open_table_stream`` only.
    header
        Passed on to ``open_duck_stream`` only.

    See also
    --------
    pydrobert.kaldi.io.table_streams.open_table_stream
        For information on opening tables
    pydrobert.kaldi.io.basic.open_duck_stream
        For information on opening basic streams
    """
    # imported at call time rather than at module level
    from pydrobert.kaldi.io.enums import TableType
    from pydrobert.kaldi.io.util import parse_kaldi_input_path
    from pydrobert.kaldi.io.util import parse_kaldi_output_path
    from pydrobert.kaldi.io.duck_streams import open_duck_stream
    from pydrobert.kaldi.io.table_streams import open_table_stream
    if 'r' not in mode:
        parsed = parse_kaldi_output_path(path)
    else:
        parsed = parse_kaldi_input_path(path)
    table_type = parsed[0]
    if table_type == TableType.NotATable:
        return open_duck_stream(path, mode=mode, header=header)
    return open_table_stream(
        path,
        kaldi_dtype,
        mode=mode,
        error_on_str=error_on_str,
        utt2spk=utt2spk,
        value_style=value_style,
        cache=cache,
    )
Example #11
0
def kaldi_rspecifier_arg_type(string):
    """Argparse ``type`` callable accepting only rspecifiers

    Parameters
    ----------
    string : str
        The raw command-line value.

    Returns
    -------
    str
        `string`, unchanged, when it parses as a table.

    Raises
    ------
    argparse.ArgumentTypeError
        If `string` parses as ``TableType.NotATable``.
    """
    parsed = kaldi_io_util.parse_kaldi_input_path(string)
    table_kind, _, _, _ = parsed
    not_a_table = kaldi_io_enums.TableType.NotATable
    if table_kind == not_a_table:
        raise argparse.ArgumentTypeError('Not a valid rspecifier')
    return string
Example #12
0
def kaldi_rspecifier_arg_type(string):
    """Validate that a command-line value is an rspecifier

    Intended for use as an argparse ``type``: returns the value as
    given, or raises ``argparse.ArgumentTypeError`` when the parsed
    table type is ``NotATable``.
    """
    kind, _, _, _ = kaldi_io_util.parse_kaldi_input_path(string)
    rejected = kind == kaldi_io_enums.TableType.NotATable
    if rejected:
        raise argparse.ArgumentTypeError('Not a valid rspecifier')
    return string