# Example no. 1
0
class SequentialTableReaderArchiveImpl(object):
    """Sequential reader over the entries of an archive rspecifier.

    The reader is a small state machine driven by ``self.state``.  ``Open()``
    reads the first entry, ``Next()`` advances to the following one, and
    ``Key()``/``Value()`` expose the current entry while the state is
    ``kHaveObject``.  Failures are reported through ``LogError``, which is
    expected to raise (the comments in ``Open()`` refer to it as an
    exception).
    """

    def __init__(self, holder_type):
        """Initialize the reader for the given holder type.

        Args:
            holder_type: The given holder type.
        """
        self.rspecifier = None
        self.opts = None
        self.archive_rxfilename = None
        self.input = Input()
        self.type = holder_type
        self.holder = NewHolderByType(self.type)
        self.key = None
        self.state = SequentialTableReaderStateType.kUninitialized

    def Open(self, rspecifier):
        """Open a reader for the given rspecifier.

        On any failure the input stream is closed and the reader is put back
        into the ``kUninitialized`` state, so ``Open()`` may be retried.

        Args:
            rspecifier: The given rspecifier.

        Returns:
            A boolean variable indicating if the operation is successful.
        """
        if self.state != SequentialTableReaderStateType.kUninitialized:
            # call Close() yourself to suppress this exception.
            if not self.Close():
                if self.opts.permissive:
                    LogWarning('Error closing previous input (only warning, '
                               'since permissive mode).')
                else:
                    LogError('Error closing previous input, rspecifier was '
                             '\"%s\"' % self.rspecifier)
        self.rspecifier = rspecifier
        (rspecifier_type, rxfilename, opts) = ClassifyRspecifier(rspecifier)
        self.archive_rxfilename = rxfilename
        self.opts = opts
        if rspecifier_type != RspecifierType.kArchiveRspecifier:
            LogError('Invalid rspecifier type \"%s\"' % rspecifier_type)
        self.input = Input()
        if self.holder.IsReadInBinary():
            success = self.input.Open(self.archive_rxfilename)
        else:
            success = self.input.OpenTextMode(self.archive_rxfilename)
        if not success:
            self.state = SequentialTableReaderStateType.kUninitialized
            LogError('Failed to open stream \"%s\"' % self.archive_rxfilename)
        self.state = SequentialTableReaderStateType.kFileStart
        try:
            self.Next()
        except Exception:
            # Next() reports failures by raising.  Without this cleanup the
            # reader would be stuck in kFileStart with the stream still open,
            # and every later Open()/Close() would fail with 'Invalid state'.
            if self.input.IsOpen():
                self.input.Close()
            self.state = SequentialTableReaderStateType.kUninitialized
            raise
        if self.state == SequentialTableReaderStateType.kError:
            self.input.Close()
            self.state = SequentialTableReaderStateType.kUninitialized
            LogError('Error beginning to read archive file \"%s\" (wrong '
                     'filename?)' % self.archive_rxfilename)
        if self.state != SequentialTableReaderStateType.kHaveObject and \
           self.state != SequentialTableReaderStateType.kEof:
            LogError('Invalid state \"%s\"' % self.state)
        return True

    def Next(self):
        """Advance to the next entry of the archive.

        Clears the previously held object, then either moves to ``kEof`` when
        the stream is exhausted or reads the next key and object and moves to
        ``kHaveObject``.  If the entry is malformed or cannot be read the
        state becomes ``kError`` before the error is raised, so that
        ``Done()`` and ``Close()`` can report the failure.

        Returns:
            True when the reader reached end-of-file or loaded an object.
        """
        if self.state == SequentialTableReaderStateType.kHaveObject:
            self.holder.Clear()
        elif self.state == SequentialTableReaderStateType.kFileStart or \
             self.state == SequentialTableReaderStateType.kFreedObject:
            pass
        else:
            LogError('Invalid state \"%s\"' % self.state)
        if self.input.Stream().Eof():
            self.state = SequentialTableReaderStateType.kEof
            return True
        self.key = ReadToken(self.input.Stream(), self.input.IsBinary(), False)
        c = self.input.Stream().Peek(1)
        # We expect a space ' ' after the key. We also allow tab, just so we
        # can read archives generated by scripts that may not be fully aware
        # of how this format works.
        if c != ' ' and c != '\t' and c != '\n':
            self.state = SequentialTableReaderStateType.kError
            LogError('Invalid archive file format: expected space after key '
                     '\"%s\", got character \"%s\" when reading archive '
                     '\"%s\".' % (self.key, c, self.archive_rxfilename))
        if c != '\n':  # Consume the space or tab.
            self.input.Stream().Read(1)
        binary = InitKaldiInputStream(self.input.Stream())
        if not self.holder.Read(self.input.Stream(), binary):
            self.holder.Clear()
            self.state = SequentialTableReaderStateType.kError
            LogError('Failed to read object from archive \"%s\"' %
                     self.archive_rxfilename)
        self.state = SequentialTableReaderStateType.kHaveObject
        return True

    def IsOpen(self):
        """Tell whether the reader currently has an archive open.

        Returns:
            True for the states kEof, kError, kHaveObject and kFreedObject,
            False for kUninitialized.  Any other state is reported through
            LogError.
        """
        if self.state == SequentialTableReaderStateType.kEof or \
           self.state == SequentialTableReaderStateType.kError or \
           self.state == SequentialTableReaderStateType.kHaveObject or \
           self.state == SequentialTableReaderStateType.kFreedObject:
            # kError counts as open: Close() inspects that state in order to
            # report the failure, so it must get past the IsOpen() check.
            return True
        elif self.state == SequentialTableReaderStateType.kUninitialized:
            return False
        else:
            # note: kFileStart is not a valid state for the user to call a
            # member function (we never return from a public function in
            # this state).
            LogError('Invalid state \"%s\"' % self.state)

    def Done(self):
        """Tell whether iteration is finished.

        Returns:
            False while an object is held, True at end-of-file or after an
            error.  Any other state is reported through LogError.
        """
        if self.state == SequentialTableReaderStateType.kHaveObject:
            return False
        elif self.state == SequentialTableReaderStateType.kEof or \
             self.state == SequentialTableReaderStateType.kError:
            # Error condition, like Eof, counts as Done(); the
            # destructor/Close() will inform the user of the error.
            return True
        else:
            LogError('Invalid state \"%s\"' % self.state)

    def Key(self):
        """Return the key of the current entry (requires kHaveObject)."""
        if self.state != SequentialTableReaderStateType.kHaveObject:
            LogError('Invalid state \"%s\"' % self.state)
        return self.key

    def Value(self):
        """Return the object of the current entry (requires kHaveObject)."""
        if self.state != SequentialTableReaderStateType.kHaveObject:
            LogError('Invalid state \"%s\"' % self.state)
        return self.holder.Value()

    def Close(self):
        """Close the archive and reset the reader to kUninitialized.

        Returns:
            False if the reader was in the error state, or was at end-of-file
            and closing the stream gave a non-zero status, unless permissive
            mode is set (then a warning is logged and True is returned).
            True otherwise.
        """
        if not self.IsOpen():
            LogError('Called on input that was not open.')
        status = 0
        if self.input.IsOpen():
            status = self.input.Close()
        if self.state == SequentialTableReaderStateType.kHaveObject:
            self.holder.Clear()
        old_state = self.state
        self.state = SequentialTableReaderStateType.kUninitialized
        if old_state == SequentialTableReaderStateType.kError or \
            (old_state == SequentialTableReaderStateType.kEof and
             status != 0):
            if self.opts.permissive:
                LogWarning('Error state detected closing reader. Ignoring '
                           'it because you specified permissive mode.')
                return True
            else:
                return False
        else:
            return True
# Example no. 2
0
class RandomAccessTableReaderScriptImpl(object):
    """RandomAccessTableReaderScriptImpl is for random-access reading of
    archives when a script file is specified. For simplicity we just read it in
    all in one go, as it's unlikely someone would generate this from a pipe.
    In principle we could read it on-demand as for the archives, but this would
    probably be overkill.

    The script is kept as a list of ``(key, rxfilename)`` pairs sorted by key
    (``self.script``) together with the parallel list of keys (``self.keys``)
    used for binary search.  ``self.last_found`` is the index of the most
    recently matched entry.
    """
    def __init__(self, holder_type):
        """Initialize the reader for the given holder type.

        Args:
            holder_type: The given holder type.
        """
        self.input = Input()
        self.opts = None
        self.rspecifier = None
        self.script = None
        self.keys = None
        self.script_rxfilename = None
        self.key = None
        self.type = holder_type
        self.holder = NewHolderByType(self.type)
        self.data_rxfilename = None
        self.last_found = 0
        # NOTE(review): the "not open" state is taken from
        # SequentialTableReaderStateType while the open states come from
        # RandomAccessTableReaderStateType.  This is used consistently in
        # this class, but it relies on the two enums' members never comparing
        # equal -- confirm against the enum definitions.
        self.state = SequentialTableReaderStateType.kUninitialized

    def Open(self, rspecifier):
        """Open a reader for the given rspecifier.

        Reads the whole script file into memory, sorts it by key and closes
        the script stream again.

        Args:
            rspecifier: The given rspecifier.

        Returns:
            A boolean variable indicating if the operation is successful.
        """
        # You may call Open from states kUninitialized and kError.
        # It may leave the object in any of the states.
        if self.state == RandomAccessTableReaderStateType.kNoObject or \
           self.state == RandomAccessTableReaderStateType.kHaveObject:
            # call Close() yourself to suppress this exception.
            if not self.Close():
                LogError(
                    'Error closing previous input, rspecifier was \"%s\"' %
                    self.rspecifier)
        self.rspecifier = rspecifier
        (rspecifier_type, rxfilename, opts) = ClassifyRspecifier(rspecifier)
        self.script_rxfilename = rxfilename
        self.opts = opts
        if rspecifier_type != RspecifierType.kScriptRspecifier:
            LogError('Invalid rspecifier type \"%s\"' % rspecifier_type)

        script_input = Input()
        if not script_input.Open(self.script_rxfilename):
            LogError('Failed opening script file \"%s\"' %
                     self.script_rxfilename)
        try:
            if script_input.IsBinary():
                LogError('script file should not be in binary format.')

            script = list()
            while True:
                line = script_input.Stream().Readline()
                if not line:
                    break
                token = line.rstrip().split()
                if len(token) != 2:
                    LogError('Invalid line \"%s\"' % line)
                script.append((token[0], token[1]))
        finally:
            # The script stream is only needed while loading; release it on
            # both the success and the error path.
            if script_input.IsOpen():
                script_input.Close()
        self.script = sorted(script, key=itemgetter(0))
        self.keys = [key for key, _ in self.script]

        self.state = RandomAccessTableReaderStateType.kNoObject
        self.key = None

        return True

    def IsOpen(self):
        """Return True if a script is loaded (kNoObject or kHaveObject)."""
        if self.state == RandomAccessTableReaderStateType.kNoObject or \
           self.state == RandomAccessTableReaderStateType.kHaveObject:
            return True
        else:
            return False

    def Close(self):
        """Release the data stream, the held object and the loaded script.

        Returns:
            Always True; calling it on a reader that is not open is reported
            through LogError.
        """
        if not self.IsOpen():
            LogError('Called on input that was not open.')
        # No data file has been opened if Value()/HasKey() never preloaded.
        if self.input.IsOpen():
            self.input.Close()
        self.holder.Clear()
        self.last_found = 0
        self.script = None
        self.keys = None
        self.key = None
        self.data_rxfilename = None
        # See the NOTE(review) in __init__ about the enum used here.
        self.state = SequentialTableReaderStateType.kUninitialized
        return True

    def HasKey(self, key):
        """Tell whether ``key`` is available.

        In permissive mode the object is loaded as part of the check, so a
        key whose data cannot be read is not reported as present.

        Args:
            key: The key to look up.

        Returns:
            A boolean variable indicating if the key is available.
        """
        preload = self.opts.permissive
        return self.HasKeyInternal(key, preload)

    def Value(self, key):
        """Return the object stored under ``key``, loading it if necessary.

        Args:
            key: The key to look up.

        Returns:
            The value of the holder after loading the object for ``key``.
        """
        if not self.HasKeyInternal(key, True):
            LogError('Could not get item for key = %s' % key)
        return self.holder.Value()

    def HasKeyInternal(self, key, preload):
        """Look up ``key`` and optionally load its object into the holder.

        Args:
            key: The key to look up.
            preload: If True, also open the data file of the entry and read
                the object, unless the object currently held was read from
                that same data file.

        Returns:
            True if the key exists (and, with ``preload``, the object is
            loaded), False otherwise.
        """
        if self.state == SequentialTableReaderStateType.kUninitialized or \
           self.state == SequentialTableReaderStateType.kError:
            LogError(
                'Called on RandomAccessTableReader object that is not open.')
        elif self.state == RandomAccessTableReaderStateType.kHaveObject:
            # Fast path: the requested object is the one already held.
            if key == self.key:
                return True

        if not self.LookupKey(key):
            return False
        if not preload:
            return True

        data_rxfilename = self.script[self.last_found][1]
        if self.state == RandomAccessTableReaderStateType.kHaveObject and \
           data_rxfilename != self.data_rxfilename:
            # The held object came from a different file; drop it.
            self.state = RandomAccessTableReaderStateType.kNoObject
            self.holder.Clear()
        self.key = key
        self.data_rxfilename = data_rxfilename
        if self.state == RandomAccessTableReaderStateType.kNoObject:
            success = self.input.Open(self.data_rxfilename)
            if not success:
                LogError('Failed to open file \"%s\"' %
                         self.data_rxfilename)
                return False
            if self.holder.Read(self.input.Stream(),
                                self.input.IsBinary()):
                self.state = RandomAccessTableReaderStateType.kHaveObject
            else:
                LogError('Failed to load object from \"%s\"' %
                         self.data_rxfilename)
                return False
        return True

    def LookupKey(self, key):
        """Find ``key`` in the sorted script and remember its position.

        The entry found by the previous lookup and the one right after it are
        tried first, since consecutive lookups often ask for the same or the
        next key; otherwise a binary search over ``self.keys`` is used.  On
        success ``self.last_found`` is the index of the matching entry.

        Args:
            key: The key to look up.

        Returns:
            True if the key is present in the script, False otherwise.
        """
        # range() instead of xrange() so this also runs on Python 3.
        for _ in range(2):
            if self.last_found < len(self.script) and \
               self.script[self.last_found][0] == key:
                return True
            self.last_found += 1
        # Net effect of a double miss: last_found advanced by one.
        self.last_found -= 1

        idx = bisect.bisect(self.keys, key) - 1
        # idx is -1 when the script is empty or key sorts before every entry;
        # guard it so an empty script does not raise IndexError.
        if idx >= 0 and self.keys[idx] == key:
            self.last_found = idx
            return True
        else:
            return False
# Example no. 3
0
class SequentialTableReaderScriptImpl(object):
    """Sequential reader over the entries of a script rspecifier.

    Each line of the script file has the form ``<key> <rxfilename>``.  The
    reader is a state machine driven by ``self.state``: ``Next()`` advances
    to the next script line (``kHaveScpLine``) and the object itself is only
    loaded from its data file when it is needed (``kHaveObject``).  Failures
    are reported through ``LogError``, which is expected to raise (the
    comments in ``Open()`` refer to it as an exception).
    """

    def __init__(self, holder_type):
        """Initialize the reader for the given holder type.

        Args:
            holder_type: The given holder type.
        """
        self.rspecifier = None
        self.opts = None
        self.script_rxfilename = None
        self.script_input = Input()
        self.data_input = Input()
        self.type = holder_type
        self.holder = NewHolderByType(self.type)
        self.range_holder = NewHolderByType(self.type)
        self.key = None
        self.data_rxfilename = None
        self.range = None
        self.state = SequentialTableReaderStateType.kUninitialized

    def Open(self, rspecifier):
        """Open a reader for the given rspecifier.

        If reading the first script line raises, the reader is put into the
        error state (from which Open() may be called again) before the
        exception is propagated.

        Args:
            rspecifier: The given rspecifier.

        Returns:
            A boolean variable indicating if the operation is successful.
        """
        # You may call Open from states kUninitialized and kError.
        # It may leave the object in any of the states.
        if self.state != SequentialTableReaderStateType.kUninitialized and \
           self.state != SequentialTableReaderStateType.kError:
            # call Close() yourself to suppress this exception.
            if not self.Close():
                LogError(
                    'Error closing previous input, rspecifier was \"%s\"' %
                    self.rspecifier)
        self.rspecifier = rspecifier
        (rspecifier_type, rxfilename, opts) = ClassifyRspecifier(rspecifier)
        self.script_rxfilename = rxfilename
        self.opts = opts
        if rspecifier_type != RspecifierType.kScriptRspecifier:
            LogError('Invalid rspecifier type \"%s\"' % rspecifier_type)
        self.script_input = Input()
        if not self.script_input.Open(self.script_rxfilename):
            LogError('Failed opening script file \"%s\"' %
                     self.script_rxfilename)
        if self.script_input.IsBinary():
            self.SetErrorState()
            LogError('script file should not be in binary format.')
            return False
        self.state = SequentialTableReaderStateType.kFileStart
        try:
            self.Next()
        except Exception:
            # Never leave the reader in kFileStart: no public method accepts
            # that state, so the reader could not be closed or reopened.
            self.SetErrorState()
            raise
        # any other status, including kEof, is OK from the point of view
        # of the 'open' function (empty scp file is not inherently an
        # error).
        return self.state != SequentialTableReaderStateType.kError

    def IsOpen(self):
        """Tell whether the reader currently has a script open.

        Returns:
            True for kEof, kHaveScpLine, kHaveObject and kHaveRange, False
            for kUninitialized and kError.  Any other state is reported
            through LogError.
        """
        if self.state == SequentialTableReaderStateType.kEof or \
           self.state == SequentialTableReaderStateType.kHaveScpLine or \
           self.state == SequentialTableReaderStateType.kHaveObject or \
           self.state == SequentialTableReaderStateType.kHaveRange:
            return True
        elif self.state == SequentialTableReaderStateType.kUninitialized or \
             self.state == SequentialTableReaderStateType.kError:
            return False
        else:
            # note: kFileStart is not a valid state for the user to call a
            # member function (we never return from a public function in
            # this state).
            LogError('Invalid state \"%s\"' % self.state)

    def Done(self):
        """Tell whether iteration is finished.

        Returns:
            False while a script line or object is available, True at
            end-of-file or after an error.  Any other state is reported
            through LogError.
        """
        if self.state == SequentialTableReaderStateType.kHaveScpLine or \
           self.state == SequentialTableReaderStateType.kHaveObject or \
           self.state == SequentialTableReaderStateType.kHaveRange:
            return False
        elif self.state == SequentialTableReaderStateType.kEof or \
             self.state == SequentialTableReaderStateType.kError:
            # Error condition, like Eof, counts as Done(); the
            # destructor/Close() will inform the user of the error.
            return True
        else:
            LogError('Invalid state \"%s\"' % self.state)

    def Key(self):
        """Return the key of the current script line."""
        if self.state != SequentialTableReaderStateType.kHaveScpLine and \
           self.state != SequentialTableReaderStateType.kHaveObject and \
           self.state != SequentialTableReaderStateType.kHaveRange:
            LogError('Invalid state \"%s\"' % self.state)
        return self.key

    def Value(self):
        """Return the object of the current entry, loading it if necessary."""
        if not self.EnsureObjectLoaded():
            LogError('Failed to load object from \"%s\" to suppress this '
                     'error, add the permissive (p, ) option to the '
                     'rspecifier.' % self.data_rxfilename)
        if self.state == SequentialTableReaderStateType.kHaveRange:
            return self.range_holder.Value()
        elif self.state == SequentialTableReaderStateType.kHaveObject:
            return self.holder.Value()
        else:
            LogError('Invalid state \"%s\"' % self.state)

    def Next(self):
        """Advance to the next entry of the script file.

        In permissive mode, entries whose object cannot be loaded are
        skipped; otherwise loading is deferred until Value() is called.
        """
        while True:
            self.NextScpLine()
            if self.Done():
                return
            if self.opts.permissive:
                # Permissive mode means, when reading scp files, we treat keys
                # whose scp entry cannot be read as nonexistent.  This means
                # trying to read.
                if self.EnsureObjectLoaded():
                    return  # Success.
                # else try the next scp line.
            else:
                # We go the next key; Value() will crash if we can't read the
                # object on the scp line.
                return

    def Close(self):
        """Close both input streams and reset the reader to kUninitialized.

        Returns:
            False if the reader was in the error state, or was at end-of-file
            and closing the script stream gave a non-zero status, unless
            permissive mode is set (then a warning is logged and True is
            returned).  True otherwise.
        """
        status = 0
        if self.script_input.IsOpen():
            status = self.script_input.Close()
        if self.data_input.IsOpen():
            self.data_input.Close()
        self.range_holder.Clear()
        self.holder.Clear()
        if not self.IsOpen():
            LogError('Called on input that was not open.')
        # Reset the state and report the outcome; Open() relies on the
        # boolean returned here.
        old_state = self.state
        self.state = SequentialTableReaderStateType.kUninitialized
        if old_state == SequentialTableReaderStateType.kError or \
            (old_state == SequentialTableReaderStateType.kEof and
             status != 0):
            if self.opts.permissive:
                LogWarning('Error state detected closing reader. Ignoring '
                           'it because you specified permissive mode.')
                return True
            else:
                return False
        else:
            return True

    def SetErrorState(self):
        """Enter the error state, releasing both streams and both holders.

        Returns:
            Always True.
        """
        self.state = SequentialTableReaderStateType.kError
        if self.script_input.IsOpen():
            self.script_input.Close()
        if self.data_input.IsOpen():
            self.data_input.Close()
        self.holder.Clear()
        self.range_holder.Clear()
        return True

    def NextScpLine(self):
        """Read the next line of the script file and update the state.

        Sets ``self.key`` and ``self.data_rxfilename`` from the line.  An
        object that is already loaded is kept only if the new line refers to
        the same data file.  At end of input the state becomes kEof and all
        streams and holders are released.
        """
        if self.state == SequentialTableReaderStateType.kHaveRange:
            self.range_holder.Clear()
            self.state = SequentialTableReaderStateType.kHaveObject
        if self.state != SequentialTableReaderStateType.kHaveScpLine and \
           self.state != SequentialTableReaderStateType.kHaveObject and \
           self.state != SequentialTableReaderStateType.kFileStart:
            LogError('Invalid state \"%s\"' % self.state)
        line = self.script_input.Stream().Readline()
        if line:
            token = line.rstrip().split()
            if len(token) != 2:
                LogError('Invalid line \"%s\"' % line)
            self.key = token[0]
            data_rxfilename = None
            if token[1].endswith(']'):
                LogError('Range specifier support not implemented yet.')
            else:
                data_rxfilename = token[1]
                self.range = None
                filenames_equal = (self.data_rxfilename == data_rxfilename)
                if not filenames_equal:
                    self.data_rxfilename = data_rxfilename
                if self.state == SequentialTableReaderStateType.kHaveObject:
                    if not filenames_equal:
                        self.holder.Clear()
                        self.state = SequentialTableReaderStateType.kHaveScpLine
                else:
                    self.state = SequentialTableReaderStateType.kHaveScpLine
        else:
            self.state = SequentialTableReaderStateType.kEof
            # There is nothing more in the scp file. Might as well close input
            # streams as we don't need them.
            self.script_input.Close()
            if self.data_input.IsOpen():
                self.data_input.Close()
            self.holder.Clear()  # clear the holder if it was nonempty.
            self.range_holder.Clear(
            )  # clear the range holder if it was nonempty.

    def EnsureObjectLoaded(self):
        """Ensures that we have fully loaded any object associated with the current key.

        A data file that cannot be opened or read is logged as a warning and
        reported through the return value rather than raised, so that Next()
        can skip the entry in permissive mode and Value() can raise its own
        error.

        Returns:
            A boolean variable indicating if the operation is successful.
        """
        if self.state != SequentialTableReaderStateType.kHaveScpLine and \
           self.state != SequentialTableReaderStateType.kHaveObject and \
           self.state != SequentialTableReaderStateType.kHaveRange:
            LogError('Invalid state \"%s\"' % self.state)
        if self.state == SequentialTableReaderStateType.kHaveScpLine:
            success = self.data_input.Open(self.data_rxfilename)
            if not success:
                LogWarning('Failed to open file \"%s\"' %
                           self.data_rxfilename)
                return False
            if self.holder.Read(self.data_input.Stream(),
                                self.data_input.IsBinary()):
                self.state = SequentialTableReaderStateType.kHaveObject
            else:
                LogWarning('Failed to load object from \"%s\"' %
                           self.data_rxfilename)
                return False
        # At this point the state must be either kHaveObject or kHaveRange.
        if self.range:
            LogError('Range specifier support not implemented yet.')
        return True