Esempio n. 1
0
    def _decode_raw(self, data):
        '''
        @see AudioInput._decode_raw()
        '''
        # Decode the raw bytes
        self._decoder.start_utt()
        self._decoder.process_raw(data, False, True)
        self._decoder.end_utt()

        tokens = []
        for seg in self._decoder.seg():
            word = seg.word
            prob = seg.prob
            vrbl = True

            # Start and end tokens
            if word is '<s>' or word is '</s>':
                continue

            # Non-verbal tokens
            if ('<' in word or '>' in word or '[' in word or ']' in word):
                vrbl = False

            # Strip any "(...)" appendage which details the path
            if '(' in word:
                word = word[:word.index('(')]

            # Save as a token in the result
            tokens.append(Token(word, prob, vrbl))

        # We're done!
        return tokens
Esempio n. 2
0
    def _handle(self, sckt):
        '''
        Handle reading from a socket.

        Bytes are read one at a time and split into words on space, tab and
        newline. Each word becomes a ``Token`` with a confidence of 1.0; on
        every newline the tokens collected so far are appended to
        ``self._output`` as one list. Reading stops when the peer closes the
        connection, or when an EOT byte (value 4) arrives while no word is
        being accumulated, in which case the socket is closed.

        :param sckt:
            The connected socket to read from.
        '''
        LOG.info("Started new socket handler")

        # We'll build these up
        tokens = []
        cur = b''

        # Loop until they go away
        while True:
            c = sckt.recv(1)
            if not c:
                LOG.info("Peer closed connection")
                return

            if len(cur) == 0 and c == b'\x04':
                LOG.info("Got EOT")
                try:
                    sckt.close()
                except Exception:
                    # Best effort only; we are going away regardless
                    pass
                return

            # Anything which is not a separator just extends the current word
            if c not in b' \t\n':
                cur += c
                continue

            # We hit a separator, so take whatever we accumulated. Always
            # reset the buffer so that residue which strips away to nothing
            # (e.g. a lone '\r') neither lingers nor becomes an empty token.
            raw, cur = cur, b''
            word = raw.strip()
            if word:
                try:
                    text = word.decode()
                except UnicodeDecodeError as e:
                    # Don't let one undecodable word kill the whole handler
                    LOG.error("Error handling '%s': %s", raw, e)
                else:
                    tokens.append(Token(text, 1.0, True))

            # A newline terminates the current input
            if c == b'\n':
                self._output.append(tokens)
                tokens = []
Esempio n. 3
0
    def _decode(self):
        """
        @see AudioInput._decode()

        Flush the recognizer's final result into the pending results, reset
        the recognizer, and turn the pending results into tokens. Results
        with an empty word or a falsy confidence are dropped. The pending
        results are cleared before returning.
        """
        # Pull in whatever the recognizer still holds, then clear it out so
        # that it is ready for the next utterance
        final = self._recognizer.FinalResult()
        self._add_result(final)
        self._recognizer.Reset()

        LOG.debug("Decoding: %s" % self._results)

        # Build a token for each usable result
        tokens = []
        for entry in self._results:
            text = entry.get('word', '').strip()
            score = entry.get('conf', 0.0)
            if not (text and score):
                continue
            tokens.append(Token(text, score, True))

        # These have now been consumed
        self._results = []

        LOG.debug("Got: %s" % ' '.join(map(str, tokens)))
        return tokens
Esempio n. 4
0
 def tokenize(string):
     """
     Turn a value into a list of tokens, one per whitespace-separated word.

     Falsy values, and values whose string form is only whitespace, yield an
     empty list. Every token gets a confidence of 1.0.
     """
     if not string:
         return []

     text = str(string).strip()
     if not text:
         return []

     # split() with no separator already discards empty pieces and any
     # whitespace around each word
     tokens = []
     for piece in text.split():
         tokens.append(Token(piece, 1.0, True))
     return tokens
Esempio n. 5
0
    def _decode(self):
        """
        @see AudioInput._decode()

        Finish the exchange on ``self._sckt``: send the end-of-data marker
        (an 8-byte network-order -1), read back an 8-byte network-order
        length followed by that many bytes of text, and split the text into
        tokens on spaces. The socket is always shut down, closed and
        forgotten afterwards.

        :return:
            The list of ``Token`` instances, or an empty list if there was no
            socket or anything went wrong while talking to the remote end.
        """
        sckt = self._sckt
        if sckt is None:
            # No context means no tokens
            LOG.warning("Had no stream context to close")
            return []

        def recv_exactly(size):
            # Read exactly ``size`` bytes, failing if the peer goes away
            buf = b''
            while len(buf) < size:
                got = sckt.recv(size - len(buf))
                if len(got) == 0:
                    raise IOError("EOF in recv()")
                buf += got
            return buf

        try:
            # Send the EOD token
            sckt.sendall(struct.pack('!q', -1))

            # Get back the result:
            #   8 bytes for the length
            #   data...
            LOG.info("Waiting for result...")
            (count, ) = struct.unpack("!q", recv_exactly(8))

            # Read in the string
            LOG.info("Reading %d chars" % (count, ))
            result = recv_exactly(count).decode()
            LOG.info("Result is: '%s'" % (result, ))

            # Convert to tokens
            return [
                Token(word.strip(), 1.0, True) for word in result.split(' ')
                if word.strip() != ''
            ]

        except Exception as e:
            # Again, just grumble on exceptions
            LOG.info("Failed to do remote processing: %s" % e)
            return []

        finally:
            # Close it out, best effort
            try:
                LOG.info("Closing connection")
                try:
                    sckt.shutdown(socket.SHUT_RDWR)
                finally:
                    # shutdown() can fail (e.g. the peer has already gone);
                    # make sure the descriptor is still released
                    sckt.close()
            except Exception:
                pass
            finally:
                self._sckt = None
Esempio n. 6
0
 def _decode_raw(self, data):
     '''
     @see AudioInput._decode_raw()

     View ``data`` as 16-bit integer samples, pass them with ``self._rate``
     to the model's ``stt()`` and split the returned text on spaces into
     tokens, each with a confidence of 1.0. Empty pieces are skipped.
     '''
     samples = numpy.frombuffer(data, numpy.int16)
     transcript = self._model.stt(samples, self._rate)
     LOG.info("Got: %s" % (transcript, ))

     tokens = []
     for piece in transcript.split(' '):
         text = piece.strip()
         if text:
             tokens.append(Token(text, 1.0, True))
     return tokens
Esempio n. 7
0
    def _handle(self, sckt):
        """
        Handle reading from a socket.

        Bytes are read one at a time and split into words on space, tab and
        newline. Each word becomes a ``Token`` with a confidence of 1.0. On a
        newline, any tokens collected so far are appended to ``self._output``
        as one list, preceded by ``self._prefix`` when that is set. Reading
        stops when the peer closes the connection, or when an EOT byte (value
        4) arrives while no word is being accumulated, in which case the
        socket is closed.

        :param sckt:
            The connected socket to read from.
        """
        LOG.info("Started new socket handler")

        # We'll build these up
        tokens = []
        cur = b''

        # Loop until they go away
        while True:
            c = sckt.recv(1)
            if c is None or len(c) == 0:
                LOG.info("Peer closed connection")
                return

            if len(cur) == 0 and ord(c) == 4:
                LOG.info("Got EOT")
                try:
                    sckt.close()
                except Exception:
                    # Best effort only; we are going away regardless
                    pass
                return

            # Anything which is not a separator just extends the current word
            if c not in b' \t\n':
                cur += c
                continue

            # We hit a separator, so take whatever we accumulated. The buffer
            # is reset unconditionally: residue which strips away to nothing
            # (e.g. a lone '\r' from a blank CRLF line) would otherwise linger
            # and prevent a following EOT from being recognised.
            raw, cur = cur, b''
            word = raw.strip()
            if len(word) > 0:
                try:
                    tokens.append(Token(word.decode(), 1.0, True))
                except Exception as e:
                    LOG.error("Error handling '%s': %s", raw, e)

            # A newline hands over the current input, if there is any
            if c == b'\n' and len(tokens) > 0:
                if self._prefix:
                    tokens = self._prefix + tokens
                self._output.append(tokens)
                tokens = []
Esempio n. 8
0
    def _decode(self):
        """
        @see AudioInput._decode()

        Finish the current stream context, if there is one, and split the
        text it returns on spaces into tokens with a confidence of 1.0. The
        context is dropped once it has been finished.
        """
        context = self._context
        if context is None:
            # Nothing was being streamed, so there is nothing to give back
            LOG.warning("Had no stream context to close")
            return []

        # Finishing the stream yields the decoded text
        words = context.finishStream()
        LOG.info("Got: %s" % (words, ))
        self._context = None

        # Tokenize, skipping the empty pieces left by repeated spaces
        tokens = []
        for piece in words.split(' '):
            text = piece.strip()
            if text:
                tokens.append(Token(text, 1.0, True))
        return tokens
Esempio n. 9
0
    def __init__(self, state, port=8008, prefix=None):
        """
        @see Input.__init__()
        :type  port: int
        :param port:
            The port to listen on.
        :type  prefix: str
        :param prefix:
            What to prefix to the beginning of any input. It is split on
            whitespace into tokens; a missing or blank prefix means none.
        """
        super(SocketInput, self).__init__(state)

        self._port = int(port)

        # Reduce the prefix to its words, if it has any
        prefix_text = str(prefix).strip() if prefix else ''
        if prefix_text:
            prefix_tokens = []
            for piece in prefix_text.split():
                prefix_tokens.append(Token(piece, 1.0, True))
            self._prefix = prefix_tokens
        else:
            self._prefix = None

        self._socket = None
        self._output = []
Esempio n. 10
0
def tokenise(string):
    """
    Turn a string into a list of tokens.

    The string is split on single spaces, so consecutive spaces produce
    tokens with empty text. Every token gets a confidence of 1.0.
    """
    from dexter.input import Token

    tokens = []
    for piece in string.split(' '):
        tokens.append(Token(piece, 1.0, True))
    return tokens
Esempio n. 11
0
    def _decode_raw(self, data):
        '''
        @see AudioInput._decode_raw()
        '''
        # Handle funy inputs
        if data is None or len(data) == 0:
            return []

        # Info in the header
        header = struct.pack('!qqqq', self._channels, self._width, self._rate,
                             len(data))

        # Connect
        LOG.info("Opening connection to %s:%d" % (
            self._host,
            self._port,
        ))
        try:
            # Connect
            sckt = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sckt.connect((self._host, self._port))

            # Send off our query
            LOG.info("Sending %d bytes of data to %s" %
                     (len(data), self._host))
            sckt.sendall(header)
            sckt.sendall(data)

            # Get back the result:
            #   8 bytes for the length
            #   data...
            LOG.info("Waiting for result...")
            length = b''
            while len(length) < 8:
                length += sckt.recv(8 - len(length))
            (count, ) = struct.unpack("!q", length)

            # Read in the string
            LOG.info("Reading %d chars" % (count, ))
            result = b''
            while len(result) < count:
                result += sckt.recv(count - len(result))
            result = result.decode()
            LOG.info("Result is: '%s'" % (result, ))

        except Exception as e:
            # Don't kill the thread by throwing an exception, just grumble
            LOG.info("Failed to do remote processing: %s" % e)
            return []

        finally:
            # Close it out, best effort
            try:
                LOG.info("Closing connection")
                sckt.shutdown(socket.SHUT_RDWR)
                sckt.close()
            except:
                pass

        # Convert to tokens
        tokens = [
            Token(word.strip(), 1.0, True) for word in result.split(' ')
            if word.strip() != ''
        ]
        return tokens