예제 #1
0
    def evaluate(self, tokens):
        """
        @see Service.evaluate()

        Looks for a request which opens with a "spell" style phrase and, if
        one is found, returns a handler for the words which follow it.

        @param tokens:
            The tokens of the incoming request, as given to ``self._words()``.

        @return:
            A ``_SpellingHandler`` for the words after the matched phrase, or
            None if no phrase matched at the start of the request.
        """
        words = self._words(tokens)

        # The best (start, end, score) seen so far, if any
        best = None
        for candidate in (('spell', ), ('how', 'do', 'you', 'spell')):
            try:
                (first, last, rating) = fuzzy_list_range(words, candidate)
            except ValueError:
                # This phrase was not found; try the next one
                continue

            LOG.debug("%s matches %s with from %d to %d with score %d",
                      candidate, words, first, last, rating)

            # Only phrases at the very start of the request count
            if first != 0:
                continue
            if best is None or best[2] < rating:
                best = (first, last, rating)

        # Nothing matched
        if best is None:
            return None

        # The belief is the score as a fraction, and everything after the
        # phrase is what we are being asked to spell
        (_, last, rating) = best
        return _SpellingHandler(self, tokens, rating / 100.0, words[last:])
예제 #2
0
    def evaluate(self, tokens):
        """
        @see Service.evaluate()

        Looks for a request to define a single word, phrased in one of a small
        number of ways (e.g. "define X" or "what does X mean"), and returns a
        handler for the best matching phrasing.

        @param tokens:
            The tokens of the incoming request, as given to ``self._words()``.

        @return:
            A ``_DictionaryHandler`` for the word to look up, or None if the
            request did not look like one of the known phrasings.
        """
        # Get the words, ready to match with
        words = self._words(tokens)

        # Each entry is the (prefix, suffix) pair which surrounds the word
        fixes = ((('define', ), tuple()),
                 (('what', 'is', 'the', 'meaning', 'of'), tuple()),
                 (('what', 'does'), ('mean', )))

        # The best candidate so far, as a (belief, word) pair
        match = None
        for (prefix, suffix) in fixes:
            try:
                # Look for the prefix and suffix in the words. An empty part
                # trivially matches at the corresponding end of the words.
                if len(prefix) > 0:
                    (pre_start, pre_end,
                     pre_score) = fuzzy_list_range(words, prefix)
                else:
                    (pre_start, pre_end, pre_score) = (0, 0, 100)
                if len(suffix) > 0:
                    (suf_start, suf_end,
                     suf_score) = fuzzy_list_range(words, suffix)
                else:
                    (suf_start, suf_end, suf_score) = (len(words), len(words),
                                                       100)
            except ValueError:
                # One of the parts was not found; try the next phrasing
                continue

            LOG.debug(
                "%s matches %s with from %d to %d with score %d, "
                "and %s matches from %d to %d with score %d", prefix,
                words, pre_start, pre_end, pre_score, suffix, suf_start,
                suf_end, suf_score)

            # We expect there to be only one word in the middle of the prefix
            # and suffix when we match, and for them to span the whole request
            if not (pre_start == 0 and pre_end + 1 == suf_start
                    and suf_end == len(words)):
                continue

            # The belief is the geometric distance of the scores.
            # NOTE(review): this can exceed 1.0 when both scores are high;
            # confirm that is what the handler expects.
            belief = sqrt(pre_score * pre_score +
                          suf_score * suf_score) / 100.0

            # Keep the strongest candidate. (This previously compared against
            # an undefined name, which raised NameError as soon as a second
            # phrasing matched.)
            if match is None or match[0] < belief:
                # The word is the one at pre_end (since it's non-inclusive)
                match = (belief, words[pre_end])

        # Did we get anything?
        if match is None:
            return None

        # Give back the handler
        (belief, word) = match
        return _DictionaryHandler(self, tokens, belief, word, self._limit)
예제 #3
0
    def _build_from_dirname(self, dirname):
        '''
        Populate the index from the audio files found under a directory.

        Every file below the root is opened with mutagen; MP3 and FLAC files
        are turned into entries and added to the index, anything else is
        ignored. A failure on one file is logged and does not stop the walk.

        @type  dirname: str
        @param dirname:
            The directory name to build from.
        '''
        for (root, _children, names) in os.walk(dirname):
            LOG.info("Indexing %s", root)

            for name in names:
                try:
                    # Let mutagen work out what kind of file this is
                    path = os.path.join(root, name)
                    info = mutagen.File(path)

                    # Build an entry for the formats which we understand
                    if isinstance(info, mutagen.mp3.MP3):
                        entry = AudioEntry.from_mp3(info)
                    elif isinstance(info, mutagen.flac.FLAC):
                        entry = AudioEntry.from_flac(info)
                    else:
                        LOG.debug("Ignoring %s", path)
                        continue

                    self._add_entry(entry)
                except Exception as e:
                    LOG.warning("Failed to index %s: %s", path, e)
예제 #4
0
    def _decode(self):
        """
        @see AudioInput._decode()

        Collects the recognizer's final result, resets the recognizer, and
        turns the accumulated results into a list of tokens. The accumulated
        results are cleared afterwards.

        @return:
            The list of ``Token`` instances, one for each result which has
            both a non-empty word and a non-zero confidence.
        """
        # Pull in whatever the recognizer still has, then clear it for the
        # next utterance
        self._add_result(self._recognizer.FinalResult())
        self._recognizer.Reset()

        LOG.debug("Decoding: %s" % self._results)

        # Turn each usable result into a token
        decoded = []
        for entry in self._results:
            text = entry.get('word', '').strip()
            confidence = entry.get('conf', 0.0)
            if not (text and confidence):
                continue
            decoded.append(Token(text, confidence, True))

        # We have consumed these now
        self._results = []

        LOG.debug("Got: %s" % ' '.join(map(str, decoded)))
        return decoded
예제 #5
0
파일: __init__.py 프로젝트: iamsrp/dexter
    def run(self):
        """
        The main worker.

        Starts the system and then loops, while ``self._running`` is true:
        moving due timer events onto the event queue, invoking queued events,
        polling each input for tokens and responding to whatever handling them
        produces. A KeyboardInterrupt ends the loop. The system is stopped
        once the loop has exited.
        """
        LOG.info("Starting the system")
        self._start()

        LOG.info("Entering main loop")
        while self._running:
            try:
                # Move any timer events which have come due onto the main
                # event queue
                LOG.debug("Timer event queue length is %d",
                          len(self._timer_events))
                while True:
                    if len(self._timer_events) == 0:
                        break
                    if not (self._timer_events[0].schedule_time <=
                            time.time()):
                        break
                    self._events.put(heapq.heappop(self._timer_events))

                # Drain the event queue. An event may hand back a follow-up
                # event, which is queued in turn.
                while not self._events.empty():
                    pending = self._events.get()
                    try:
                        follow_up = pending.invoke()
                        if follow_up is not None:
                            self._events.put(follow_up)
                    except Exception as e:
                        LOG.error("Event %s raised exception: %s", pending, e)

                # Poll every input; a read gives back None when there is
                # nothing available
                for source in self._inputs:
                    tokens = source.read()
                    if tokens is None:
                        continue

                    # We read something so attempt to handle it
                    LOG.info("Read from %s: %s" %
                             (source, [str(t) for t in tokens]))
                    response = self._handle(tokens)

                    # Anything we got back goes to the user
                    if response is not None:
                        self._respond(response)

                # Pause briefly before going around again
                time.sleep(0.1)

            except KeyboardInterrupt:
                LOG.warning("KeyboardInterrupt received")
                break

        # The main loop has finished so shut everything down
        LOG.info("Stopping the system")
        self._stop()
예제 #6
0
    def evaluate(self, tokens):
        """
        @see Service.evaluate()

        Looks for a "what is ..." or "who is ..." style question at the start
        of the request and, if there is one, searches Wikipedia for the rest
        of the request. The search result which is closest to the asked-for
        text is handed to a handler.

        @param tokens:
            The tokens of the incoming request, as given to ``self._words()``.

        @return:
            A ``_Handler`` for the best search result, or None if the request
            was not a question for us or the search yielded nothing.
        """
        words = self._words(tokens)

        # The kinds of question which we answer
        openers = (('what', 'is', 'a'), ('what', 'is', 'the'), ('what', 'is'),
                   ('who', 'is', 'the'), ('who', 'is'))

        # Find the best scoring opener which sits at the start of the words
        match = None
        for opener in openers:
            try:
                (first, last, rating) = fuzzy_list_range(words, opener)
            except ValueError:
                continue
            LOG.debug("%s matches %s with from %d to %d with score %d",
                      opener, words, first, last, rating)
            if first == 0 and (match is None or match[2] < rating):
                match = (first, last, rating)

        # Not a question for us
        if not match:
            return None

        # What we are being asked about is everything after the opener
        (_, last, _) = match
        thing = ' '.join(words[last:]).strip().lower()

        # Ask Wikipedia and keep the result closest to what was asked for
        best = None
        try:
            self._notify(Notifier.ACTIVE)
            for candidate in wikipedia.search(thing):
                if candidate is None or len(candidate) == 0:
                    continue
                closeness = fuzz.ratio(thing, candidate.lower())
                LOG.debug("'%s' matches '%s' with a score of %d", candidate,
                          thing, closeness)
                if best is None or best[1] < closeness:
                    best = (candidate, closeness)
        except Exception as e:
            LOG.error("Failed to query Wikipedia for '%s': %s" %
                      (thing, e))
        finally:
            self._notify(Notifier.IDLE)

        # The search came back empty-handed
        if best is None:
            return None

        (title, closeness) = best
        return _Handler(self, tokens, closeness / 100, title)
예제 #7
0
 def evaluate(self, tokens):
     """
     @see Service.evaluate()

     Tries every prefix from ``self._PREFICES`` joined with every subject
     from ``self._HANDLERS`` and returns the handler for the first phrase
     which matches the start of the request over exactly as many words as
     the phrase has.

     @param tokens:
         The tokens of the incoming request, as given to ``self._words()``.

     @return:
         The result of calling the matching handler factory with this
         service and the tokens, or None if nothing matched.
     """
     words = self._words(tokens)
     for (subject, factory) in self._HANDLERS:
         for lead_in in self._PREFICES:
             target = lead_in + subject
             try:
                 (first, last, _) = fuzzy_list_range(words, target)
                 if first != 0 or last != len(target):
                     continue
                 return factory(self, tokens)
             except Exception as err:
                 LOG.debug("Failed to handle '%s': %s" %
                           (' '.join(words), err))
     return None
예제 #8
0
    def _add_result(self, json_result):
        """
        Parse a JSON result string and append what it holds to the pending
        results.

        A ``result`` entry is taken as-is. Failing that, a ``text`` entry is
        split on whitespace and each word is added with a confidence of 1.0.

        @param json_result:
            The JSON string to parse.
        """
        parsed = json.loads(json_result)
        LOG.debug("Got %s" % json_result)

        # A full result is the best thing to have
        if 'result' in parsed:
            self._results.extend(parsed['result'])
            return

        # Otherwise fall back to any plain decoded text
        if 'text' in parsed:
            self._results.extend({'word': token, 'conf': 1.0}
                                 for token in parsed['text'].split()
                                 if token)
예제 #9
0
    def _get_data(self):
        """
        Get the current data for the service's PurpleAir sensor.

        The downloaded response is cached in a file for a minute so that
        repeated calls do not hammer PurpleAir.

        @rtype: dict
        @return:
            The first entry of the ``results`` section of the response, or an
            empty dict if there is no such entry.
        """
        # We'll want to cache the data since hammering PurpleAir is unfriendly
        # and also results in getting back no data.
        # NOTE(review): this is a predictable path in a shared directory;
        # consider a per-user cache location.
        sensor_id = self.service.get_sensor_id()
        filename = '/tmp/dexter_purpleair_%s' % (sensor_id, )
        now = time.time()
        content = None

        # Look for a cached version which is less than a minute old. The
        # modification time is when the content was last written.
        try:
            mtime = os.stat(filename).st_mtime
            if now - mtime < 60:
                with open(filename, 'rb') as fh:
                    content = fh.read()
        except (IOError, OSError):
            # No usable cache, we will just download
            pass

        # If we didn't have a good cached version then download it
        if not content:
            h = httplib2.Http()
            # Format the id with %s, like the filename, so that an id given as
            # a string works too
            _resp, content = h.request(
                "https://www.purpleair.com/json?show=%s" % (sensor_id, ),
                "GET",
                headers={'content-type': 'text/plain'})

            # Save what we downloaded into the cache; failing to do so is not
            # fatal
            try:
                with open(filename, 'wb') as fh:
                    fh.write(content)
            except (IOError, OSError):
                pass

        # Now load in whatever we had
        raw = json.loads(content)

        # And pull out the first value from the "results" section, which should
        # be what we care about
        if 'results' not in raw or len(raw['results']) == 0:
            return {}

        LOG.debug("Got: %s", (raw['results'][0], ))
        return raw['results'][0]
예제 #10
0
    def _handler(self):
        """
        Pulls values from the decoder queue and handles them appropriately. Runs in
        its own thread.

        Bytes are fed to the decoder; a None marks the end of the data and
        triggers a decode, the result of which is appended to
        ``self._output``. The loop ends when ``self._decode_queue`` has been
        set to None.
        """
        LOG.info("Started decoding handler")
        while True:
            try:
                # Get a handle on the queue. This will be nulled out when we're
                # done.
                queue = self._decode_queue
                if queue is None:
                    break

                # Anything?
                if len(queue) > 0:
                    item = queue.popleft()
                    if item is None:
                        # A None denotes the end of the data so we look to
                        # decode what we've been given
                        LOG.info("Decoding audio")
                        self._notify(Notifier.WORKING)
                        try:
                            self._output.append(self._decode())
                        finally:
                            # Always go back to idle, even if decoding fails,
                            # so that we are not left showing as working
                            self._notify(Notifier.IDLE)
                    elif isinstance(item, bytes):
                        # Something to feed the decoder
                        LOG.debug("Feeding %d bytes" % len(item))
                        self._feed_raw(item)
                    else:
                        LOG.warning("Ignoring junk on decode queue: %r" % (item,))

                    # Go around again, without sleeping, since there may be
                    # more waiting
                    continue

            except Exception as e:
                # Be robust but log it
                LOG.error("Got an error in the decoder queue: %s" % (e,))

            # Don't busy-wait
            time.sleep(0.001)

        # And we're done!
        LOG.info("Stopped decoding handler")
예제 #11
0
파일: randomness.py 프로젝트: iamsrp/dexter
    def evaluate(self, tokens):
        """
        @see Service.evaluate()

        Recognises requests for a coin toss, for the roll of a six-sided die,
        or for a number between two given values.

        @param tokens:
            The tokens of the incoming request, as given to ``self._words()``.

        @return:
            A ``_CoinTossHandler``, ``_DiceHandler`` or ``_RangeHandler`` as
            appropriate, or None if the request was not one of these.
        """
        # The incoming request
        words = self._words(tokens)

        # NOTE(review): the phrases below are plain strings, whereas the
        # range prefix further down is a tuple of words; confirm that
        # fuzzy_list_range() accepts both forms.

        # Binary random number
        for phrase in ("toss a coin", "flip a coin"):
            try:
                fuzzy_list_range(words, phrase)
            except ValueError:
                continue
            return _CoinTossHandler(self, tokens)

        # A regular die
        for phrase in ("roll a die", "roll a dice"):
            try:
                fuzzy_list_range(words, phrase)
            except ValueError:
                continue
            return _DiceHandler(self, tokens, 6)

        # A generic request
        try:
            prefix = ('give', 'me', 'a', 'number', 'between')
            (_, offset, _) = fuzzy_list_range(words, prefix)

            # We need at least "<number> and <number>" after the prefix
            if len(words) >= offset + 3:
                # Only look for the "and" after the prefix, so that one which
                # appears earlier in the request can't be picked up
                and_index = words.index('and', offset)
                start = parse_number(words[offset:and_index])
                end = parse_number(words[and_index + 1:])
                if start is not None and end is not None:
                    return _RangeHandler(self, tokens, start, end)
        except Exception as e:
            # Log the request itself, not a phrase left over from the loops
            # above
            LOG.debug("Failed to handle '%s': %s" % (' '.join(words), e))

        # Not for us
        return None
예제 #12
0
    def _feed_raw(self, data):
        """
        @see AudioInput._feed_raw()

        Sends a chunk of data to the remote side, connecting (and sending the
        header) first if there is no open connection. Each chunk is sent as
        its length, packed as a network-order 8-byte integer, followed by the
        data itself. Any failure is logged and the connection is dropped so
        that the next call reconnects.

        @param data:
            The bytes to send; None or empty input is ignored.
        """
        # Handle funny inputs
        if data is None or len(data) == 0:
            return

        # Don't let exceptions kill the thread
        try:
            # Connect?
            if self._sckt is None:
                # Connect and send the header information
                LOG.info("Opening connection to %s:%d" % (
                    self._host,
                    self._port,
                ))
                self._sckt = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                self._sckt.connect((self._host, self._port))
                self._sckt.sendall(self._header)

            # Send off the chunk
            LOG.debug("Sending %d bytes of data to %s" %
                      (len(data), self._host))
            self._sckt.sendall(struct.pack('!q', len(data)))
            self._sckt.sendall(data)

        except Exception as e:
            # Don't kill the thread by throwing an exception, just grumble
            LOG.info("Failed to send to remote side: %s" % e)

            # Forget the socket first so that we always reconnect next time
            sckt = self._sckt
            self._sckt = None
            if sckt is not None:
                try:
                    sckt.shutdown(socket.SHUT_RDWR)
                except Exception:
                    # Best effort; this fails if we never got connected
                    pass
                finally:
                    # Always close, even if the shutdown failed, so that the
                    # descriptor is not leaked
                    try:
                        sckt.close()
                    except Exception:
                        pass
예제 #13
0
 def _match_artist(self, artist):
     """
     @see MusicService._match_artist()

     Searches Spotify for the given artist and checks whether any of the
     results has a name which closely matches it.

     @type  artist: sequence of str
     @param artist:
         The words of the artist name.

     @rtype: bool
     @return:
         True if a result's name fuzzily matches the artist with a ratio of
         over 80, else False.
     """
     artist = ' '.join(artist).lower()
     LOG.debug("Matching artist '%s'", artist)
     result = self._spotify.search(artist, type='artist')

     # Be tolerant of an empty or incomplete response
     if not result or not result.get('artists'):
         return False
     if 'items' not in result['artists']:
         return False

     items = result['artists']['items'] or []
     LOG.debug("Checking %d results", len(items))
     for item in items:
         # Treat a missing or null name as empty
         name = ((item or {}).get('name') or '').lower()
         LOG.debug("Matching against '%s'", name)
         if fuzz.ratio(name, artist) > 80:
             return True
     return False
예제 #14
0
파일: bespoke.py 프로젝트: iamsrp/dexter
    def evaluate(self, tokens):
        """
        @see Service.evaluate()

        Walks the configured ``(phrase, reply, is_prefix)`` entries and
        returns a handler carrying the reply of the first phrase which
        matches at the start of the request.

        @param tokens:
            The tokens of the incoming request, as given to ``self._words()``.

        @return:
            A ``_BespokeHandler`` for the matching phrase's reply, or None if
            no phrase matched.
        """
        words = self._words(tokens)

        for (target, response, prefix_only) in self._phrases:
            try:
                LOG.debug("Looking for %s in %s", target, words)
                (first, last, rating) = fuzzy_list_range(words, target)
                LOG.debug("Matched [%d:%d] and score %d", first, last, rating)

                # The phrase has to sit at the start of the request
                if first != 0:
                    continue

                # NOTE(review): for prefix entries the match must span as
                # many words as the phrase has, while non-prefix entries have
                # no length check at all; confirm this is the intended way
                # around.
                if prefix_only and last != len(target):
                    continue

                return _BespokeHandler(self, tokens, response)
            except ValueError as err:
                LOG.debug("No match: %s", err)

        # Nothing matched
        return None
예제 #15
0
    def _get_handler_for(self, tokens, platform_match, genre, artist,
                         song_or_album):
        """
        @see MusicService._get_handler_for()

        Searches Spotify for the requested name, first as a track, then as an
        album and finally (only when no artist was given) as an artist, and
        builds a play handler for whatever was found.

        @param tokens:
            The tokens of the original request; passed on to the handler.
        @param platform_match:
            Not used by this implementation.
        @param genre:
            Not used by this implementation.
        @type  artist: sequence of str, or None
        @param artist:
            The words of the artist name, if any.
        @type  song_or_album: sequence of str, or None
        @param song_or_album:
            The words of the track, album or artist name to look for.

        @return:
            A ``_SpotifyServicePlayHandler`` for the matching track URIs, or
            None if nothing suitable was found.
        """
        # Do nothing if we have no name
        if song_or_album is None or len(song_or_album) == 0:
            return None

        # Normalise to strings
        name = ' '.join(song_or_album).lower()
        if artist is None or len(artist) == 0:
            artist = None
        else:
            artist = ' '.join(artist).lower()

        # We will put all the track URIs in here, along with the description
        # and belief which go with them
        uris = []
        what = None
        score = None

        # Search by track name then album name, these are essentially the same
        # logic
        for which in ('track', 'album'):
            LOG.info("Looking for '%s'%s as a %s", name,
                     (" by '%s'" % artist) if artist else '', which)

            # This is the key in the results
            plural = which + 's'

            # Try using the song_or_album as the name
            result = self._spotify.search(name, type=which)
            if not result:
                LOG.info("No results")
                continue

            # Did we get back any tracks
            if plural not in result:
                LOG.error("%s was not in result keys: %s", plural,
                          result.keys())
                continue

            # We got some results back, let's assign scores to them all
            candidates = (result[plural] or {}).get('items') or []
            matches = []
            for item in candidates:
                # It must have a URI, else we can't play it. (The previous
                # check raised KeyError when the key was missing and never
                # skipped the item.)
                if not item or not item.get('uri'):
                    LOG.error("No URI in %s", item)
                    continue

                # It must have a name for us to match against
                candidate_name = item.get('name')
                if not candidate_name:
                    continue

                # See how well the name matches
                name_score = fuzz.ratio(name, candidate_name.lower())
                LOG.debug("'%s' matches '%s' with score %d", candidate_name,
                          name, name_score)

                # Check to make sure that we have an artist match as well
                if artist is None:
                    # Treat as a wildcard
                    artist_score = 100
                else:
                    artist_score = 0
                    for entry in item.get('artists') or []:
                        entry_name = (entry or {}).get('name') or ''
                        entry_score = fuzz.ratio(artist, entry_name.lower())
                        LOG.debug("Artist match score for '%s' was %d",
                                  entry_name, entry_score)
                        if entry_score > artist_score:
                            artist_score = entry_score
                LOG.debug("Artist match score was %d", artist_score)

                # Only consider cases where the scores look "good enough"
                if name_score > 75 and artist_score > 75:
                    LOG.debug("Adding match")
                    matches.append((item, name_score, artist_score))

            # Anything?
            if len(matches) == 0:
                continue
            LOG.debug("Got %d matches", len(matches))

            # Pick the highest scoring one. (An ascending sort followed by
            # taking the first entry used to select the worst match.)
            best = max(matches, key=lambda e: (e[1], e[2]))
            item = best[0]
            LOG.debug("Best match was: %s", item)

            # Extract the info
            item_name = item.get('name') or name
            artists = item.get('artists') or []
            artist_name = ((artists[0] or {}).get('name')
                           if len(artists) > 0 else None) or artist

            # Description of what we are playing
            what = item_name
            if artist_name:
                what += " by " + artist_name
            what += " on Spotify"

            # The score is the geometric value of the two.
            # NOTE(review): this can exceed 1.0; confirm that is what the
            # handler expects.
            score = sqrt(best[1] * best[1] + best[2] * best[2]) / 100.0

            # Items without a URI were filtered out above
            uri = item['uri']

            # If we are an album then grab the track URIs
            if which == 'album':
                tracks = self._spotify.album_tracks(uri)
                if tracks and 'items' in tracks:
                    uris = [track['uri']
                            for track in tracks['items'] or []
                            if track and track.get('uri')]
            else:
                # Just the track
                uris = [uri]

            # And we're done
            break

        # Otherwise assume that it's an artist
        if len(uris) == 0 and artist is None:
            LOG.info("Looking for '%s' as an artist", name)
            result = self._spotify.search(name, type='artist')
            LOG.debug("Got: %s", result)

            found = []
            if result and result.get('artists'):
                found = [entry
                         for entry in result['artists'].get('items') or []
                         if entry]

            # Put the closest name first
            items = sorted(found,
                           key=lambda entry: fuzz.ratio(
                               name, (entry.get('name') or '').lower()),
                           reverse=True)

            # Look at the best one, if any
            LOG.debug("Got %d matches", len(items))
            if len(items) > 0:
                match = items[0]
                who = match.get('name') or name
                what = "%s on Spotify" % (who, )

                # Scale to a fraction so that this is on the same footing as
                # the track and album score above
                score = fuzz.ratio(who.lower(), name) / 100.0

                # Find all their albums
                if match.get('uri'):
                    LOG.debug("Got match: %s", match['uri'])
                    artist_albums = self._spotify.artist_albums(
                        match['uri']) or {}
                    for album in artist_albums.get('items') or []:
                        # Append all the tracks
                        LOG.debug("Looking at album: %s", album)
                        if not album or not album.get('uri'):
                            continue
                        tracks = self._spotify.album_tracks(album['uri'])
                        if not tracks or 'items' not in tracks:
                            continue
                        playable = [track
                                    for track in tracks['items'] or []
                                    if track and track.get('uri')]
                        LOG.debug(
                            "Adding tracks: %s",
                            ' '.join(track.get('name') or ''
                                     for track in playable))
                        uris.extend(track['uri'] for track in playable)

        # And now we can give it back, if we had something
        if len(uris) > 0:
            return _SpotifyServicePlayHandler(self, tokens, what, uris, score)

        # We got nothing
        return None
예제 #16
0
    def _run(self):
        '''
        Reads from the audio input stream and hands it off to be processed.

        The loop reads fixed-size chunks from a PyAudio input stream, keeps a
        rolling window of their sound levels, and uses a step up (or down) in
        those levels to decide when speech starts (or ends). The audio
        captured while speech was detected is given to ``self._decode_raw()``
        and the resulting tokens are appended to ``self._output``. The loop
        runs until ``self.is_running`` becomes false; the stream and PyAudio
        instance are released on the way out, however the loop ends.
        '''
        # This possibly takes a while so tell the system what we're doing.
        self._notify(Notifier.INIT)

        # The number of read calls which we expect per second. This corresponds
        # to how many entries in a buffer constitute a second's worth of data.
        read_rate = self._rate / self._chunk_size

        # The buffer of historical audio data
        audio_buf = deque(maxlen=int(1.0 * read_rate))
        level_buf = deque(maxlen=int(2.0 * read_rate))

        # The index at which we cut the level buffer for the purposes of looking
        # for a change in the audio going from background to noisy, or vice
        # versa. This is what we are looking for when detecting speech.
        avg_idx = level_buf.maxlen // 3

        # State etc.
        talking = None  # True when we have detected talking
        speech = None  # What we will process as speech data
        min_secs = 4
        max_secs = 10
        last_log = 0

        # Start pulling in the audio stream
        p = pyaudio.PyAudio()
        stream = None
        try:
            stream = p.open(format=self._format,
                            channels=self._channels,
                            rate=self._rate,
                            input=True,
                            frames_per_buffer=self._chunk_size)

            # Keep listening until we are stopped
            while self.is_running:
                # We'll need this here and there below
                now = time.time()

                # Read in the next lump of data and get its average volume
                chunk = stream.read(self._chunk_size,
                                    exception_on_overflow=False)
                level = numpy.sqrt(abs(audioop.avg(chunk, self._width)))

                # Accumulate into our buffers
                audio_buf.append(chunk)
                level_buf.append(level)

                # If we have not yet filled up the level buffer then we're
                # done here. Any analysis etc. will be inaccurate.
                if len(level_buf) != level_buf.maxlen:
                    continue

                # Get the averaging window as a numpy array so that we can cut
                # it and so forth
                levels = numpy.array(level_buf)

                # If we are doing this for the first time then we can note
                # that we have become actively listening.
                if talking is None:
                    LOG.info("Listening")
                    self._notify(Notifier.IDLE)
                    talking = False
                    talking_start = 0

                # Only look to see if someone is speaking if the system is
                # not. Otherwise we will likely hear ourselves.
                # NOTE(review): any recording in progress is dropped here
                # without a notify back to IDLE; confirm that is intended.
                if self._state.is_speaking() and talking:
                    LOG.info("Ignoring talking since audio is being output")
                    talking = False
                    speech = None
                    continue

                # We are looking for the background levels. If we think that
                # someone is talking then the background sound is going to be
                # at the end of the levels, else it will be at the start.

                # Different detection based on what we are looking for
                if not talking:
                    # Looking for a step up in the latter part
                    from_levels = levels[:-avg_idx]  # All but the last avg_idx
                    to_levels = levels[-avg_idx:]  # The last avg_idx entries
                    from_pctl = numpy.sort(from_levels)[int(
                        len(from_levels) * 0.5)]
                    to_pctl = numpy.sort(to_levels)[int(len(to_levels) * 0.6)]
                    LOG.debug("Levels are from=%0.2f to=%0.2f", from_pctl,
                              to_pctl)
                    if from_pctl * 1.5 < to_pctl:
                        LOG.info("Detected start of speech "
                                 "with levels going from %0.2f to %0.2f" %
                                 (from_pctl, to_pctl))
                        talking = True
                        talking_start = now

                        # Remember the background level from before the
                        # speech, for spotting when we return to it
                        start_pctl = from_pctl
                else:
                    # Looking for a step down in the latter part
                    from_levels = levels[:avg_idx]  # From start to avg_idx
                    to_levels = levels[avg_idx:]  # From avg_idx to end
                    from_pctl = numpy.sort(from_levels)[int(
                        len(from_levels) * 0.5)]
                    to_pctl = numpy.sort(to_levels)[int(len(to_levels) * 0.5)]

                    # Log at info level only every so often, so as not to
                    # flood the log
                    if now - last_log > 0.2:
                        LOG.info("Levels are from=%0.2f to=%0.2f", from_pctl,
                                 to_pctl)
                        last_log = now
                    else:
                        LOG.debug("Levels are from=%0.2f to=%0.2f", from_pctl,
                                  to_pctl)
                    if (now - talking_start > min_secs
                            and (from_pctl > to_pctl * 1.25
                                 or to_pctl < start_pctl * 1.1)):
                        LOG.info("Detected end of speech "
                                 "with levels going from %0.2f to %0.2f" %
                                 (from_pctl, to_pctl))
                        talking = False

                # If the talking has been going on too long then just stop it.
                # Quite possibly the capture was fooled.
                if talking and now - talking_start > max_secs:
                    LOG.info("Talking lasted over %ds; pushing to False" %
                             max_secs)
                    talking = False

                # Different behaviour depending on whether we think someone is
                # talking or not
                if talking:
                    # If we don't yet have any audio then we're starting the
                    # recording
                    if speech is None:
                        # Move the rolling window of recording to be the start
                        # of the audio
                        LOG.info("Starting recording")
                        self._notify(Notifier.ACTIVE)
                        speech = list(audio_buf)

                    # Add on what we just recorded
                    speech.append(chunk)

                elif speech is not None:
                    # There's no talking but there is recorded audio. That
                    # means someone just stopped talking.
                    LOG.info("Finished recording")

                    # Turn the audio data into text (hopefully!)
                    self._notify(Notifier.WORKING)
                    start = time.time()

                    # Turn the stream into a list of bytes and junk the speech
                    # buffer
                    audio = b''.join(speech)
                    speech = None

                    # Maybe save them as a wav file
                    self._save_bytes(audio)

                    # Now decode
                    LOG.info("Decoding %0.2fs seconds of audio" %
                             (len(audio) / self._width / self._rate))
                    tokens = self._decode_raw(audio)
                    LOG.info("Decoded audio in %0.2fs: %s" %
                             (time.time() - start,
                              ([str(x) for x in tokens])))

                    # Add them to the output
                    self._output.append(tokens)

                    # Flush anything accumulated while we were parsing the
                    # phrase, so that we don't fall behind. The input may well
                    # have overflowed while we were busy so, as with the main
                    # read, don't let that raise.
                    available = stream.get_read_available()
                    while (available > self._chunk_size):
                        LOG.debug("Junking backlog of %d", available)
                        stream.read(available, exception_on_overflow=False)
                        available = stream.get_read_available()

                    # Clear out the level buffer so that it can settle again
                    level_buf.clear()

                    # And we're back to listening
                    LOG.info("Listening")
                    self._notify(Notifier.IDLE)

            # If we got here then is_running became false and we're done
            LOG.info("Done listening")

        finally:
            # Release the audio resources, even if something went wrong above
            if stream is not None:
                stream.close()
            p.terminate()
예제 #17
0
    def _run(self):
        """
        Reads from the audio input stream and hands it off to be processed.

        Speech is detected by keeping a rolling window of sound levels and
        comparing the median level of its older part with that of its newest
        part. A step up starts a recording; a step down, or the recording
        running for longer than the maximum time, ends it. While a recording
        is in progress each chunk of audio is appended to
        C{self._decode_queue}, and a C{None} is appended to mark its end.

        When this method exits, for whatever reason, C{self._decode_queue} is
        set to C{None} and the audio stream is closed.
        """
        # This possibly takes a while so tell the system what we're doing.
        self._notify(Notifier.INIT)

        # The number of read calls which we expect per second. This corresponds
        # to how many entries in a buffer constitute a second's worth of data.
        read_rate = self._rate / self._chunk_size

        # Rolling windows of the most recent audio chunks (about one second's
        # worth of reads) and of their levels (about two seconds' worth)
        audio_buf = deque(maxlen=int(1.0 * read_rate))
        level_buf = deque(maxlen=int(2.0 * read_rate))

        # The index at which we cut the level buffer for the purposes of looking
        # for a change in the audio going from background to noisy, or vice
        # versa. This is what we are looking for when detecting speech. The
        # newest third of the buffer is compared with the rest of it.
        avg_idx = level_buf.maxlen // 3

        # State
        talking = None  # None until the level buffer first fills, then a bool
        speech  = None  # The chunks of the clip being recorded, if any

        # Limits on recording, in seconds
        min_secs =  2 # <-- Enough for the key-phrase only
        max_secs = 10 # <-- Plenty?

        # Start pulling in the audio stream. Everything after this point is
        # guarded so that the audio resources are released, and the decode
        # queue is nulled out, even if something below raises.
        p      = pyaudio.PyAudio()
        stream = None
        try:
            stream = p.open(format           =self._format,
                            channels         =self._channels,
                            rate             =self._rate,
                            input            =True,
                            frames_per_buffer=self._chunk_size)

            # Init is done, we start off idle
            self._notify(Notifier.IDLE)

            # Keep listening until we are stopped
            while self.is_running:
                # We'll need this here and there below
                now = time.time()

                # Read in the next lump of data and get its volume. It looks
                # like rms() is the best measure of this but I could be wrong.
                chunk = stream.read(self._chunk_size,
                                    exception_on_overflow=False)
                level = abs(audioop.rms(chunk, self._width))

                # Accumulate into our buffers
                audio_buf.append(chunk)
                level_buf.append(level)

                # If we have not yet filled up the level buffer then we're done
                # here. Any analysis etc. will be inaccurate.
                if len(level_buf) != level_buf.maxlen:
                    continue

                # Get the averaging window as a numpy array so that we can cut
                # it and so forth
                levels = numpy.array(level_buf)

                # The first time that the level buffer is full we note that we
                # have become actively listening.
                if talking is None:
                    LOG.info("Listening")
                    self._notify(Notifier.IDLE)
                    talking       = False
                    talking_start = 0

                # Get the median level either side of the cut: everything but
                # the newest avg_idx levels, and the newest avg_idx levels.
                from_levels = levels[:-avg_idx]
                to_levels   = levels[-avg_idx:]
                from_median = numpy.sort(from_levels)[len(from_levels) // 2]
                to_median   = numpy.sort(to_levels  )[len(to_levels  ) // 2]
                LOG.debug("Levels are from=%0.2f to=%0.2f",
                          from_median, to_median)

                # Different detection based on what we are looking for
                if not talking:
                    # Looking for a step up in the latter part
                    if from_median * 1.5 < to_median:
                        LOG.info("Detected start of speech "
                                 "with levels going from %0.2f to %0.2f" %
                                 (from_median, to_median))
                        talking       = True
                        talking_start = now

                        # Remember the level from before the speech started
                        # so that we can spot a return to it
                        start_median  = from_median
                else:
                    # Looking for a step down in the latter part, or for the
                    # level dropping back to about what it was before the
                    # speech started. We don't look until the minimum
                    # recording time is up.
                    if (now - talking_start > min_secs and
                            (from_median > to_median * 1.25 or
                             to_median < start_median * 1.1)):
                        LOG.info("Detected end of speech "
                                 "with levels going from %0.2f to %0.2f" %
                                 (from_median, to_median))
                        talking = False

                # If the talking has been going on too long then just stop it.
                # Quite possibly the capture was fooled.
                if talking and now - talking_start > max_secs:
                    LOG.info("Talking lasted over %ds; pushing to False" %
                             max_secs)
                    talking = False

                # Different behaviour depending on whether we think someone is
                # talking or not
                if talking:
                    # If we don't yet have any audio then we're starting the
                    # recording
                    if speech is None:
                        LOG.info("Starting recording")
                        self._notify(Notifier.ACTIVE)

                        # Seed the recording with the rolling window of recent
                        # audio. That window already ends with the chunk which
                        # we just read, and that is added below, so it is left
                        # off here; otherwise it would be recorded twice.
                        speech = list(audio_buf)[:-1]
                        for prev in speech:
                            self._decode_queue.append(prev)

                    # Add on what we just recorded
                    speech.append(chunk)
                    self._decode_queue.append(chunk)

                elif speech is not None:
                    # There's no talking but there is recorded audio. That
                    # means someone just stopped talking.
                    LOG.info("Finished recording")

                    # Turn the stream into a list of bytes and junk the speech
                    # buffer
                    audio  = b''.join(speech)
                    speech = None

                    # Maybe save them as a wav file
                    self._save_bytes(audio)

                    # Now decode. We do this by denoting the end of the audio
                    # with a None.
                    self._decode_queue.append(None)

                    # Clear out the level buffer so that it can settle again
                    level_buf.clear()

                    # And we're back to listening
                    LOG.info("Listening")

        finally:
            # If we got here then either we were stopped or something went
            # wrong. Either way we are done: null out the decode queue so that
            # whatever is reading from it knows that, and release the audio
            # resources.
            LOG.info("Done listening")
            self._decode_queue = None
            try:
                if stream is not None:
                    stream.close()
            finally:
                p.terminate()
예제 #18
0
def fuzzy_list_range(list_,
                     sublist,
                     start         =0,
                     threshold     =80,
                     homonize_words=True):
    """
    Find the slice range of a sublist of strings within a list, using fuzzy
    matching.

    Every contiguous run of words in the list, from C{start} onwards, is
    compared with the sublist as a space-separated phrase and the best scoring
    run which meets the threshold is the one returned. On a tie the earliest
    such run wins.

    :type  list_: list<str> or tuple<str>
    :param list_:
        The list to look in.
    :type  sublist: list<str> or tuple<str>
    :param sublist:
        The list to look for.
    :type  start: int
    :param start:
        Where to start looking in the C{list}.
    :type  threshold: int
    :param threshold:
        The fuzzy matching percentage threshold which the sublist must match
        with.
    :type  homonize_words: bool
    :param homonize_words:
        Whether to homonize the words before fuzzing.

    :rtype: tuple
    :return:
        A tuple of C{start, end, score} where start and end are a half-inclusive
        slice and score is the matching score.

    :raise ValueError:
        If either list is C{None}, if the sublist is empty, or if no match
        which meets the threshold could be found.

    >>> fuzzy_list_range('whot is a fash'.split(' '), 'a fish'.split(' '))
    (2, 4, 83)
    >>> fuzzy_list_range(['what', 'is', 'a', 'fish'], ('whit', 'is'))
    (0, 2, 86)
    >>> fuzzy_list_range(['format', 'c', 'colon'], ('format', 'sea', 'colon'))
    (0, 3, 100)
    >>> fuzzy_list_range(['spell'], ('spell',))
    (0, 1, 100)
    """
    # Sanity
    if list_ is None:
        raise ValueError("list was None")
    if sublist is None:
        raise ValueError("sublist was None")

    # The empty list can't be in anything
    if len(sublist) == 0:
        raise ValueError("Empty sublist not in list")

    # Say what we got before normalisation occurs
    LOG.debug("Given '%s' to look for in '%s'",
              ' '.join(sublist), ' '.join(list_[start:]))

    # Since we're doing fuzzy matching let's make these into words
    def as_word(entry):
        """
        Perform normalisation on the given word.
        """
        try:
            # Anything which parses as a number gets spelt out
            value = float(entry)
            if value == int(value):
                value = number_to_words(int(value))
            else:
                value = number_to_words(value)
        except Exception:
            # Not a (usable) number so treat it as a plain word. We don't use
            # a bare except here so that the likes of KeyboardInterrupt are
            # not swallowed.
            value = to_alphanumeric(entry.lower())
        if homonize_words:
            value = homonize(value)
        return value

    # Turn the given lists into words
    subwords = tuple(as_word(e) for e in sublist)
    words    = tuple(as_word(e) for e in list_  )
    LOG.debug("Looking for '%s' in '%s'",
              ' '.join(subwords), ' '.join(words[start:]))

    # Look for the "best" match by comparing the query against every
    # contiguous run of words. This is going to be O(n^2) I'm afraid.
    #
    # There is deliberately no special case for a list of a single word: that
    # is just a scan with one candidate run. Treating it specially meant that
    # an exact match was returned as a bare index instead of a tuple, that
    # start was ignored, and that only the first word of the sublist was
    # scored.
    best  = None
    query = ' '.join(subwords)
    for s in range(start, len(words)):
        for e in range(s + 1, len(words) + 1):
            phrase = ' '.join(words[s:e])
            score = fuzz.ratio(query, phrase)
            LOG.debug("Checking '%s' in [%d:%d] '%s' gives %d",
                      query, s, e, phrase, score)
            if score >= threshold and (best is None or best[2] < score):
                best = (s, e, score)

    # Did we get anything?
    if best is None:
        raise ValueError("'%s' not found in %s'" % (sublist, list_))
    return best
예제 #19
0
파일: fortune.py 프로젝트: iamsrp/dexter
    def _pick(self):
        """
        Choose a random fortune. This is the meat of this class.

        :rtype: str
        :return:
            The text of the chosen fortune, or C{None} if there was nothing to
            choose from or if no suitable fortune was found after a handful of
            attempts.
        """
        # We do this all from scratch each time since it's not _that_ expensive
        # and it means we don't have to restart anything when new files are
        # added. We have a list of filenames and the start and end of their data
        # as part of the total count.
        #
        # We are effectively concatenating the files here so as to avoid
        # bias. Consider: if you have two files, with one twice the size of the
        # other, if we picked a random fortune from a random file then the
        # fortunes in the smaller file would be twice as likely to come up as
        # ones in the bigger one.
        file_info = []
        total_size = 0
        for (subdir, _, files) in os.walk(self._dir, followlinks=True):
            for filename in files:
                # The fortune files have an associated .dat file, this means we
                # can identify them by looking for that .dat file.
                path = os.path.join(subdir, filename)
                dat_path = path + '.dat'
                LOG.debug("Candidate: %s %s", path, dat_path)
                if not os.path.exists(dat_path):
                    continue

                # Open it to make sure that we can do so
                try:
                    with open(path, 'rt'):
                        # Get the file length to use it to accumulate into our
                        # running counter, and to compute the file-specific
                        # stats.
                        stat = os.stat(path)

                        # The start of the file is the current total_size and
                        # the end is that plus the file size
                        start = total_size
                        total_size += stat.st_size
                        end = total_size
                        file_info.append((path, start, end))
                        LOG.debug("Adding %s[%d:%d]", path, start, end)
                except Exception as e:
                    LOG.debug("Failed to add %s: %s", path, e)

        # If we found no data at all then there is nothing to pick from
        if total_size <= 0:
            LOG.debug("No fortune data found in %s", self._dir)
            return None

        # Keep trying this until we get something, or until we give up. Most of
        # the time we expect this to work on the first go unless something weird
        # is going on.
        for tries in range(10):
            LOG.debug("Try #%d", tries)

            # Now that we have a list of files, pick one at random by choosing a
            # point somewhere in there. The upper bound is exclusive since an
            # offset of total_size would not fall inside any of the files.
            offset = random.randrange(total_size)
            LOG.debug("Picked offset %d", offset)

            # Now we look for the file which contains that offset
            for (filename, start, end) in file_info:
                if not (start <= offset < end):
                    continue

                with open(filename, 'rt') as fh:
                    # Jump to the appropriate point in the file, according to
                    # the offset (relative to the file's start in the overall
                    # set)
                    seek_offset = offset - start
                    if seek_offset > 0:
                        fh.seek(seek_offset)

                    try:
                        # Now look for the bracketing '%'s. Read in a nice big
                        # chunk and hunt for it in there.
                        chunk = fh.read(min(10 * self._max_len, 1024 * 1024))

                        if seek_offset == 0:
                            # We are at the very start of the file, which is
                            # where its first fortune begins, possibly after a
                            # leading bracketer. Without this the first fortune
                            # of a file with no leading bracketer could never
                            # be chosen.
                            s = 2 if chunk.startswith('%\n') else 0
                        else:
                            # Somewhere in the middle, skip on to the start of
                            # the next fortune
                            s = chunk.index('\n%\n') + 3

                        # Now look for the end. A properly-formed file should
                        # have a '%\n' as its last line.
                        e = chunk.index('\n%\n', s)

                        # We found a match. Is it small enough?
                        LOG.debug("Found section %s[%d:%d]", filename, s, e)
                        if (e - s) <= self._max_len:
                            # Yes!
                            return chunk[s:e]

                    except ValueError:
                        # Failed to find the brackets (or, since it is a
                        # ValueError too, failed to decode the text).
                        pass

                # That offset did not yield anything usable so give up on it
                # and go around again
                break

        # If we got here then we gave up trying
        return None
예제 #20
0
파일: util.py 프로젝트: blundercon/dexter
def parse_number(words):
    '''
    Turn a set of words into a number. These might be complex ("One thousand
    four hundred and eleven") or simple ("Seven").

    >>> parse_number('one')
    1
    >>> parse_number('one point eight')
    1.8
    >>> parse_number('minus six')
    -6
    >>> parse_number('minus four point seven eight nine')
    -4.789

    @type  words: str
    @param words:
        The string words to parse. E.g. C{'twenty seven'}. Anything which is
        not a string is turned into one first.

    @return:
        The number which the words represent, or C{None} if they could not be
        parsed (or if there were no words).
    '''
    # Sanity
    if words is None:
        return None

    # Make sure it's a string, and trim surrounding whitespace
    words = str(words).strip()

    # Not a lot we can do if we have no words
    if len(words) == 0:
        return None

    # First try to parse the string as an integer or a float directly. Both of
    # these raise a ValueError for a string which they can't handle.
    if not re.search(r'\s', words):
        for convert in (int, float):
            try:
                return convert(words)
            except ValueError:
                pass

    # Sanitise it since we're now going to attempt to parse it. Collapse
    # multiple spaces to one and strip out non-letters
    words = ' '.join(to_letters(s) for s in re.split(r'\s+', words))
    LOG.debug("Parsing '%s'" % (words, ))

    # Recheck for empty
    if words == '':
        return None

    # See if we have to negate the result
    mult = 1
    for neg in ("minus ", "negative "):
        if words.startswith(neg):
            words = words[len(neg):]
            mult = -1
            break

    # Look for "point" in the words since it might be "six point two" or
    # something
    if ' point ' in words:
        # Determine the integer and decimal portions
        (integer, decimal) = words.split(' point ', 1)
        LOG.debug("'%s' becomes '%s' and '%s'" % (words, integer, decimal))

        # Parsing the whole number is easy enough
        whole = parse_number(integer)
        if whole is None:
            return None

        # Split up the digits to parse them. Each one must be a single digit
        # in its own right, so "eleven", say, is not allowed.
        digits = [parse_number(digit) for digit in decimal.split(' ')]
        if any(d is None or d < 0 or d > 9 for d in digits):
            LOG.error("'%s' was not a valid decimal" % (words, ))
            return None

        # Okay, use some cheese to parse into a float
        return mult * float('%d.%s' % (whole, ''.join(str(d) for d in digits)))

    else:
        # No ' point ' in it, parse directly
        try:
            return mult * _WORDS_TO_NUMBERS.parse(words)
        except Exception as e:
            LOG.error("Failed to parse '%s': %s" % (words, e))
            return None
예제 #21
0
    def _handler(self):
        """
        Drains the decoder queue, acting on each entry according to what it is.
        Runs in its own thread, and keeps going until the queue is nulled out.
        """
        # True while the clip which is coming in is being thrown away
        dropping = False

        LOG.info("Started decoding handler")
        while True:
            try:
                # Re-fetch the queue each time around. It gets set to None when
                # it is time for us to stop.
                pending = self._decode_queue
                if pending is None:
                    break

                # Handle the next entry, if there is one
                if len(pending) > 0:
                    entry = pending.popleft()
                    if isinstance(entry, bytes):
                        # Raw audio, which goes to the decoder unless we are
                        # throwing this clip away
                        if dropping:
                            LOG.debug("Ignoring %d bytes" % len(entry))
                        else:
                            LOG.debug("Feeding %d bytes" % len(entry))
                            self._feed_raw(entry)
                    elif isinstance(entry, float):
                        # The time at which the clip was taken. We throw the
                        # clip away if it is older than the limit.
                        age = time.time() - entry
                        if int(age) > 0:
                            LOG.info("Upcoming audio clip is %0.2fs old" %
                                     (age, ))
                        dropping = age > self._GOBBLE_LIMIT
                    elif entry is None:
                        # The end of the clip. Unless it was being thrown away
                        # we can now decode it and hand on the result.
                        if not dropping:
                            LOG.info("Decoding audio")
                            self._notify(Notifier.WORKING)
                            self._output.append(self._decode())
                            self._notify(Notifier.IDLE)
                        else:
                            LOG.info("Dropped audio")
                    else:
                        # Not something which we know how to deal with
                        LOG.warning("Ignoring junk on decode queue: %r" %
                                    (entry, ))

                    # There may be more waiting so look again right away
                    continue

            except Exception as e:
                # Be robust but log it
                LOG.error("Got an error in the decoder queue: %s" % (e, ))

            # Nothing to do (or something went wrong); wait a moment so that
            # we don't spin
            time.sleep(0.001)

        # And we're done!
        LOG.info("Stopped decoding handler")