예제 #1
0
    def __init__(self, files, threads=3, verbose=False):
        """Like `Speech2Text()`, but takes a list of sound files or a directory name to search
        for matching sound files, and returns a list of `(filename, response)` tuples.
        `response`'s are described in `Speech2Text.getResponse()`.

        Can use up to 5 concurrent threads. Intended for
        post-experiment processing of multiple files, in which waiting for a slow response
        is not a problem (better to get the data).

        If `files` is a string, it will be used as a directory name for glob
        (matching all `*.wav`, `*.flac`, and `*.spx` files).
        There's currently no re-try on http error."""
        list.__init__(self)  # [ (file1, resp1), (file2, resp2), ...]
        maxThreads = min(threads, 5)  # I get http errors with 6
        self.timeout = 30
        # isinstance (not `type(files) == str`) so str subclasses are also
        # accepted as a directory name
        if isinstance(files, str) and os.path.isdir(files):
            fileList = []
            for pattern in ('*.wav', '*.flac', '*.spx'):
                fileList += glob.glob(os.path.join(files, pattern))
        else:
            fileList = list(files)
        web.requireInternetAccess()  # needed to access google's speech API
        for i, filename in enumerate(fileList):
            gs = Speech2Text(filename, level=5)
            # keep the worker thread so callers can poll/join it later
            self.append((filename, gs.getThread()))  # tuple
            if verbose:
                logging.info("%i %s" % (i, filename))
            while self._activeCount() >= maxThreads:
                core.wait(.1, 0)  # idle at max count
예제 #2
0
    def test_Speech2Text(self):
        """Round-trip Speech2Text and BatchSpeech2Text on known recordings
        (currently skipped: the API rejects requests)."""
        pytest.skip()  # google speech API gives Error 400: Bad request
        from psychopy import web
        try:
            web.requireInternetAccess()
        except web.NoInternetAccessError:
            pytest.skip()

        # a known recording at 16 kHz
        soundFile = join(self.tmp, 'red_16000.wav')

        speech = microphone.Speech2Text(filename=soundFile)
        response = speech.getResponse()
        assert response.word == 'red'

        # batch mode should auto-discover sound files inside a directory
        subDir = join(self.tmp, 'tmp')
        os.mkdir(subDir)
        shutil.copy(soundFile, subDir)
        batch = microphone.BatchSpeech2Text(files=subDir)

        # batch mode with an explicit file list
        batch = microphone.BatchSpeech2Text(
            files=glob.glob(join(self.tmp, 'red_*.wav')))
        while batch._activeCount():
            core.wait(0.1, 0)
        response = batch[0][1]
        assert 0.6 < response.confidence < 0.75  # 0.68801856
        assert response.word == 'red'
예제 #3
0
    def test_Speech2Text(self):
        """Round-trip Speech2Text and BatchSpeech2Text on known recordings
        (currently skipped: the API rejects requests)."""
        pytest.skip()  # google speech API gives Error 400: Bad request

        try:
            web.requireInternetAccess()
        except web.NoInternetAccessError:
            pytest.skip()

        # a known recording at 16 kHz
        soundFile = join(self.tmp, 'red_16000.wav')

        speech = Speech2Text(filename=soundFile)
        response = speech.getResponse()
        assert response.word == 'red'

        # batch mode should auto-discover sound files inside a directory
        subDir = join(self.tmp, 'tmp')
        os.mkdir(subDir)
        shutil.copy(soundFile, subDir)
        batch = BatchSpeech2Text(files=subDir)

        # batch mode with an explicit file list
        batch = BatchSpeech2Text(files=glob.glob(join(self.tmp, 'red_*.wav')))
        while batch._activeCount():
            core.wait(0.1, 0)
        response = batch[0][1]
        assert 0.6 < response.confidence < 0.75  # 0.68801856
        assert response.word == 'red'
예제 #4
0
    def __init__(self, files, threads=3, verbose=False):
        """Like `Speech2Text()`, but takes a list of sound files or a directory name to search
        for matching sound files, and returns a list of `(filename, response)` tuples.
        `response`'s are described in `Speech2Text.getResponse()`.

        Can use up to 5 concurrent threads. Intended for
        post-experiment processing of multiple files, in which waiting for a slow response
        is not a problem (better to get the data).

        If `files` is a string, it will be used as a directory name for glob
        (matching all `*.wav`, `*.flac`, and `*.spx` files).
        There's currently no re-try on http error."""
        list.__init__(self)  # [ (file1, resp1), (file2, resp2), ...]
        maxThreads = min(threads, 5)  # I get http errors with 6
        self.timeout = 30
        # isinstance (not `type(files) == str`) so str subclasses are also
        # accepted as a directory name
        if isinstance(files, str) and os.path.isdir(files):
            fileList = []
            for pattern in ('*.wav', '*.flac', '*.spx'):
                fileList += glob.glob(os.path.join(files, pattern))
        else:
            fileList = list(files)
        web.requireInternetAccess()  # needed to access google's speech API
        for i, filename in enumerate(fileList):
            gs = Speech2Text(filename, level=5)
            # keep the worker thread so callers can poll/join it later
            self.append((filename, gs.getThread()))  # tuple
            if verbose:
                logging.info("%i %s" % (i, filename))
            while self._activeCount() >= maxThreads:
                core.wait(.1, 0)  # idle at max count
예제 #5
0
    def test_Speech2Text(self):
        """Exercise Speech2Text and BatchSpeech2Text against known recordings."""
        try:
            web.requireInternetAccess()
        except web.NoInternetAccessError:
            pytest.skip()

        # a known recording at 16 kHz
        soundFile = join(self.tmp, "red_16000.wav")

        speech = Speech2Text(filename=soundFile)
        response = speech.getResponse()
        assert response.word == "red"

        # batch mode: explicit list first, then directory discovery
        batch = BatchSpeech2Text(files=glob.glob(join(self.tmp, "red_*.wav")))
        os.unlink(join(self.tmp, "green_48000.wav"))
        batch = BatchSpeech2Text(files=self.tmp, threads=1)
예제 #6
0
    def test_Speech2Text(self):
        """Exercise Speech2Text and BatchSpeech2Text against known recordings."""
        try:
            web.requireInternetAccess()
        except web.NoInternetAccessError:
            pytest.skip()

        # a known recording at 16 kHz
        soundFile = join(self.tmp, 'red_16000.wav')

        speech = Speech2Text(filename=soundFile)
        response = speech.getResponse()
        assert response.word == 'red'

        # batch mode: explicit list first, then directory discovery
        batch = BatchSpeech2Text(files=glob.glob(join(self.tmp, 'red_*.wav')))
        os.unlink(join(self.tmp, 'green_48000.wav'))
        batch = BatchSpeech2Text(files=self.tmp, threads=1)
예제 #7
0
    def test_Speech2Text(self):
        """Recognize a known word, then run batch recognition over matching files."""
        try:
            web.requireInternetAccess()
        except web.NoInternetAccessError:
            pytest.skip()

        # a known recording at 16 kHz
        soundFile = join(self.tmp, "red_16000.wav")

        speech = Speech2Text(filename=soundFile)
        response = speech.getResponse()
        assert response.word == "red"

        batch = BatchSpeech2Text(files=glob.glob(join(self.tmp, "red_*.wav")))
        # wait until every worker thread has finished before reading results
        while batch._activeCount():
            core.wait(0.1, 0)
        response = batch[0][1]
        assert 0.6 < response.confidence < 0.75  # 0.68801856
        assert response.word == "red"
예제 #8
0
    def test_Speech2Text(self):
        """Recognize a known word, then run batch recognition over matching files."""
        try:
            web.requireInternetAccess()
        except web.NoInternetAccessError:
            pytest.skip()

        # a known recording at 16 kHz
        soundFile = join(self.tmp, 'red_16000.wav')

        speech = Speech2Text(filename=soundFile)
        response = speech.getResponse()
        assert response.word == 'red'

        batch = BatchSpeech2Text(files=glob.glob(join(self.tmp, 'red_*.wav')))
        # wait until every worker thread has finished before reading results
        while batch._activeCount():
            core.wait(0.1, 0)
        response = batch[0][1]
        assert 0.6 < response.confidence < 0.75  # 0.68801856
        assert response.word == 'red'
예제 #9
0
    def __init__(self, filename,
                 lang='en-US',
                 timeout=10,
                 samplingrate=16000,
                 pro_filter=2,
                 level=0):
        """
            :Parameters:

                `filename` : <required>
                    name of the speech file (.flac or .wav) to process. wav files will be
                    converted to flac, and for this to work you need to have flac (as an
                    executable). .spx is not accepted by this version (see the
                    extension check below).
                `lang` :
                    the presumed language of the speaker, as a locale code; default 'en-US'
                `timeout` :
                    seconds to wait before giving up, default 10
                `samplingrate` :
                    the sampling rate of the speech clip in Hz, either 16000 or 8000. You can
                    record at a higher rate, and then down-sample to 16000 for speech
                    recognition. `file` is the down-sampled file, not the original.
                    the sampling rate is auto-detected for .wav files.
                `pro_filter` :
                    profanity filter level; default 2 (e.g., f***)
                `level` :
                    flac compression level (0 less compression but fastest)
        """
        # set up some key parameters:
        results = 5  # how many words wanted
        self.timeout = timeout
        useragent = PSYCHOPY_USERAGENT
        host = "www.google.com/speech-api/v1/recognize"

        # determine file type, convert wav to flac if needed:
        if not os.path.isfile(filename):
            raise IOError("Cannot find file: %s" % filename)
        ext = os.path.splitext(filename)[1]
        if ext not in ['.flac', '.wav']:
            raise SoundFormatNotSupported("Unsupported filetype: %s\n" % ext)
        if ext == '.wav':
            _junk, samplingrate = readWavFile(filename)
        if samplingrate not in [16000, 8000]:
            raise SoundFormatNotSupported(
                'Speech2Text sample rate must be 16000 or 8000 Hz')
        self.filename = filename
        if ext == ".flac":
            filetype = "x-flac"
        elif ext == ".wav":  # convert to .flac
            filetype = "x-flac"
            filename = wav2flac(filename, level=level)  # opt for speed
        logging.info("Loading: %s as %s, audio/%s" %
                     (self.filename, lang, filetype))
        # occasional error; core.wait(.1) is not always enough; better slow
        # than fail
        c = 0
        while not os.path.isfile(filename) and c < 10:
            core.wait(.1, 0)
            c += 1
        # context manager closes the handle promptly; the previous
        # open(...).read() leaked the file handle
        with open(filename, 'rb') as audioFile:
            audio = audioFile.read()
        if ext == '.wav' and filename.endswith('.flac'):
            # remove the temporary flac produced by wav2flac; best-effort
            try:
                os.remove(filename)
            except Exception:
                pass

        # urllib2 makes no attempt to validate the server certificate. here's an idea:
        # http://thejosephturner.com/blog/2011/03/19/https-certificate-verification-in-python-with-urllib2/
        # set up the https request:
        url = 'https://' + host + '?xjerr=1&' +\
              'client=psychopy2&' +\
              'lang=' + lang + '&'\
              'pfilter=%d' % pro_filter + '&'\
              'maxresults=%d' % results
        header = {'Content-Type': 'audio/%s; rate=%d' % (filetype, samplingrate),
                  'User-Agent': useragent}
        web.requireInternetAccess()  # needed to access google's speech API
        try:
            self.request = urllib.request.Request(url, audio, header)
        except Exception:  # pragma: no cover
            # try again before accepting defeat
            logging.info("https request failed. %s, %s. trying again..." %
                         (filename, self.filename))
            core.wait(0.2, 0)
            self.request = urllib.request.Request(url, audio, header)
예제 #10
0
    def __init__(self, filename, lang="en-US", timeout=10, samplingrate=16000, pro_filter=2, quiet=True):
        """
            :Parameters:

                `filename` : <required>
                    name of the speech file (.flac, .wav, or .spx) to process. wav files will be
                    converted to flac, and for this to work you need to have flac (as an
                    executable). spx format is speex-with-headerbyte (for google).
                `lang` :
                    the presumed language of the speaker, as a locale code; default 'en-US'
                `timeout` :
                    seconds to wait before giving up, default 10
                `samplingrate` :
                    the sampling rate of the speech clip in Hz, either 16000 or 8000. You can
                    record at a higher rate, and then down-sample to 16000 for speech
                    recognition. `file` is the down-sampled file, not the original.
                    the sampling rate is auto-detected for .wav files.
                `pro_filter` :
                    profanity filter level; default 2 (e.g., f***)
                `quiet` :
                    no reporting intermediate details; default `True` (non-verbose)
        """
        # set up some key parameters:
        results = 5  # how many words wanted
        self.timeout = timeout
        useragent = PSYCHOPY_USERAGENT
        host = "www.google.com/speech-api/v1/recognize"
        # NOTE(review): return value unused; presumably called so a missing
        # flac install fails early -- confirm against _getFlacPath()
        flac_path = _getFlacPath()

        # determine file type, convert wav to flac if needed:
        if not os.path.isfile(filename):
            raise IOError("Cannot find file: %s" % filename)
        ext = os.path.splitext(filename)[1]
        if ext not in [".flac", ".spx", ".wav"]:
            raise SoundFormatNotSupported("Unsupported filetype: %s\n" % ext)
        if ext == ".wav":
            __, samplingrate = readWavFile(filename)
        if samplingrate not in [16000, 8000]:
            raise SoundFormatNotSupported("Speech2Text sample rate must be 16000 or 8000 Hz")
        self.filename = filename
        if ext == ".flac":
            filetype = "x-flac"
        elif ext == ".spx":
            filetype = "x-speex-with-header-byte"
        elif ext == ".wav":  # convert to .flac
            filetype = "x-flac"
            filename = wav2flac(filename)
        logging.info("Loading: %s as %s, audio/%s" % (self.filename, lang, filetype))
        c = 0  # occasional error; core.wait(.1) is not always enough; better slow than fail
        while not os.path.isfile(filename) and c < 10:
            core.wait(0.1, 0)
            c += 1
        try:
            # context manager closes the handle even if read() fails
            with open(filename, "rb") as audioFile:
                audio = audioFile.read()
        except Exception:  # narrowed from bare except: don't swallow KeyboardInterrupt
            msg = "Can't read file %s from %s.\n" % (filename, self.filename)
            logging.error(msg)
            raise SoundFileError(msg)
        finally:
            if ext == ".wav" and filename.endswith(".flac"):
                # remove the temporary flac produced by wav2flac; best-effort
                try:
                    os.remove(filename)
                except OSError:
                    pass

        # urllib2 makes no attempt to validate the server certificate. here's an idea:
        # http://thejosephturner.com/blog/2011/03/19/https-certificate-verification-in-python-with-urllib2/
        # set up the https request:
        url = (
            "https://" + host + "?xjerr=1&" + "client=psychopy2&" + "lang=" + lang + "&"
            "pfilter=%d" % pro_filter + "&"
            "maxresults=%d" % results
        )
        header = {"Content-Type": "audio/%s; rate=%d" % (filetype, samplingrate), "User-Agent": useragent}
        web.requireInternetAccess()  # needed to access google's speech API
        try:
            self.request = urllib2.Request(url, audio, header)
        except Exception:  # try again before accepting defeat
            logging.info("https request failed. %s, %s. trying again..." % (filename, self.filename))
            core.wait(0.2, 0)
            self.request = urllib2.Request(url, audio, header)
예제 #11
0
    def __init__(self,
                 filename,
                 lang='en-US',
                 timeout=10,
                 samplingrate=16000,
                 pro_filter=2,
                 level=0):
        """
            :Parameters:

                `filename` : <required>
                    name of the speech file (.flac or .wav) to process. wav files will be
                    converted to flac, and for this to work you need to have flac (as an
                    executable). .spx is not accepted by this version (see the
                    extension check below).
                `lang` :
                    the presumed language of the speaker, as a locale code; default 'en-US'
                `timeout` :
                    seconds to wait before giving up, default 10
                `samplingrate` :
                    the sampling rate of the speech clip in Hz, either 16000 or 8000. You can
                    record at a higher rate, and then down-sample to 16000 for speech
                    recognition. `file` is the down-sampled file, not the original.
                    the sampling rate is auto-detected for .wav files.
                `pro_filter` :
                    profanity filter level; default 2 (e.g., f***)
                `level` :
                    flac compression level (0 less compression but fastest)
        """
        # set up some key parameters:
        results = 5  # how many words wanted
        self.timeout = timeout
        useragent = PSYCHOPY_USERAGENT
        host = "www.google.com/speech-api/v1/recognize"

        # determine file type, convert wav to flac if needed:
        if not os.path.isfile(filename):
            raise IOError("Cannot find file: %s" % filename)
        ext = os.path.splitext(filename)[1]
        if ext not in ['.flac', '.wav']:
            raise SoundFormatNotSupported("Unsupported filetype: %s\n" % ext)
        if ext == '.wav':
            _junk, samplingrate = readWavFile(filename)
        if samplingrate not in [16000, 8000]:
            raise SoundFormatNotSupported(
                'Speech2Text sample rate must be 16000 or 8000 Hz')
        self.filename = filename
        if ext == ".flac":
            filetype = "x-flac"
        elif ext == ".wav":  # convert to .flac
            filetype = "x-flac"
            filename = wav2flac(filename, level=level)  # opt for speed
        logging.info("Loading: %s as %s, audio/%s" %
                     (self.filename, lang, filetype))
        # occasional error; core.wait(.1) is not always enough; better slow
        # than fail
        c = 0
        while not os.path.isfile(filename) and c < 10:
            core.wait(.1, 0)
            c += 1
        # context manager closes the handle promptly; the previous
        # open(...).read() leaked the file handle
        with open(filename, 'rb') as audioFile:
            audio = audioFile.read()
        if ext == '.wav' and filename.endswith('.flac'):
            # remove the temporary flac produced by wav2flac; best-effort
            try:
                os.remove(filename)
            except Exception:
                pass

        # urllib2 makes no attempt to validate the server certificate. here's an idea:
        # http://thejosephturner.com/blog/2011/03/19/https-certificate-verification-in-python-with-urllib2/
        # set up the https request:
        url = 'https://' + host + '?xjerr=1&' +\
              'client=psychopy2&' +\
              'lang=' + lang + '&'\
              'pfilter=%d' % pro_filter + '&'\
              'maxresults=%d' % results
        header = {
            'Content-Type': 'audio/%s; rate=%d' % (filetype, samplingrate),
            'User-Agent': useragent
        }
        web.requireInternetAccess()  # needed to access google's speech API
        try:
            self.request = urllib.request.Request(url, audio, header)
        except Exception:  # pragma: no cover
            # try again before accepting defeat
            logging.info("https request failed. %s, %s. trying again..." %
                         (filename, self.filename))
            core.wait(0.2, 0)
            self.request = urllib.request.Request(url, audio, header)
예제 #12
0
 def setup_class(self):
     """Skip every test in this class when the host has no internet access.

     `web` is presumably psychopy.web -- requireInternetAccess() raises
     NoInternetAccessError when offline (confirm against module imports).
     """
     try:
         web.requireInternetAccess()
     except web.NoInternetAccessError:
         pytest.skip()
예제 #13
0
"""Demo to illustrate speech recognition using microphone.Speech2Text

Requires flac, which does not come with PsychoPy. You can get it free,
download & install from https://xiph.org/flac/download.html

Records 2.5s of speech, displays the word(s) spoken, and sets a color.
Can be used to show subjects how loudly and clearly they need to talk.
"""
__author__ = 'Jeremy Gray'

import os
from psychopy import visual, core, event, colors, web
from psychopy.microphone import switchOn, AudioCapture, Speech2Text

web.requireInternetAccess()  # needed to access google's speech API

def display(*args):
    """Draw each stimulus passed in, then flip the window."""
    for stim in args:
        stim.draw()
    win.flip()

# PsychoPy only knows English color names; 'en-UK' might work better here for some speakers:
options = {'lang': 'en-US'}

# Set up visual things:
win = visual.Window(color=-0.05)
# on-screen instructions; wrapWidth keeps the long prompt from running off the window
instr = visual.TextStim(win, text='say a color name when you see the microphone icon\n     green, hot pink, gold, fire brick, dark red, ...\n\npress q or say exit to quit;   expects "%s" input' % options['lang'], height=0.08, pos=(0,-0.4), wrapWidth=1.4)
# empty text stim; per the module docstring the demo displays the word(s) spoken here
word = visual.TextStim(win, text='', height=0.2, opacity=0.8, pos=(0,.25))
# microphone icon cueing the subject to speak; expects mic.png beside this script
icon = visual.ImageStim(win, image='mic.png', opacity=0.6)
# start prompt, placed just below the instructions
anykey = visual.TextStim(win, text='Press any key to start', pos=instr.pos-[0,.3], color='darkblue')
예제 #14
0
파일: microphone.py 프로젝트: klq/psychopy
    def __init__(self, filename,
                 lang='en-US',
                 timeout=10,
                 samplingrate=16000,
                 flac_exe='C:\\Program Files\\FLAC\\flac.exe',
                 pro_filter=2,
                 quiet=True):
        """
            :Parameters:

                `filename` : <required>
                    name of the speech file (.flac, .wav, or .spx) to process. wav files will be
                    converted to flac, and for this to work you need to have flac (as an
                    executable). spx format is speex-with-headerbyte (for google).
                `lang` :
                    the presumed language of the speaker, as a locale code; default 'en-US'
                `timeout` :
                    seconds to wait before giving up, default 10
                `samplingrate` :
                    the sampling rate of the speech clip in Hz, either 16000 or 8000. You can
                    record at a higher rate, and then down-sample to 16000 for speech
                    recognition. `file` is the down-sampled file, not the original.
                    the sampling rate is auto-detected for .wav files.
                `flac_exe` :
                    **Windows only**: path to binary for converting wav to flac;
                    must be a string with **two back-slashes where you want one** to appear
                    (this does not display correctly above, in the web documentation auto-build);
                    default is 'C:\\\\\\\\Program Files\\\\\\\\FLAC\\\\\\\\flac.exe'
                `pro_filter` :
                    profanity filter level; default 2 (e.g., f***)
                `quiet` :
                    no reporting intermediate details; default `True` (non-verbose)
        """
        # set up some key parameters:
        results = 5  # how many words wanted
        self.timeout = timeout
        useragent = PSYCHOPY_USERAGENT
        host = "www.google.com/speech-api/v1/recognize"
        flac_path = _getFlacPath(flac_exe)

        # determine file type, convert wav to flac if needed:
        ext = os.path.splitext(filename)[1]
        if not os.path.isfile(filename):
            # bug fix: was `% file` (the py2 builtin type), which reported
            # "<type 'file'>" instead of the missing path
            raise IOError("Cannot find file: %s" % filename)
        if ext not in ['.flac', '.spx', '.wav']:
            raise SoundFormatNotSupported("Unsupported filetype: %s\n" % ext)
        if ext == '.wav':
            __, samplingrate = readWavFile(filename)
        if samplingrate not in [16000, 8000]:
            raise SoundFormatNotSupported('Speech2Text sample rate must be 16000 or 8000 Hz')
        self.filename = filename
        if ext == ".flac":
            filetype = "x-flac"
        elif ext == ".spx":
            filetype = "x-speex-with-header-byte"
        elif ext == ".wav":  # convert to .flac
            if not os.path.isfile(flac_path):
                msg = "failed to find flac binary, tried '%s'" % flac_path
                logging.error(msg)
                raise MicrophoneError(msg)
            filetype = "x-flac"
            tmp = tempfile.NamedTemporaryFile()
            flac_cmd = [flac_path, "-8", "-f", "--totally-silent", "-o", tmp.name, filename]
            __, se = core.shellCall(flac_cmd, stderr=True)
            if se:
                logging.warn(se)
            if not os.path.isfile(tmp.name):  # just try again
                # ~2% incidence when recording for 1s, 650+ trials
                # never got two in a row; core.wait() does not help
                logging.warn('Failed to convert to tmp.flac; trying again')
                __, se = core.shellCall(flac_cmd, stderr=True)
                if se:
                    logging.warn(se)
            filename = tmp.name
        logging.info("Loading: %s as %s, audio/%s" % (self.filename, lang, filetype))
        try:
            c = 0  # occasional error; core.wait(.1) is not always enough; better slow than fail
            while not os.path.isfile(filename) and c < 10:
                core.wait(.1, hogCPUperiod=0)
                c += 1
            # context manager closes the handle even if read() fails
            with open(filename, 'r+b') as audioFile:
                audio = audioFile.read()
        except Exception:  # narrowed from bare except: don't swallow KeyboardInterrupt
            msg = "Can't read file %s from %s.\n" % (filename, self.filename)
            logging.error(msg)
            raise SoundFileError(msg)
        finally:
            # bug fix: was `os.remove(tmp)` -- passing the NamedTemporaryFile
            # object (not tmp.name) always raised and was silently swallowed,
            # so the temp flac was never removed; also `tmp` is unbound unless
            # a .wav was converted. Closing the NamedTemporaryFile deletes it.
            if ext == '.wav':
                try:
                    tmp.close()
                except Exception:
                    pass

        # urllib2 makes no attempt to validate the server certificate. here's an idea:
        # http://thejosephturner.com/blog/2011/03/19/https-certificate-verification-in-python-with-urllib2/
        # set up the https request:
        url = 'https://' + host + '?xjerr=1&' +\
              'client=psychopy2&' +\
              'lang=' + lang +'&'\
              'pfilter=%d' % pro_filter + '&'\
              'maxresults=%d' % results
        header = {'Content-Type' : 'audio/%s; rate=%d' % (filetype, samplingrate),
                  'User-Agent': useragent}
        web.requireInternetAccess()  # needed to access google's speech API
        try:
            self.request = urllib2.Request(url, audio, header)
        except Exception:  # try again before accepting defeat
            logging.info("https request failed. %s, %s. trying again..." % (filename, self.filename))
            core.wait(0.2, hogCPUperiod=0)
            self.request = urllib2.Request(url, audio, header)
예제 #15
0
"""Demo to illustrate speech recognition using microphone.Speech2Text

Requires flac, which does not come with PsychoPy. You can get it free,
download & install from https://xiph.org/flac/download.html

Records 2.5s of speech, displays the word(s) spoken, and sets a color.
Can be used to show subjects how loudly and clearly they need to talk.
"""
__author__ = 'Jeremy Gray'

import os
from psychopy import visual, core, event, colors, web
from psychopy.microphone import switchOn, AudioCapture, Speech2Text

web.requireInternetAccess()  # needed to access google's speech API

def display(*args):
    """Draw each stimulus passed in, then flip the window."""
    for stim in args:
        stim.draw()
    win.flip()

# PsychoPy only knows English color names; 'en-UK' might work better here for some speakers:
options = {'lang': 'en-US'}

# Set up visual things:
win = visual.Window(color=-0.05)
# on-screen instructions; wrapWidth keeps the long prompt from running off the window
instr = visual.TextStim(win, text='say a color name when you see the microphone icon\n     green, hot pink, gold, fire brick, dark red, ...\n\npress q or say exit to quit;   expects "%s" input' % options['lang'], height=0.08, pos=(0,-0.4), wrapWidth=1.4)
# empty text stim; per the module docstring the demo displays the word(s) spoken here
word = visual.TextStim(win, text='', height=0.2, opacity=0.8, pos=(0,.25))
# microphone icon cueing the subject to speak; expects mic.png beside this script
icon = visual.ImageStim(win, image='mic.png', opacity=0.6)
# start prompt, placed just below the instructions
anykey = visual.TextStim(win, text='Press any key to start', pos=instr.pos-[0,.3], color='darkblue')