def __init__(self, files, threads=3, verbose=False):
    """Like `Speech2Text()`, but takes a list of sound files or a directory
    name to search for matching sound files, and returns a list of
    `(filename, response)` tuples. `response`'s are described in
    `Speech2Text.getResponse()`.

    Can use up to 5 concurrent threads. Intended for post-experiment
    processing of multiple files, in which waiting for a slow response is
    not a problem (better to get the data).

    If `files` is a string, it will be used as a directory name for glob
    (matching all `*.wav`, `*.flac`, and `*.spx` files).
    There's currently no re-try on http error.

    :Parameters:

        `files` :
            an iterable of sound-file names, or the name of an existing
            directory to search.
        `threads` :
            requested number of concurrent requests; clamped to the
            range 1..5.
        `verbose` :
            if True, log the index and name of each file as it is queued.
    """
    list.__init__(self)  # [ (file1, resp1), (file2, resp2), ...]

    # Upper bound: I get http errors with 6.
    # Lower bound: with a limit <= 0 the throttling loop below could never
    # exit, because an active count is always >= 0.
    maxThreads = max(1, min(threads, 5))
    self.timeout = 30

    if isinstance(files, str) and os.path.isdir(files):
        fileList = []
        for pattern in ('*.wav', '*.flac', '*.spx'):
            fileList += glob.glob(os.path.join(files, pattern))
    else:
        fileList = list(files)

    web.requireInternetAccess()  # needed to access google's speech API
    for i, filename in enumerate(fileList):
        gs = Speech2Text(filename, level=5)
        self.append((filename, gs.getThread()))  # tuple
        if verbose:
            logging.info("%i %s" % (i, filename))
        while self._activeCount() >= maxThreads:
            core.wait(.1, 0)  # idle at max count
def test_Speech2Text(self):
    """Exercise single-file and batch speech recognition on a known clip.

    Currently skipped unconditionally (see the first statement); the rest
    of the body is retained so it can be re-enabled later.
    """
    # google speech API gives Error 400: Bad request
    pytest.skip()

    from psychopy import web
    try:
        web.requireInternetAccess()
    except web.NoInternetAccessError:
        pytest.skip()

    # load a known sound file
    wavPath = join(self.tmp, 'red_16000.wav')
    recognizer = microphone.Speech2Text(filename=wavPath)
    single = recognizer.getResponse()
    assert single.word == 'red'

    # test batch-discover files in a directory
    batchDir = join(self.tmp, 'tmp')
    os.mkdir(batchDir)
    shutil.copy(wavPath, batchDir)
    batch = microphone.BatchSpeech2Text(files=batchDir)

    # then batch over an explicit list of files
    batch = microphone.BatchSpeech2Text(
        files=glob.glob(join(self.tmp, 'red_*.wav')))
    while batch._activeCount():
        core.wait(.1, 0)
    first = batch[0][1]
    assert 0.6 < first.confidence < 0.75  # 0.68801856
    assert first.word == 'red'
def test_Speech2Text(self):
    """Check `Speech2Text` and `BatchSpeech2Text` against a known recording.

    The test is skipped up front; the statements after the skip are kept
    for when the service can be used again.
    """
    pytest.skip()  # google speech API gives Error 400: Bad request

    try:
        web.requireInternetAccess()
    except web.NoInternetAccessError:
        pytest.skip()

    # load a known sound file
    knownWav = join(self.tmp, 'red_16000.wav')
    response = Speech2Text(filename=knownWav).getResponse()
    assert response.word == 'red'

    # test batch-discover files in a directory
    subDir = join(self.tmp, 'tmp')
    os.mkdir(subDir)
    shutil.copy(knownWav, subDir)
    jobs = BatchSpeech2Text(files=subDir)

    # batch over an explicit file list; wait for all requests to finish
    jobs = BatchSpeech2Text(files=glob.glob(join(self.tmp, 'red_*.wav')))
    while jobs._activeCount():
        core.wait(.1, 0)
    response = jobs[0][1]
    assert 0.6 < response.confidence < 0.75  # 0.68801856
    assert response.word == 'red'
def test_Speech2Text(self):
    """Recognise a known clip, then build batch jobs from a list and a dir.

    Skips when there is no internet access.
    """
    try:
        web.requireInternetAccess()
    except web.NoInternetAccessError:
        pytest.skip()

    # load a known sound file
    knownWav = join(self.tmp, "red_16000.wav")
    recognizer = Speech2Text(filename=knownWav)
    answer = recognizer.getResponse()
    assert answer.word == "red"

    # batch from an explicit list of matching files
    redFiles = glob.glob(join(self.tmp, "red_*.wav"))
    batch = BatchSpeech2Text(files=redFiles)

    # drop the 48 kHz clip before batching over the whole directory
    os.unlink(join(self.tmp, "green_48000.wav"))
    batch = BatchSpeech2Text(files=self.tmp, threads=1)
def test_Speech2Text(self):
    """Run one single-file recognition and two batch constructions.

    The test is skipped if internet access is unavailable.
    """
    try:
        web.requireInternetAccess()
    except web.NoInternetAccessError:
        pytest.skip()

    # load a known sound file
    sample = join(self.tmp, 'red_16000.wav')
    result = Speech2Text(filename=sample).getResponse()
    assert result.word == 'red'

    # first batch: explicit list from a glob pattern
    jobs = BatchSpeech2Text(files=glob.glob(join(self.tmp, 'red_*.wav')))

    # second batch: whole directory, after removing the 48 kHz clip
    unwanted = join(self.tmp, 'green_48000.wav')
    os.unlink(unwanted)
    jobs = BatchSpeech2Text(files=self.tmp, threads=1)
def test_Speech2Text(self):
    """Verify single and batch recognition of a known 'red' recording.

    Skips when there is no internet access.
    """
    try:
        web.requireInternetAccess()
    except web.NoInternetAccessError:
        pytest.skip()

    # load a known sound file
    knownWav = join(self.tmp, "red_16000.wav")
    single = Speech2Text(filename=knownWav).getResponse()
    assert single.word == "red"

    # batch over every matching file, then wait until no request is active
    matches = glob.glob(join(self.tmp, "red_*.wav"))
    batch = BatchSpeech2Text(files=matches)
    while batch._activeCount():
        core.wait(0.1, 0)

    first = batch[0][1]
    assert 0.6 < first.confidence < 0.75  # 0.68801856
    assert first.word == "red"
def test_Speech2Text(self):
    """Check word and confidence for a known clip, singly and in a batch.

    The test is skipped if internet access is unavailable.
    """
    try:
        web.requireInternetAccess()
    except web.NoInternetAccessError:
        pytest.skip()

    # load a known sound file
    sample = join(self.tmp, 'red_16000.wav')
    recognizer = Speech2Text(filename=sample)
    reply = recognizer.getResponse()
    assert reply.word == 'red'

    # submit the glob matches as a batch and poll until all are done
    jobs = BatchSpeech2Text(files=glob.glob(join(self.tmp, 'red_*.wav')))
    while jobs._activeCount():
        core.wait(.1, 0)

    reply = jobs[0][1]
    assert 0.6 < reply.confidence < 0.75  # 0.68801856
    assert reply.word == 'red'
def __init__(self, filename, lang='en-US', timeout=10, samplingrate=16000,
             pro_filter=2, level=0):
    """Read a sound file and build the https request for recognising it.

    :Parameters:

        `filename` : <required>
            name of the speech file (.flac or .wav) to process. wav files
            will be converted to flac, and for this to work you need to
            have flac (as an executable). Any other extension is rejected.
        `lang` :
            the presumed language of the speaker, as a locale code;
            default 'en-US'
        `timeout` :
            seconds to wait before giving up, default 10
        `samplingrate` :
            the sampling rate of the speech clip in Hz, either 16000 or
            8000. You can record at a higher rate, and then down-sample to
            16000 for speech recognition. `file` is the down-sampled file,
            not the original. the sampling rate is auto-detected for .wav
            files.
        `pro_filter` :
            profanity filter level; default 2 (e.g., f***)
        `level` :
            flac compression level (0 less compression but fastest)

    Raises `IOError` if `filename` does not exist or cannot be read, and
    `SoundFormatNotSupported` for an unsupported extension or sample rate.
    """
    # set up some key parameters:
    results = 5  # how many words wanted
    self.timeout = timeout
    useragent = PSYCHOPY_USERAGENT
    host = "www.google.com/speech-api/v1/recognize"

    # determine file type, convert wav to flac if needed:
    if not os.path.isfile(filename):
        raise IOError("Cannot find file: %s" % filename)
    ext = os.path.splitext(filename)[1]
    if ext not in ['.flac', '.wav']:
        raise SoundFormatNotSupported("Unsupported filetype: %s\n" % ext)
    if ext == '.wav':
        _junk, samplingrate = readWavFile(filename)
    if samplingrate not in [16000, 8000]:
        raise SoundFormatNotSupported(
            'Speech2Text sample rate must be 16000 or 8000 Hz')
    self.filename = filename

    # both accepted formats are uploaded as flac
    filetype = "x-flac"
    if ext == ".wav":  # convert to .flac
        filename = wav2flac(filename, level=level)  # opt for speed
    # only a flac produced by the conversion above may be deleted later
    isTempFlac = ext == '.wav' and filename.endswith('.flac')
    logging.info("Loading: %s as %s, audio/%s" %
                 (self.filename, lang, filetype))

    # occasional error; core.wait(.1) is not always enough; better slow
    # than fail
    c = 0
    while not os.path.isfile(filename) and c < 10:
        core.wait(.1, 0)
        c += 1
    try:
        # context manager closes the handle promptly, even on a read error
        with open(filename, 'rb') as audioFile:
            audio = audioFile.read()
    finally:
        # remove the converted file whether or not the read succeeded
        if isTempFlac:
            try:
                os.remove(filename)
            except OSError:
                pass  # best-effort cleanup

    # urllib2 makes no attempt to validate the server certificate. here's an idea:
    # http://thejosephturner.com/blog/2011/03/19/https-certificate-verification-in-python-with-urllib2/
    # set up the https request:
    url = ('https://' + host + '?xjerr=1&client=psychopy2'
           + '&lang=' + lang
           + '&pfilter=%d' % pro_filter
           + '&maxresults=%d' % results)
    header = {'Content-Type': 'audio/%s; rate=%d' % (filetype, samplingrate),
              'User-Agent': useragent}
    web.requireInternetAccess()  # needed to access google's speech API
    try:
        self.request = urllib.request.Request(url, audio, header)
    except Exception:  # pragma: no cover
        # try again before accepting defeat
        logging.info("https request failed. %s, %s. trying again..." %
                     (filename, self.filename))
        core.wait(0.2, 0)
        self.request = urllib.request.Request(url, audio, header)
def __init__(self, filename, lang="en-US", timeout=10, samplingrate=16000,
             pro_filter=2, quiet=True):
    """Read a sound file and build the https request for recognising it.

    :Parameters:

        `filename` : <required>
            name of the speech file (.flac, .wav, or .spx) to process. wav
            files will be converted to flac, and for this to work you need
            to have flac (as an executable). spx format is
            speex-with-headerbyte (for google).
        `lang` :
            the presumed language of the speaker, as a locale code;
            default 'en-US'
        `timeout` :
            seconds to wait before giving up, default 10
        `samplingrate` :
            the sampling rate of the speech clip in Hz, either 16000 or
            8000. You can record at a higher rate, and then down-sample to
            16000 for speech recognition. `file` is the down-sampled file,
            not the original. the sampling rate is auto-detected for .wav
            files.
        `pro_filter` :
            profanity filter level; default 2 (e.g., f***)
        `quiet` :
            no reporting intermediate details; default `True`
            (non-verbose). Not read by this constructor.

    Raises `IOError` if `filename` does not exist, `SoundFileError` if the
    audio cannot be read, and `SoundFormatNotSupported` for an unsupported
    extension or sample rate.
    """
    # set up some key parameters:
    results = 5  # how many words wanted
    self.timeout = timeout
    useragent = PSYCHOPY_USERAGENT
    host = "www.google.com/speech-api/v1/recognize"
    # NOTE(review): the result is unused here; the call is kept in case
    # _getFlacPath() has side effects -- confirm.
    flac_path = _getFlacPath()

    # determine file type, convert wav to flac if needed:
    if not os.path.isfile(filename):
        raise IOError("Cannot find file: %s" % filename)
    ext = os.path.splitext(filename)[1]
    if ext not in [".flac", ".spx", ".wav"]:
        raise SoundFormatNotSupported("Unsupported filetype: %s\n" % ext)
    if ext == ".wav":
        __, samplingrate = readWavFile(filename)
    if samplingrate not in [16000, 8000]:
        raise SoundFormatNotSupported(
            "Speech2Text sample rate must be 16000 or 8000 Hz")
    self.filename = filename
    if ext == ".flac":
        filetype = "x-flac"
    elif ext == ".spx":
        filetype = "x-speex-with-header-byte"
    elif ext == ".wav":  # convert to .flac
        filetype = "x-flac"
        filename = wav2flac(filename)
    logging.info("Loading: %s as %s, audio/%s" %
                 (self.filename, lang, filetype))

    # occasional error; core.wait(.1) is not always enough; better slow
    # than fail
    c = 0
    while not os.path.isfile(filename) and c < 10:
        core.wait(0.1, 0)
        c += 1
    try:
        # context manager closes the handle even if read() fails
        with open(filename, "rb") as audioFile:
            audio = audioFile.read()
    except (IOError, OSError):
        # only I/O failures are translated; KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except
        msg = "Can't read file %s from %s.\n" % (filename, self.filename)
        logging.error(msg)
        raise SoundFileError(msg)
    finally:
        # remove the flac produced by the wav conversion, if any
        if ext == ".wav" and filename.endswith(".flac"):
            try:
                os.remove(filename)
            except OSError:
                pass  # best-effort cleanup

    # urllib2 makes no attempt to validate the server certificate. here's an idea:
    # http://thejosephturner.com/blog/2011/03/19/https-certificate-verification-in-python-with-urllib2/
    # set up the https request:
    url = ("https://" + host + "?xjerr=1&client=psychopy2"
           + "&lang=" + lang
           + "&pfilter=%d" % pro_filter
           + "&maxresults=%d" % results)
    header = {"Content-Type": "audio/%s; rate=%d" % (filetype, samplingrate),
              "User-Agent": useragent}
    web.requireInternetAccess()  # needed to access google's speech API
    try:
        self.request = urllib2.Request(url, audio, header)
    except Exception:
        # try again before accepting defeat
        logging.info("https request failed. %s, %s. trying again..." %
                     (filename, self.filename))
        core.wait(0.2, 0)
        self.request = urllib2.Request(url, audio, header)
def __init__(self, filename, lang='en-US', timeout=10, samplingrate=16000,
             pro_filter=2, level=0):
    """Load a speech clip and prepare the https recognition request.

    :Parameters:

        `filename` : <required>
            name of the speech file (.flac or .wav) to process. wav files
            will be converted to flac, and for this to work you need to
            have flac (as an executable). Any other extension is rejected.
        `lang` :
            the presumed language of the speaker, as a locale code;
            default 'en-US'
        `timeout` :
            seconds to wait before giving up, default 10
        `samplingrate` :
            the sampling rate of the speech clip in Hz, either 16000 or
            8000. You can record at a higher rate, and then down-sample to
            16000 for speech recognition. `file` is the down-sampled file,
            not the original. the sampling rate is auto-detected for .wav
            files.
        `pro_filter` :
            profanity filter level; default 2 (e.g., f***)
        `level` :
            flac compression level (0 less compression but fastest)

    Raises `IOError` if `filename` does not exist or cannot be read, and
    `SoundFormatNotSupported` for an unsupported extension or sample rate.
    """
    # set up some key parameters:
    results = 5  # how many words wanted
    self.timeout = timeout
    useragent = PSYCHOPY_USERAGENT
    host = "www.google.com/speech-api/v1/recognize"

    # determine file type, convert wav to flac if needed:
    if not os.path.isfile(filename):
        raise IOError("Cannot find file: %s" % filename)
    ext = os.path.splitext(filename)[1]
    if ext not in ['.flac', '.wav']:
        raise SoundFormatNotSupported("Unsupported filetype: %s\n" % ext)
    if ext == '.wav':
        _junk, samplingrate = readWavFile(filename)
    if samplingrate not in [16000, 8000]:
        raise SoundFormatNotSupported(
            'Speech2Text sample rate must be 16000 or 8000 Hz')
    self.filename = filename

    converted = False  # True only when we created a flac that we must delete
    if ext == ".flac":
        filetype = "x-flac"
    elif ext == ".wav":  # convert to .flac
        filetype = "x-flac"
        filename = wav2flac(filename, level=level)  # opt for speed
        converted = filename.endswith('.flac')
    logging.info("Loading: %s as %s, audio/%s" %
                 (self.filename, lang, filetype))

    # occasional error; core.wait(.1) is not always enough; better slow
    # than fail
    c = 0
    while not os.path.isfile(filename) and c < 10:
        core.wait(.1, 0)
        c += 1
    try:
        # `with` guarantees the handle is closed; the bare
        # open(...).read() left that to the garbage collector
        with open(filename, 'rb') as fh:
            audio = fh.read()
    finally:
        # clean up the converted file even when reading failed
        if converted:
            try:
                os.remove(filename)
            except OSError:
                pass  # best-effort cleanup

    # urllib2 makes no attempt to validate the server certificate. here's an idea:
    # http://thejosephturner.com/blog/2011/03/19/https-certificate-verification-in-python-with-urllib2/
    # set up the https request:
    url = ('https://' + host + '?xjerr=1&client=psychopy2'
           + '&lang=' + lang
           + '&pfilter=%d' % pro_filter
           + '&maxresults=%d' % results)
    header = {
        'Content-Type': 'audio/%s; rate=%d' % (filetype, samplingrate),
        'User-Agent': useragent
    }
    web.requireInternetAccess()  # needed to access google's speech API
    try:
        self.request = urllib.request.Request(url, audio, header)
    except Exception:  # pragma: no cover
        # try again before accepting defeat
        logging.info("https request failed. %s, %s. trying again..." %
                     (filename, self.filename))
        core.wait(0.2, 0)
        self.request = urllib.request.Request(url, audio, header)
def setup_class(self):
    """Skip the tests of this class when there is no internet access."""
    try:
        web.requireInternetAccess()
    except web.NoInternetAccessError:
        # nothing here can run offline
        pytest.skip()
"""Demo to illustrate speech recognition using microphone.Speech2Text Requires flac, which does not come with PsychoPy. You can get it free, download & install from https://xiph.org/flac/download.html Records 2.5s of speech, displays the word(s) spoken, and sets a color. Can be used to show subjects how loudly and clearly they need to talk. """ __author__ = 'Jeremy Gray' import os from psychopy import visual, core, event, colors, web from psychopy.microphone import switchOn, AudioCapture, Speech2Text web.requireInternetAccess() # needed to access google's speech API def display(*args): [item.draw() for item in args] win.flip() # PsychoPy only knows English color names; 'en-UK' might work better here for some speakers: options = {'lang': 'en-US'} # Set up visual things: win = visual.Window(color=-0.05) instr = visual.TextStim(win, text='say a color name when you see the microphone icon\n green, hot pink, gold, fire brick, dark red, ...\n\npress q or say exit to quit; expects "%s" input' % options['lang'], height=0.08, pos=(0,-0.4), wrapWidth=1.4) word = visual.TextStim(win, text='', height=0.2, opacity=0.8, pos=(0,.25)) icon = visual.ImageStim(win, image='mic.png', opacity=0.6) anykey = visual.TextStim(win, text='Press any key to start', pos=instr.pos-[0,.3], color='darkblue')
def __init__(self, filename, lang='en-US', timeout=10, samplingrate=16000,
             flac_exe='C:\\Program Files\\FLAC\\flac.exe',
             pro_filter=2, quiet=True):
    """Read a sound file and build the https request for recognising it.

    :Parameters:

        `filename` : <required>
            name of the speech file (.flac, .wav, or .spx) to process. wav
            files will be converted to flac, and for this to work you need
            to have flac (as an executable). spx format is
            speex-with-headerbyte (for google).
        `lang` :
            the presumed language of the speaker, as a locale code;
            default 'en-US'
        `timeout` :
            seconds to wait before giving up, default 10
        `samplingrate` :
            the sampling rate of the speech clip in Hz, either 16000 or
            8000. You can record at a higher rate, and then down-sample to
            16000 for speech recognition. `file` is the down-sampled file,
            not the original. the sampling rate is auto-detected for .wav
            files.
        `flac_exe` :
            **Windows only**: path to binary for converting wav to flac;
            must be a string with **two back-slashes where you want one**
            to appear (this does not display correctly above, in the web
            documentation auto-build); default is
            'C:\\\\\\\\Program Files\\\\\\\\FLAC\\\\\\\\flac.exe'
        `pro_filter` :
            profanity filter level; default 2 (e.g., f***)
        `quiet` :
            no reporting intermediate details; default `True`
            (non-verbose). Not read by this constructor.

    Raises `IOError` if `filename` does not exist, `MicrophoneError` if the
    flac binary is needed but missing, `SoundFileError` if the audio
    cannot be read, and `SoundFormatNotSupported` for an unsupported
    extension or sample rate.
    """
    # set up some key parameters:
    results = 5  # how many words wanted
    self.timeout = timeout
    useragent = PSYCHOPY_USERAGENT
    host = "www.google.com/speech-api/v1/recognize"
    flac_path = _getFlacPath(flac_exe)

    # determine file type, convert wav to flac if needed:
    ext = os.path.splitext(filename)[1]
    if not os.path.isfile(filename):
        # report the requested name (this used to format the builtin `file`)
        raise IOError("Cannot find file: %s" % filename)
    if ext not in ['.flac', '.spx', '.wav']:
        raise SoundFormatNotSupported("Unsupported filetype: %s\n" % ext)
    if ext == '.wav':
        __, samplingrate = readWavFile(filename)
    if samplingrate not in [16000, 8000]:
        raise SoundFormatNotSupported(
            'Speech2Text sample rate must be 16000 or 8000 Hz')
    self.filename = filename

    tmp = None  # temporary flac target; only created for .wav input
    if ext == ".flac":
        filetype = "x-flac"
    elif ext == ".spx":
        filetype = "x-speex-with-header-byte"
    elif ext == ".wav":  # convert to .flac
        if not os.path.isfile(flac_path):
            msg = "failed to find flac binary, tried '%s'" % flac_path
            logging.error(msg)
            raise MicrophoneError(msg)
        filetype = "x-flac"
        tmp = tempfile.NamedTemporaryFile()
        flac_cmd = [flac_path, "-8", "-f", "--totally-silent",
                    "-o", tmp.name, filename]
        __, se = core.shellCall(flac_cmd, stderr=True)
        if se:
            logging.warn(se)
        if not os.path.isfile(tmp.name):  # just try again
            # ~2% incidence when recording for 1s, 650+ trials
            # never got two in a row; core.wait() does not help
            logging.warn('Failed to convert to tmp.flac; trying again')
            __, se = core.shellCall(flac_cmd, stderr=True)
            if se:
                logging.warn(se)
        filename = tmp.name
    logging.info("Loading: %s as %s, audio/%s" %
                 (self.filename, lang, filetype))

    try:
        # occasional error; core.wait(.1) is not always enough; better slow
        # than fail
        c = 0
        while not os.path.isfile(filename) and c < 10:
            core.wait(.1, hogCPUperiod=0)
            c += 1
        # read-only is enough ('r+b' also demanded write permission);
        # `with` closes the handle even if read() fails
        with open(filename, 'rb') as audioFile:
            audio = audioFile.read()
    except (IOError, OSError):
        msg = "Can't read file %s from %s.\n" % (filename, self.filename)
        logging.error(msg)
        raise SoundFileError(msg)
    finally:
        # Closing a NamedTemporaryFile also deletes it. The previous
        # os.remove(tmp) was handed the file object (or an undefined
        # name) and always failed silently.
        if tmp is not None:
            try:
                tmp.close()
            except OSError:
                pass  # file already gone; nothing left to clean up

    # urllib2 makes no attempt to validate the server certificate. here's an idea:
    # http://thejosephturner.com/blog/2011/03/19/https-certificate-verification-in-python-with-urllib2/
    # set up the https request:
    url = ('https://' + host + '?xjerr=1&client=psychopy2'
           + '&lang=' + lang
           + '&pfilter=%d' % pro_filter
           + '&maxresults=%d' % results)
    header = {'Content-Type': 'audio/%s; rate=%d' % (filetype, samplingrate),
              'User-Agent': useragent}
    web.requireInternetAccess()  # needed to access google's speech API
    try:
        self.request = urllib2.Request(url, audio, header)
    except Exception:
        # try again before accepting defeat
        logging.info("https request failed. %s, %s. trying again..." %
                     (filename, self.filename))
        core.wait(0.2, hogCPUperiod=0)
        self.request = urllib2.Request(url, audio, header)