Example 1
def start():
    speech = Pygsr()
    # duration in seconds
    speech.record(4)

    output = "log.csv"
    input = "audio.flac"
    if input[-5:] != '.flac':
        input += '*.flac'
    files = glob.glob(input)

    for flac in files:
        print "opening %s:" % flac
        valid_result = False
        tries = 0
        with open(flac, 'rb') as f:
            result = GoogleSpeechAPI(f)
            print "Audio is %.03f seconds long" % result.length
            f.seek(0)
            with Timer() as t:
                result.start()
        print "Result took %.03f sec" % t.interval
        print result.result

        out = ''.join(result.result)
        split3 = ""
        try:
            split1 = [x.strip() for x in out.split(':')]
            split2 = split1[3]
            split3 = [x.strip() for x in split2.split(',')]
            #[1:-1] is to remove double quotes from string
            print split3[0][1:-1]
            return split3[0][1:-1]
        except Exception, e:
            print "Exception:", e
Example 2
File: test_wit.py Project: Xqua/wit
def witAI():
    #loading pygsr, Python Google speech recognition
    speech = Pygsr()
    raw_input("Ready ?")
    speech.record(5)  # duration in seconds (3)
    try:
        phrase, complete_response = speech.speech_to_text(
            'en_US')  # select the language
    except:
        phrase = "Tell me a joke !"
    print phrase

    phrase = phrase.strip().replace(" ", "%20").encode(
        'ascii')  #if not ascii, curl crashes

    #Wit.ai curl URL
    curl_url = "https://api.wit.ai/message?q=%s" % phrase
    #Auth headers
    curl_header = ["Authorization: Bearer YW3P2YITCYYXGHVLMIE7R7G7BBJODBG4"]
    #debug
    print curl_url
    answer = get_curl(curl_url, curl_header)
    print answer
    result = json.loads(answer)  #parse answer
    return result
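
get_curl is defined elsewhere in the project; only its name and argument order are visible in the call above. A plausible stand-in using pycurl:

import pycurl
from StringIO import StringIO  # Python 2

def get_curl(url, headers):
    # perform a GET with the given header list; return the response body
    buf = StringIO()
    c = pycurl.Curl()
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.HTTPHEADER, headers)
    c.setopt(pycurl.WRITEFUNCTION, buf.write)
    c.perform()
    c.close()
    return buf.getvalue()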
Example 3
def srprocess(self, threadname):
    speech = Pygsr()
    speech.record(3)
    try:
        phrase, complete_response = speech.speech_to_text('ar_AE')
    except:
        phrase = ''
        global do_flage
        do_flage = False
    self.save_text(phrase + ' ')
Example 4
class HearingModule:
    def __init__(self):
        self.speech = Pygsr()

    def listen(self, t):
        self.speech.record(t)
        phrase, complete_response = self.speech.speech_to_text('en_EN')
        return phrase

    def connected(self):
        if self.speech:
            return True
        return False
Example 5
class Recorder:
    def __init__(self):
        global logger
        logger = logging.getLogger(__name__)
        self.speech = Pygsr()

    def record_command(self):
        '''Records audio and sends it to Google to translate to text.'''
        self.speech.record(settings.RECORD_LENGTH)
        result = self.speech.speech_to_text()
        line = None
        if result:
            line = result[0].lower()
        if not line:
            logger.warn('No command recorded.')
            return None
        logger.info(line)
        command = Command(line)
        logger.info(command)
        return command
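
Command and settings are project-local and not shown. A hypothetical minimal stand-in, just enough to exercise the snippet (all names and values below are guesses):

class settings:
    RECORD_LENGTH = 3  # seconds; guessed value

class Command(object):
    """Hypothetical stand-in: keeps the recognized line and its first word."""
    def __init__(self, line):
        words = line.split()
        self.line = line
        self.action = words[0] if words else ''

    def __repr__(self):
        return 'Command(%r)' % self.line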
Example 6
def listen(self):
    speech = Pygsr()
    speech.record(2)
    phrase, complete_response = speech.speech_to_text('en_IN')
    print phrase
Example 7
def _voice_input(self, duration):
    speech = Pygsr()
    speech.record(duration)
    phrase, complete_response = speech.speech_to_text(self.lang)
    return phrase
Example 8
from pygsr import Pygsr
import sys
import os

def set_proc_name(newname):
    from ctypes import cdll, byref, create_string_buffer
    libc = cdll.LoadLibrary('libc.so.6')
    buff = create_string_buffer(len(newname)+1)
    buff.value = newname
    # 15 == PR_SET_NAME from <linux/prctl.h>
    libc.prctl(15, byref(buff), 0, 0, 0)

def get_proc_name():
    from ctypes import cdll, byref, create_string_buffer
    libc = cdll.LoadLibrary('libc.so.6')
    buff = create_string_buffer(128)
    # 16 == PR_GET_NAME from <linux/prctl.h>
    libc.prctl(16, byref(buff), 0, 0, 0)
    return buff.value

set_proc_name('Mwave_gspeech')

speech = Pygsr()
speech.record(2)  # duration in seconds
phrase, complete_response = speech.speech_to_text('en-US') # select the language
os.remove('audio')
os.remove('audio.flac')
print phrase
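
The two prctl wrappers set and read the Linux process name (15 is PR_SET_NAME, 16 is PR_GET_NAME from <linux/prctl.h>). A quick check, assuming the definitions above:

set_proc_name('Mwave_gspeech')
print get_proc_name()  # -> 'Mwave_gspeech' (the kernel truncates names to 15 characters)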
Example 9
from pygsr import Pygsr
speech = Pygsr()
# duration in seconds (3)
speech.record(3)
# select the language and obtain the result
phrase, complete_response = speech.speech_to_text('es_ES')
print phrase
Example 10
            f.write(opener.open(request).read())
            f.close()
            Popen(['mplayer', 'data.mp3', '-really-quiet']).wait()
            #os.system('mplayer -ao alsa -noconsolecontrols data.mp3')

if __name__ == '__main__':
    x = Record()
    x.setup()
    recorded = False
    response = None
    while response != 'exit':
        x.read()
        rms = audioop.rms(x.data, 2)
        print rms
        if rms > x.threshold:
            speech = Pygsr()
            speech.record(5)
            phrase, complete_response = speech.speech_to_text('en_EN')
            response = x.custom(phrase)
            if response == False:
                response = x.Wolfram(phrase)
                if response == False:
                    response = x.cleverbot(phrase)
            print('PHRASEPHRASEPHRASE')
            print(phrase)
            print(response)
            x.speak(response)
            recorded = True
            rms = 0
        x.setup()
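
Record is the project's audio front end and is not shown (its custom, Wolfram, cleverbot and speak methods are omitted here). A plausible sketch of the recording part using pyaudio, mirroring the attributes the loop relies on; chunk size, sample rate and threshold are guesses:

import pyaudio

class Record:
    def setup(self):
        self.threshold = 2000  # RMS level treated as speech (guess)
        self.chunk = 1024
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=pyaudio.paInt16, channels=1,
                                  rate=16000, input=True,
                                  frames_per_buffer=self.chunk)

    def read(self):
        # one chunk of raw 16-bit samples; audioop.rms(self.data, 2)
        # in the main loop measures its loudness
        self.data = self.stream.read(self.chunk)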
            
Example 11
#!/usr/bin/python

from pygsr import Pygsr
speech = Pygsr()
speech.record(3)
phrase, complete_response = speech.speech_to_text('de_DE')
print phrase
Example 12
from pygsr import Pygsr
speech = Pygsr()
speech.record(3)  # duration in seconds (3)
phrase, complete_response = speech.speech_to_text(
    'es_ES')  # select the language
print phrase
Example 13
def __init__(self):
    Pygsr.__init__(self)
    self.active = False
    self.count_silence = 0
    return
Example 14
from pygsr import Pygsr

speech = Pygsr()
# duration in seconds
speech.record(3)
# select the language
(phrase, complete_response) = speech.speech_to_text('en_US')

print(phrase)
Example 15
#!/usr/bin/env python
#coding: utf-8

from pygsr import Pygsr
speech = Pygsr()
speech.record(3) # duration in seconds (3)
response = speech.speech_to_text('es_ES')  # select the language; returns (phrase, complete_response)
print response
Example 16
def __init__(self):
    global logger
    logger = logging.getLogger(__name__)
    self.speech = Pygsr()
Example 17
def __init__(self):
    self.speech = Pygsr()
Example 18
def main(wavFileName):

    ########################################################################################################################
    #wavFileName = "/Users/toine/Documents/speech_recognition/sound/sample/test.wav"
    wavFile = wave.open(wavFileName)
    (nchannels, sampwidth, framerate, nframes, comptype, compname) = wavFile.getparams()

    frames = wavFile.readframes(-1)
    npFrames = np.fromstring(frames, "Int16")


    ########################################################################################################################
    ## compute the spectrogram
    ## make sure FFT size is not too big for good accuracy
    nFft = 64
    nOverlap = 32
    fftWindow = nFft - nOverlap
    specgramFramerate = framerate / (fftWindow)

    ##TODO: check if this is needed
    ## pad the input for perfect FFT match
    ## npFrames = np.r_[npFrames, np.zeros(nFft - nframes % nFft)]

    ## spectrogram, return (Pxx, freqs, bins, im)
    # bins are the time points the spectrogram is calculated over
    # freqs is an array of frequencies
    # Pxx is an array of shape (len(times), len(freqs)) of power
    # im is a AxesImage instance
    (Pxx, freqs, bins, im) = plt.specgram(npFrames, Fs=framerate, NFFT=nFft, noverlap=nOverlap)
    #plt.show()
    plt.clf()

    ########################################################################################################################
    ## extract the voice frequencies
    ## voice frequency range, from 300Hz to 3500Hz
    # create a mask vector over these frequencies:
    # voiceArray is all 0's, with 1's inside the voice frequency range;
    # the dot product below sums the spectrogram power over that range
    f300Ind = lib.overflow(freqs, 300)
    f3500Ind = lib.overflow(freqs, 3500)
    voiceArray = np.zeros(len(freqs))
    voiceArray[f300Ind:f3500Ind] = 1
    ## dot product of the specgram
    voiceFreq = np.transpose(np.dot(np.transpose(Pxx), voiceArray))


    ########################################################################################################################
    ## compute the interesting minimums based on minimums and threshold
    #TODO: consider using the mlab/numpy function
    histData = plt.hist(voiceFreq, bins=100, range=(min(voiceFreq), np.mean(voiceFreq)))
    #plt.show()
    plt.clf()

    overflowPercent = 0.7
    overflowIndex = lib.overflow_hist(histData[0], overflowPercent)
    overflowValue = histData[1][overflowIndex]

    ## smooth the curve to find the minimums
    voiceFreqSmooth = lib.smooth(voiceFreq, 128)
    minimums = np.r_[True, voiceFreqSmooth[1:] < voiceFreqSmooth[:-1]] & \
               np.r_[voiceFreqSmooth[:-1] < voiceFreqSmooth[1:], True]

    ##TODO: change name
    ## create the array of cutting points, points are local minimums under the histogram threshold
    cutPoints = np.where(minimums & (voiceFreqSmooth < overflowValue))[0]


    ########################################################################################################################
    ## filter the minimums by roughly selecting one every avgSec seconds
    # on npFrames, avgSec sec = framerate * avgSec
    # on voiceFreq, framerate -> framerate/32
    avgSec = 3
    cutPointsNSec = [0]

    for pt in cutPoints:
        pt *= fftWindow  # convert cutPointsThres to npFrames framerate by multiplying with fftWindow
        if (pt - cutPointsNSec[-1]) > (framerate * avgSec):  # subtract the last value
            cutPointsNSec.append(pt)


    ########################################################################################################################
    ## create the cuts as additional files
    cutPointsNSecInSec = [(x / framerate) for x in cutPointsNSec]

    timestamp = []
    timestampNFrames = []
    for item1, item2 in lib.pairwise(cutPointsNSec, fillvalue=0):
        timestamp.append((item1, item2))
        timestampNFrames.append(item2 - item1)

    # generate the extension to the filename, e.g. filename.X_Y.wav for a cut from seconds X to Y
    addExtension = []
    timestampInSec = []
    for item1, item2 in lib.pairwise(cutPointsNSecInSec, fillvalue="end"):
        tmp = str(item1) + "_" + str(item2)
        timestampInSec.append((item1, item2))
        addExtension.append(tmp)


    logger = logging.getLogger(__name__)
    logger.debug("%s %s %s", timestamp, timestampNFrames, addExtension)
    logger.debug("%s %s %s", len(timestamp), len(timestampNFrames), len(addExtension))
    ## test on 1 file first
    #for (cutExt, cutTime, cutFrame) in zip(timestamp, timestampNFrames, addExtension):
    totalRes = []

    TESTINDEX = 6
    #TODO: take care of the last index, when cutPointNSecInSec is "end"
    for TESTINDEX in range(len(timestamp)-1):

        #TODO: make a lib function out of that
        splitName = path.basename(wavFileName).split(".")
        filename = path.dirname(wavFileName) + "/" + splitName[0] + "." + addExtension[TESTINDEX] + "." + splitName[1]

        wavChunk = wave.open(filename, "w")
        wavChunk.setparams((nchannels, sampwidth, framerate, timestampNFrames[TESTINDEX], comptype, compname))
        wavChunk.writeframes(npFrames[timestamp[TESTINDEX][0]:timestamp[TESTINDEX][1]].tostring())
        wavChunk.close()

        pygsr = Pygsr(filename)
        pygsr.convert()
        res = pygsr.speech_to_text("en", indx=TESTINDEX)
        totalRes.append(res)
        logger.debug("%s %s %s", TESTINDEX, addExtension[TESTINDEX], timestamp[TESTINDEX])

        h1 = str(datetime.timedelta(seconds=timestampInSec[TESTINDEX][0]))+",200"
        h2 = str(datetime.timedelta(seconds=timestampInSec[TESTINDEX][1]-1))+",800"

        logger.info("%s", TESTINDEX)
        logger.info("%s --> %s", h1, h2)
        logger.info("%s", res)
        logger.info("")

        #logger.debug("this should not appear in the srt file")

    logger.debug("%s", totalRes)

    return 1
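
This example leans on a project-local lib module that is not shown. The helpers below are plausible implementations inferred purely from how they are called; they are illustrative assumptions, not the project's actual code:

import numpy as np
from itertools import tee, izip_longest  # Python 2

def overflow(arr, value):
    # index of the first element of a sorted array that exceeds value
    return np.searchsorted(arr, value)

def overflow_hist(counts, percent):
    # index at which the cumulative histogram reaches percent of the total
    total = sum(counts)
    acc = 0
    for i, c in enumerate(counts):
        acc += c
        if acc >= percent * total:
            return i
    return len(counts) - 1

def smooth(x, window_len):
    # simple moving-average smoothing (the project may use a fancier window)
    window = np.ones(window_len) / window_len
    return np.convolve(x, window, mode='same')

def pairwise(iterable, fillvalue=None):
    # consecutive pairs: (s0, s1), (s1, s2), ..., (sN, fillvalue)
    a, b = tee(iterable)
    next(b, None)
    return izip_longest(a, b, fillvalue=fillvalue)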