Ejemplo n.º 1
0
def startRecording():
    """Stream microphone audio to Rev.ai and print newly seen partial words.

    Opens the microphone, sends audio chunks to the Rev.ai streaming API,
    and prints each word from 'partial' hypotheses the first time it appears
    among the last five printed words.  Stops on Ctrl+C (KeyboardInterrupt),
    which closes the websocket with an "EOS" message.
    """
    # Imported locally so this fix is self-contained.
    from collections import deque

    # Sampling rate of your microphone and desired chunk size
    rate = 30000
    chunk = int(rate / 10)

    # Insert your access token here
    # NOTE(review): a real-looking token is committed here; it should be
    # revoked and loaded from the environment instead of source control.
    access_token = "02LbSIzUcbt6GPr1QiSYxiD63PgA2qfbzwrg_9lWTog3V3XDJ7Vr9NBUE2Y2DASDLncuR6V561WfxTr8hAibTispvwDiI"

    # Creates a media config with the settings set for a raw microphone input
    # NOTE(review): the config advertises 44100 Hz while the capture rate is
    # 30000 Hz above — confirm the server-side rate matches the mic rate.
    example_mc = MediaConfig('audio/x-raw', 'interleaved', 44100, 'S16LE', 1)

    streamclient = RevAiStreamingClient(access_token, example_mc)

    # Opens microphone input. The input will stop after a keyboard interrupt.
    with MicrophoneStream(rate, chunk) as stream:
        # Bug fix: the original used queue.Queue(maxsize=5), but Queue has no
        # .push() method and does not support `in` membership tests.  A deque
        # with maxlen=5 supports both and silently evicts the oldest entry.
        fiveWords = deque(maxlen=5)
        # Uses try block to allow users to manually close the stream
        try:
            # Starts the server connection and thread sending microphone audio
            response_gen = streamclient.start(stream.generator())

            # Iterates through responses and prints unseen partial words
            for response in response_gen:
                response = json.loads(response)
                if response['type'] == "partial":
                    for element in response['elements']:
                        if element['value'] not in fiveWords:
                            print(element['value'])
                            fiveWords.append(element['value'])

        except KeyboardInterrupt:
            # Ends the websocket connection.
            streamclient.client.send("EOS")
Ejemplo n.º 2
0
def getRev_ai():
    """Stream microphone audio to Rev.ai and publish non-final phrases.

    Side effects: sets the module globals ``latest_phrase`` (list of word
    values from the latest non-final hypothesis, with '<unk>' removed) and
    ``diff`` (the stream's ``a_diff`` timing value).  Stops on Ctrl+C, which
    closes the websocket with an "EOS" message.
    """
    print('get rev ai started')
    # Sampling rate of your microphone and desired chunk size
    rate = 48000
    chunk = int(rate / 10)

    # Insert your access token here
    # NOTE(review): a real-looking token is committed here; it should be
    # revoked and loaded from the environment instead of source control.
    access_token = "02KT6QPJ8XPl0HTqTglpdvZeohnNwaUldCBPJOP_QKTu5JtUsNfUXIC-O_oniEwmpw3QxPjTfEKjVCX33xfwWkai9ypo0"

    # Creates a media config with the settings set for a raw microphone input
    # NOTE(review): the config advertises 44100 Hz while the capture rate is
    # 48000 Hz above — confirm the server-side rate matches the mic rate.
    example_mc = MediaConfig('audio/x-raw', 'interleaved', 44100, 'S16LE', 1)

    streamclient = RevAiStreamingClient(access_token, example_mc)

    # Opens microphone input. The input will stop after a keyboard interrupt.
    with MicrophoneStream(rate, chunk, time.time()) as stream:
        # Uses try block to allow users to manually close the stream
        try:
            # Starts the server connection and thread sending microphone audio
            response_gen = streamclient.start(stream.generator())

            # Iterates through responses and publishes them
            for response in response_gen:
                try:
                    # Parse once instead of twice per response.
                    parsed = json.loads(response)
                    if parsed["type"] != 'final':
                        elements = parsed["elements"]
                        print([a["value"] for a in elements])
                        global latest_phrase
                        global diff
                        diff = stream.a_diff
                        values = [a["value"] for a in elements]
                        if '<unk>' in values:
                            values.remove('<unk>')
                        latest_phrase = values
                # Bug fix: a bare `except:` here also swallowed
                # KeyboardInterrupt, so Ctrl+C never reached the outer
                # handler.  Catch only ordinary errors (bad JSON / missing
                # keys) and keep the original best-effort fallback.
                except Exception:
                    print(response)

        except KeyboardInterrupt:
            # Ends the websocket connection.
            streamclient.client.send("EOS")
                    break

            yield b''.join(data)


# Example script: stream raw microphone audio to Rev.ai and print every
# server response until the user presses Ctrl+C.

# Sampling rate of your microphone and desired chunk size
rate = 44100
chunk = int(rate / 10)

# Insert your access token here
access_token = "your_access_token"

# Creates a media config with the settings set for a raw microphone input
example_mc = MediaConfig('audio/x-raw', 'interleaved', 44100, 'S16LE', 1)

streamclient = RevAiStreamingClient(access_token, example_mc)

# Opens microphone input. The input will stop after a keyboard interrupt.
with MicrophoneStream(rate, chunk) as stream:
    # Uses try method to allow users to manually close the stream
    try:
        # Starts the server connection and thread sending microphone audio
        response_gen = streamclient.start(stream.generator())

        # Iterates through responses and prints them raw (JSON strings)
        for response in response_gen:
            print(response)

    except KeyboardInterrupt:
        # Ends the websocket connection.
        streamclient.client.send("EOS")
Ejemplo n.º 4
0
                    break

            yield b''.join(data)


# Example script: stream raw microphone audio to Rev.ai with profanity
# filtering disabled, printing every raw server response.

# Sampling rate of your microphone and desired chunk size
rate = 44100
chunk = int(rate / 10)

# Insert your access token here
# NOTE(review): a real-looking token is committed here; it should be revoked
# and loaded from the environment instead of source control.
access_token = "02b0egd4tR8iJRFRkCDMmGjkkyDnyAIg4o7NDKLP7AtiWlKhXRTwzuhJIFPBlIswgUy69Sc1kQTe_ekYv2P1pWUeNwN3c"

# Creates a media config with the settings set for a raw microphone input
example_mc = MediaConfig('audio/x-raw', 'interleaved', 44100, 'S16LE', 1)

streamclient = RevAiStreamingClient(access_token, example_mc)

# Opens microphone input. The input will stop after a keyboard interrupt.
with MicrophoneStream(rate, chunk) as stream:
    # Uses try method to allow users to manually close the stream
    try:
        # Starts the server connection and thread sending microphone audio
        response_gen = streamclient.start(stream.generator(),
                                          filter_profanity=False)

        # Iterates through responses and prints them
        for response in response_gen:
            print(response)
            continue
            # NOTE(review): everything below the `continue` is dead code,
            # and the snippet is truncated here — the `if` body is missing.
            data = json.loads(response)
            if data["type"] != "final":
Ejemplo n.º 5
0
Archivo: pres2t.py Proyecto: rytse/lyre
# Example script (pres2t.py): transcribe a pre-recorded raw audio file named
# on the command line ("<target>.raw") via the Rev.ai streaming API.
# NOTE(review): snippet is truncated after the `tmsg` initializer below.
target = sys.argv[1]
print(f'Target: {target}')

filename = f'{target}.raw'

# String of your access token
# NOTE(review): a real-looking token is committed here; it should be revoked
# and loaded from the environment instead of source control.
access_token = '021RBqj26AzVFWw7gkJ_mYOTBxdQoDikYFCDdyGFznouU9tioCAwqRgQOW2-iihk0ZhQ06vS0lxB1RsybMGSr7B0iwhL4'

# Media configuration of audio file.
# This includes the content type, layout, rate, format, and # of channels
#config = MediaConfig("audio/x-raw", "interleaved", 16000, "S16LE", 1)
config = MediaConfig("audio/x-raw", "interleaved", 44100, "S16LE", 1)
#config = MediaConfig("audio/x-raw", "stereo", 44100, "S16LE", 2)

# Create client with your access token and media configuration
streamclient = RevAiStreamingClient(access_token, config)

# Open file and read data into array.
# Practically, stream data would be divided into chunks; here the entire
# file is sent as a single one-element list.
with io.open(filename, 'rb') as stream:
    MEDIA_GENERATOR = [stream.read()]

# Starts the streaming connection and creates a thread to send bytes from the
# MEDIA_GENERATOR. response_generator is a generator yielding responses from
# the server
response_generator = streamclient.start(MEDIA_GENERATOR)

# Iterates through the responses from the server when obtained
print(type(response_generator))
# Accumulators — presumably for the running transcript; the code that fills
# them is cut off by the snippet truncation.
msg = ''
tmsg = ''
Ejemplo n.º 6
0
def index():
    """Flask view: a read-aloud checker for the submitted text.

    On POST, normalizes the user's text (punctuation stripped, lowercased,
    split on spaces), then streams microphone audio to Rev.ai and compares
    each final word against the expected word in order:

    * all words matched -> renders 'correct.html'
    * a word mismatches -> renders 'wrong.html' with the expected word, an
      image for it (bingImageSearch) and its audio (textToSpeech)

    On GET (or after POST handling), renders 'index.html'.
    """
    if request.method == 'POST':  # this block is only entered when the form is submitted
        originalInput = request.form['userInput']
        userInput = originalInput.translate(str.maketrans("", "", string.punctuation)).lower()
        userInput = userInput.split(' ')
        # Sampling rate of your microphone and desired chunk size
        rate = 44100
        chunk = int(rate/10)

        # Insert your access token here
        # NOTE(review): a real-looking token is committed here; it should be
        # revoked and loaded from the environment instead of source control.
        access_token = "02RwwjqE2Tvzhs6EzGJlGqrpj71gT6SfeEKLGYi-LuZ1inGQsTOAOob1OYjly5ShYrunkVAfk3EHKMI3gFtFt1WvlrH1k"

        # Creates a media config with the settings set for a raw microphone input
        example_mc = MediaConfig('audio/x-raw', 'interleaved', 44100, 'S16LE', 1)

        streamclient = RevAiStreamingClient(access_token, example_mc)

        # Opens microphone input. The input will stop after a keyboard interrupt.
        with MicrophoneStream(rate, chunk) as stream:
            # Control flow uses DoneStreaming / WrongWord exceptions to break
            # out of the nested response loop.
            try:
                # Starts the server connection and thread sending microphone audio
                response_gen = streamclient.start(stream.generator())

                textFromSpeech = []
                # Iterates through responses, collecting final words
                for response in response_gen:
                    print(response)
                    response = json.loads(response)
                    if response["type"] == "final":
                        for elt in response["elements"]:
                            val = elt["value"].lower()
                            # Skip pure punctuation tokens.
                            if val not in {' ', '.', ',', '?', '!'}:
                                textFromSpeech.append(val)
                                if len(textFromSpeech) > len(userInput) or val != userInput[len(textFromSpeech)-1]:
                                    print("lol2")
                                    raise WrongWord
                            print(textFromSpeech)
                            print(userInput)
                    if len(textFromSpeech) >= len(userInput):
                        print("lol")
                        raise DoneStreaming
            except DoneStreaming:
                # Bug fix: the websocket close was previously placed after
                # `return` and never executed.  Send EOS before returning.
                streamclient.client.send("EOS")
                return render_template('correct.html')
            except WrongWord:
                wrongText = textFromSpeech
                wrongWord = wrongText[-1]
                rightText = userInput
                rightWord = userInput[len(wrongText)-1]
                wordImage = bingImageSearch.findImage(rightWord)
                textToSpeech.getAudio(rightWord)
                # Bug fix: close the websocket before returning (was dead code).
                streamclient.client.send("EOS")
                return render_template('wrong.html', wrongText = wrongText, rightText = rightText, wrongWord = wrongWord, wordImage = wordImage, rightWord=rightWord)

    return render_template('index.html', hasInput = False)
Ejemplo n.º 7
0
from six.moves import queue
import numpy as np
import ast
from sklearn.neighbors import KDTree

# Module-level setup shared by the voice-recognition example below.

# Sampling rate of your microphone and desired chunk size
rate = 44100
chunk = int(rate / 10)

# Insert your access token here
# NOTE(review): a real-looking token is committed here; it should be revoked
# and loaded from the environment instead of source control.
access_token = "02F4Okh0ju6Ug5Yq-VoSAsLRBdUVbD71P0m_-MoqBy4HJ0YjwHajPeQh4kFWqj4RZHBnacBJC-Tx7TAZ7ah6sPlnTim7Q"

# Creates a media config with the settings set for a raw microphone input
example_mc = MediaConfig('audio/x-raw', 'interleaved', 44100, 'S16LE', 1)

streamclient = RevAiStreamingClient(access_token, example_mc)

# Thread-safe FIFO of speakers — presumably produced/consumed by code
# outside this snippet; verify against the callers.
SPEAKERS = queue.Queue()


class VoiceRecognition():
    """Nearest-neighbour speaker lookup over calibration voice vectors."""

    def __init__(self, calibration_vectors_dict):
        """Build a KD-tree from ``{name: [vector, ...]}`` calibration data.

        ``self.all_names`` records the dict's speaker names in iteration
        order; all vectors are flattened into one KDTree for queries.
        """
        calibration_vectors = []
        self.all_names = []
        for name in calibration_vectors_dict:
            self.all_names.append(name)
            # Concatenate this speaker's vectors onto the flat list.
            calibration_vectors += calibration_vectors_dict[name]
        self.kd_tree = KDTree(np.array(calibration_vectors))

    def predict(self, voice_vector):
        # Query the nearest calibration vector for the given voice vector.
        # NOTE(review): the snippet is truncated here — the rest of this
        # method (mapping ``ind`` back to a speaker name, presumably) is
        # missing from view.
        dist, ind = self.kd_tree.query([voice_vector])
Ejemplo n.º 8
0
def executeStreaming(socketio, status):
    """Stream microphone audio to Rev.ai and relay transcripts via socketio.

    When ``status == "open"``, opens the mic, sends audio to Rev.ai, and
    emits each hypothesis on the 'my data' socketio event as
    ``{'content': text, 'type': resp_type}``.  Partial hypotheses are joined
    with spaces (punctuation attached directly); final hypotheses are joined
    without separators, and every 5th final gets an ``[h:m:s]`` elapsed-time
    stamp appended.  Any other ``status`` clears the mic buffer instead.
    Stops on Ctrl+C, which closes the websocket with an "EOS" message.
    """
    # Sampling rate of your microphone and desired chunk size
    rate = 44100
    chunk = int(rate / 10)

    # Insert your access token here
    # NOTE(review): a real-looking token is committed here; it should be
    # revoked and loaded from the environment instead of source control.
    access_token = "02_qnlgLZ05eoZPxN89yiouX3gTB86Dsw1uHIlgjSRbKt536KESupmymFaQOYTEMBi1_nR28sgGlSVyidxCBjGzYVLgNk"

    # Creates a media config with the settings set for a raw microphone input
    example_mc = MediaConfig('audio/x-raw', 'interleaved', 44100, 'S16LE', 1)

    streamclient = RevAiStreamingClient(access_token, example_mc)
    start_time = time.time()

    # Opens microphone input. The input will stop after a keyboard interrupt.
    with MicrophoneStream(rate, chunk) as stream:
        # Uses try block to allow users to manually close the stream
        try:
            # Starts the server connection and thread sending microphone audio
            if status == "open":
                response_gen = streamclient.start(stream.generator())
                ct = 0  # count of 'final' hypotheses since the last timestamp
                for response in response_gen:
                    resp = json.loads(response)

                    # Bug fix: Rev.ai's handshake message is
                    # {"type": "connected", ...} and carries no "elements",
                    # which previously raised KeyError; skip such messages.
                    if "elements" not in resp:
                        continue

                    elements = resp["elements"]
                    sentense_type = resp["type"]
                    txt = ""
                    if sentense_type == "partial":
                        for val in elements:
                            # Attach punctuation directly; space-separate words.
                            if (val["type"] == "punct"):
                                txt = txt + val["value"]
                            else:
                                txt = txt + val["value"] + " "
                    else:
                        for val in elements:
                            txt = txt + val["value"]
                        ct += 1

                    if (ct == 5):
                        ct = 0
                        # Render elapsed seconds as [h:m:s] by adding the
                        # timedelta to a dummy datetime epoch.
                        sec = datetime.timedelta(seconds=int(time.time() -
                                                             start_time))
                        tstamp = datetime.datetime(1, 1, 1) + sec
                        timestamp = "[" + str(tstamp.hour) + ":" + str(
                            tstamp.minute) + ":" + str(tstamp.second) + "]"
                        txt = txt + " " + str(timestamp)

                    socketio.emit('my data', {
                        'content': txt,
                        'type': resp["type"],
                    })
            else:
                # Drop any buffered audio without starting a connection.
                stream._clear_buffer()

        except KeyboardInterrupt:
            # Ends the websocket connection.
            streamclient.client.send("EOS")