Ejemplo n.º 1
0
def speech2text(audio_file_path):
    """
    Transcribe a WAV audio clip using the IBM Watson Speech to Text API.

    Args:
        audio_file_path (str): File path of the audio file.

    Returns:
        str: Concatenated transcript (each segment prefixed with a space),
        or None if any step failed.
    """
    try:
        # Load API credentials; a context manager guarantees the file is
        # closed (the original leaked the handle).
        with open('api.json') as cred_file:
            api_json = json.load(cred_file)
        authenticator = IAMAuthenticator(api_json['apikey'])
        speech_to_text = SpeechToTextV1(authenticator=authenticator)

        speech_to_text.set_service_url(api_json['url'])

        # SECURITY(review): disabling SSL verification exposes the request to
        # MITM attacks — confirm this is really required for this deployment.
        speech_to_text.set_disable_ssl_verification(True)

        with open(audio_file_path, 'rb') as audio_file:
            speech_recognition_results = speech_to_text.recognize(
                audio=audio_file, content_type='audio/wav').get_result()

        # Join the best alternative of every result segment (fixes the
        # 'transcipt' typo; output is byte-identical to the original loop).
        text = ''.join(' ' + item['alternatives'][0]['transcript']
                       for item in speech_recognition_results['results'])
        return text
    except Exception as exp:
        print(f"Failed in speech2text with error {exp}")
Ejemplo n.º 2
0
    def __init__(self,
                 api_key,
                 api_url,
                 noise_adjust_time=0,
                 echo=False,
                 phrase_time_limit=None):
        """Authenticate against Watson STT and prepare microphone capture."""
        # IBM Cloud authentication.
        self.service = SpeechToTextV1(authenticator=IAMAuthenticator(api_key))
        self.service.set_service_url(api_url)

        # Callback object fed by the recognition stream.
        self.audio_callback = recognizeCallback(self)

        # speech_recognition objects used to capture audio from the mic.
        self.recognizer = sr.Recognizer()
        self.mic = sr.Microphone()
        self.string_queue = Queue()

        # Optionally calibrate for ambient noise before listening.
        if noise_adjust_time > 0:
            with self.mic as source:
                self.recognizer.adjust_for_ambient_noise(
                    source, duration=noise_adjust_time)

        self.echo = echo
        self.phrase_time_limit = phrase_time_limit
        self.background_listening = False

        # Feedback sounds played around listening sessions.
        self.beep = sa.WaveObject.from_wave_file("beep.wav")
        self.beep2 = sa.WaveObject.from_wave_file("beep2.wav")
Ejemplo n.º 3
0
    def __init__(self, audio_device):
        """Read IBM credentials from the configured file and set up the
        Watson Speech to Text transcriber for *audio_device*."""
        self.is_supported = is_supported
        if not self.is_supported:
            return

        self.audio_device = audio_device

        # Parse KEY=VALUE pairs from the IBM credentials file.
        APIKEY = None
        URL = None
        with open(speakreader.CONFIG.IBM_CREDENTIALS_FILE) as f:
            for line in f.read().splitlines():
                # Split only on the first '=' so values containing '=' survive.
                parm = line.split('=', 1)
                if parm[0] == 'SPEECH_TO_TEXT_APIKEY':
                    APIKEY = parm[1]
                elif parm[0] == 'SPEECH_TO_TEXT_URL':
                    URL = parm[1]

        if APIKEY is None or URL is None:
            # logger.warn is a deprecated alias of logger.warning.
            logger.warning('ibmTranscribe: APIKEY or URL not found in credentials file')
            # Bail out instead of crashing inside IAMAuthenticator(None) below.
            self.is_supported = False
            return

        # initialize speech to text service
        self.authenticator = IAMAuthenticator(APIKEY)
        self.speech_to_text = SpeechToTextV1(authenticator=self.authenticator)
        self.speech_to_text.set_service_url(URL)
        self.mycallback = ProcessResponses()

        self.audio_source = AudioSource(audio_device._streamBuff, is_recording=True, is_buffer=True)
Ejemplo n.º 4
0
def getTextFromSpeech():
    """Transcribe the request body (WAV audio, ja-JP) with Watson STT and
    return the best transcript as a plain-text HTTP response."""
    # NOTE(review): mixing username/password with iam_apikey in one kwargs
    # dict is unusual — confirm which credential style this SDK version uses.
    tts_kwargs = {
            'username': speechToTextUser,
            'password': speechToTextPassword,
            'iam_apikey': speechToTextIAMKey,
            'url': speechToTextUrl
    }

    sttService = SpeechToTextV1(**tts_kwargs)

    response = sttService.recognize(
            audio=request.get_data(cache=False),
            content_type='audio/wav',
            model='ja-JP_BroadbandModel',
            timestamps=True,
            word_confidence=True).get_result()

    # Use the best alternative of the first result segment, if any.
    if response['results']:
        text_output = response['results'][0]['alternatives'][0]['transcript']
    else:
        text_output = ''

    # Bug fix: the MIME type is 'text/plain', not 'plain/text'.
    return Response(response=text_output, mimetype='text/plain')
Ejemplo n.º 5
0
    def transcribe_audio(self):
        """Transcribe ``self.path_to_audio_file`` with Watson STT and wrap the
        transcript as a Watson Assistant message payload.

        Returns:
            dict: ``{'workspace_id': ..., 'input': {'text': transcript}}``
        """
        # SECURITY(review): API key is hard-coded in source; move to config.
        # The iam_apikey/url constructor kwargs are deprecated in
        # ibm-watson>=4; use IAMAuthenticator + set_service_url instead.
        authenticator = IAMAuthenticator(
            '9MXnNlJ3iDrKTsvBYVF5IR3CLVbCHkkL1fhGaRySFsEe')
        speech_to_text = SpeechToTextV1(authenticator=authenticator)
        speech_to_text.set_service_url(
            'https://stream.watsonplatform.net/speech-to-text/api')

        with open(self.path_to_audio_file, 'rb') as audio_file:
            speech_result = speech_to_text.recognize(
                audio=audio_file,
                content_type='audio/wav',
                word_alternatives_threshold=0.9,
                keywords=['hey', 'hi', 'watson', 'friend', 'meet'],
                keywords_threshold=0.5
            ).get_result()

        # Best alternative of the first result segment.
        speech_text = speech_result['results'][0]['alternatives'][0]['transcript']
        print("User Speech Text: " + speech_text + "\n")

        input_text = {
            'workspace_id': workspace_id,
            'input': {
                'text': speech_text
            }
        }

        return input_text
Ejemplo n.º 6
0
def authenticate():
    """
    Build a Watson Speech to Text client from the stored cloud credentials.
    :rtype: SpeechToTextV1
    """
    service = SpeechToTextV1(
        authenticator=IAMAuthenticator(credentials.API_KEY_STT))
    service.set_service_url(credentials.URL_STT)
    return service
def activate():
    """Create a Watson STT client bound to the hard-coded service instance."""
    # NOTE(review): API key and instance URL are hard-coded in source.
    auth = IAMAuthenticator(
        'yTSSJ5GSmGhgIA95KnVPDf61KSZinztq909UBMfoqh7l')
    client = SpeechToTextV1(authenticator=auth)
    client.set_service_url(
        "https://api.us-east.speech-to-text.watson.cloud.ibm.com/instances/77c94867-643f-431b-a593-0bc775c18bb7"
    )
    return client
Ejemplo n.º 8
0
def res():
    """Record 5 seconds from the microphone, transcribe it with Watson STT
    (Spanish), translate the transcript to English, and build a tone analyzer.

    NOTE(review): the function appears truncated in this source — the
    tone_analyzer client is created but never used afterwards.
    """
    print('inicia')

    # Capture raw audio from the default input device.
    audio = pyaudio.PyAudio()
    stream = audio.open(format=pyaudio.paInt16, channels=2, rate=44100,
                        frames_per_buffer=1024, input=True)
    print('Inicia grabación')

    frames = []
    tiempo = 5  # seconds of recording
    for i in range(0, int(44100 / 1024 * tiempo)):
        frames.append(stream.read(1024))

    print('Acaba la captura')
    stream.stop_stream()
    stream.close()

    # Persist the capture as a WAV file for the STT request.
    waveFile = wave.open('graba.wav', 'wb')
    waveFile.setnchannels(2)
    waveFile.setframerate(44100)
    waveFile.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
    waveFile.writeframes(b''.join(frames))
    waveFile.close()

    servicio = SpeechToTextV1(iam_apikey='id de watson')
    with open('graba.wav', 'rb') as fichero:
        res = json.dumps(servicio.recognize(audio=fichero, timestamps=True,
                                            model='es-ES_NarrowbandModel',
                                            word_confidence=True).get_result(),
                         sort_keys=True, indent=2)
        resultado = json.loads(res)
        # Best alternative of the first result segment.
        res = resultado["results"][0]["alternatives"][0]["transcript"]
        print(res)
    audio = res
    # ---------------------------------- translation ----------------------------------
    language_translator = LanguageTranslatorV3(
        version='2018-05-01',
        iam_apikey='id de watson',
        url='https://gateway.watsonplatform.net/language-translator/api')

    translation = language_translator.translate(
        text=res, model_id='es-en').get_result()
    traduccion = json.dumps(translation, indent=2, ensure_ascii=False)

    traduccion = json.loads(traduccion)["translations"][0]["translation"]
    print(traduccion)
    traduccion1 = traduccion
    # ---------------------------------- tone analyzer ----------------------------------
    # Bug fix: the original line had an unterminated string literal (missing
    # closing quote after the iam_apikey value), a SyntaxError.
    tone_analyzer = ToneAnalyzerV3(
        version='2017-09-21',
        iam_apikey='id de watson',
        url='https://gateway.watsonplatform.net/tone-analyzer/api')
Ejemplo n.º 9
0
    def _get_transcriber(self) -> SpeechToTextV1:
        """Build a Watson STT client from the ibmwatson config section."""
        authenticator = IAMAuthenticator(config.ibmwatson.api_key)
        transcriber = SpeechToTextV1(authenticator=authenticator)
        transcriber.set_service_url(config.ibmwatson.endpoint)
        return transcriber
Ejemplo n.º 10
0
def init_stt(config: ConfigParser):
    """Create a Watson STT client from the [CREDENTIALS] section of *config*."""
    creds = config['CREDENTIALS']
    stt = SpeechToTextV1(authenticator=choose_authenticator(config))
    stt.set_service_url(creds.get('url'))
    return stt
Ejemplo n.º 11
0
def transcribe_audio_with_watson(path_to_audio_file):
    """Transcribe a WAV file (path relative to this module) with Watson STT
    and return the best transcript of the first result segment."""
    service = SpeechToTextV1(
        authenticator=IAMAuthenticator(os.getenv('SPEECH_TO_TEXT_APIKEY')))
    service.set_service_url(os.getenv("SPEECH_TO_TEXT_URL"))

    audio_path = join(dirname(__file__), path_to_audio_file)
    with open(audio_path, 'rb') as audio_file:
        result = service.recognize(
            audio_file,
            content_type='audio/wav',
            model='en-US_NarrowbandModel').get_result()
        return result['results'][0]['alternatives'][0]['transcript']
Ejemplo n.º 12
0
def voice_solver(random):
    """Transcribe ``voice/<random>.mp3`` with Watson STT.

    Args:
        random (str): Basename (without extension) of the MP3 under voice/.

    Returns:
        str: Best transcript of the first result segment.
    """
    # SECURITY(review): API key and instance URL are hard-coded in source.
    apikey = 'AJkX0XwX5FdlwhpNJFBFcfr06feOxrO1uZPgPypBdwlc'
    url = 'https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/1906bf89-44a9-4a16-b92a-47abb56a5c51'
    stt = SpeechToTextV1(authenticator=IAMAuthenticator(apikey))
    stt.set_service_url(url)
    with open('voice/' + random + '.mp3', 'rb') as f:
        # Bug fix: 'continuous' was removed from the Watson STT API and the
        # current SDK's recognize() signature rejects it with a TypeError.
        res = stt.recognize(audio=f, content_type='audio/mp3',
                            model='en-US_NarrowbandModel').get_result()
    return res['results'][0]['alternatives'][0]['transcript']
Ejemplo n.º 13
0
    def _wrapper_ibm_stt(self, audio):
        """Send *audio* (FLAC stream/bytes) to Watson STT and return the raw
        recognize() result dict."""
        # SECURITY(review): API key is hard-coded in source; move to config.
        # The iam_apikey/url constructor kwargs are deprecated in
        # ibm-watson>=4; use IAMAuthenticator + set_service_url instead.
        authenticator = IAMAuthenticator(
            "iB_ldfwxsW5QLIfKR_G629EzkiZrb-gr1kDo45tbE2fb")
        speech_to_text = SpeechToTextV1(authenticator=authenticator)
        speech_to_text.set_service_url(
            "https://gateway-tok.watsonplatform.net/speech-to-text/api")

        return speech_to_text.recognize(
            audio=audio,
            content_type='audio/flac',
            word_alternatives_threshold=0.9).get_result()
Ejemplo n.º 14
0
def getTranscriptForUploadedAudio(mp3File):
    """Transcribe an uploaded MP3 with Watson STT (with word timestamps).

    Args:
        mp3File (str): Audio file name, relative to this module's directory.

    Returns:
        dict: Full recognize() result payload.
    """
    # SECURITY(review): API key is hard-coded in source.
    authenticator = IAMAuthenticator(
        'JNJkBoGNQKYihv3CPqXMtuKlgAVQcFrunct_mV2Yv4cx')
    STT_service = SpeechToTextV1(authenticator=authenticator)
    # NOTE(review): this URL points at a Natural Language Understanding
    # instance, not Speech to Text — confirm the correct service URL.
    STT_service.set_service_url(
        'https://api.us-south.natural-language-understanding.watson.cloud.ibm.com/instances/816f28bc-9729-48ca-b11a-c736524e6ad6'
    )
    # Bug fix: dirname('__file__') (a string literal) always returns '';
    # the __file__ dunder makes the path relative to this module as intended.
    with open(os.path.join(os.path.dirname(__file__), mp3File),
              'rb') as audio_file:
        transcript = STT_service.recognize(audio=audio_file,
                                           timestamps=True).get_result()
        return transcript
Ejemplo n.º 15
0
def speech_to_text(in_file):
    """
    Convert audio speech to text via IBM API
    """
    with open(in_file, mode="rb") as wav:
        # Credentials come from the environment.
        service = SpeechToTextV1(
            authenticator=IAMAuthenticator(os.environ.get('S2T_KEY')))
        service.set_service_url(os.environ.get('S2T_URL'))
        response = service.recognize(audio=wav, content_type='audio/mp3')
        # Best alternative of the first result segment.
        return response.result['results'][0]["alternatives"][0]["transcript"]
def retrieve_transcript(identifier,
                        language,
                        speaker_type,
                        service_config,
                        phone=False):
    """Download an audio object from S3, submit it to Watson STT as an async
    job, poll until completion, and return the finished job payload.

    Args:
        identifier (str): "<bucket>/<key>" of the audio object in S3.
        language (str): Language prefix for the Watson model (e.g. "pt-BR").
        speaker_type: Unused here; kept for interface compatibility.
        service_config (dict): Must contain "api_key" and "service_url".
        phone (bool): Use the narrowband (telephony) model when True.

    Returns:
        dict: The completed recognition job.

    Raises:
        Exception: If the Watson job ends in the 'failed' state.
    """
    bucket = None
    try:
        s3_items = identifier.split('/')
        s3_resource = boto3.resource('s3')
        bucket = s3_resource.Bucket(s3_items[0])
        extension = Path(s3_items[1]).suffix[1:]
        if extension == 'wav':
            local_file = f"{uuid.uuid4()}.wav"
            content_type = "audio/wav"
        else:
            local_file = f"{uuid.uuid4()}.mp3"
            content_type = "audio/mp3"
        bucket.download_file(s3_items[1], local_file)
    finally:
        # NOTE(review): this destructively empties and deletes the bucket
        # even on success — confirm that is really intended.
        # Bug fix: guard against 'bucket' never having been assigned (e.g.
        # boto3.resource failing), which raised NameError here and masked
        # the original exception.
        if bucket is not None:
            bucket.objects.delete()
            bucket.delete()
    authenticator = IAMAuthenticator(service_config["api_key"])
    speech_to_text = SpeechToTextV1(authenticator=authenticator)

    speech_to_text.set_service_url(service_config["service_url"])

    # Telephony audio is narrowband; everything else uses the broadband model.
    if phone:
        model = f"{language}_NarrowbandModel"
    else:
        model = f"{language}_BroadbandModel"

    with open(local_file, 'rb') as audio_file:
        recognition_job = speech_to_text.create_job(
            audio_file,
            model=model,
            content_type=content_type,
            results_ttl=60,
            inactivity_timeout=-1,
            timestamps=True,
            speaker_labels=
            False,  # right now, there is no diarization for Brazilian Portuguese
            word_confidence=True,
            profanity_filter=False).get_result()

    # Poll until the job leaves the queue.
    # NOTE(review): sleep(1000) waits ~17 minutes between polls — if the
    # intent was 1000 ms, this should be sleep(1). Confirm before changing.
    while recognition_job['status'] in ('waiting', 'processing'):
        sleep(1000)
        recognition_job = speech_to_text.check_job(
            recognition_job['id']).get_result()

    if recognition_job['status'] == 'failed':
        raise Exception(json.dumps(recognition_job, indent=2))
    else:
        return recognition_job
def initServices(app):
    """Wire up MQTT, Watson STT/translator, IBM COS, and working folders on *app*."""
    # Setup MQTT
    app.config['MQTT_BROKER_URL'] = 'test.mosquitto.org'
    app.config['MQTT_BROKER_PORT'] = 1883
    mqtt = Mqtt(app)
    app.config['MQTT_CLIENT'] = mqtt

    # Setup IBM Watson speech-to-text
    load_dotenv()
    authenticator = IAMAuthenticator(os.getenv("STT_API_KEY"))
    service = SpeechToTextV1(authenticator=authenticator)
    service.set_service_url(os.getenv("STT_URL"))
    app.config['SPEECH_TO_TEXT'] = service

    # Language translator
    authenticator_translate = IAMAuthenticator(os.getenv("TRANSLATE_API_KEY"))
    language_translator = LanguageTranslatorV3(
        version='2018-05-01', authenticator=authenticator_translate)
    language_translator.set_service_url(os.getenv("TRANSLATE_URL"))
    app.config['LANGUAGE_TRANSLATOR'] = language_translator

    # IBM COS
    app.config['COS_ENDPOINT'] = os.getenv("COS_ENDPOINT")
    # Bug fix: the original "not startswith('http') or not startswith('https')"
    # was true for any http:// endpoint, producing 'https://http://...'.
    # Only prepend a scheme when none is present.
    if not app.config['COS_ENDPOINT'].startswith('http'):
        app.config['COS_ENDPOINT'] = 'https://' + app.config['COS_ENDPOINT']

    cos = ibm_boto3.resource(
        "s3",
        ibm_api_key_id=os.getenv("COS_API_KEY"),
        ibm_service_instance_id=os.getenv("COS_IAM_ROLE_CRN"),
        ibm_auth_endpoint='https://iam.cloud.ibm.com/identity/token',
        config=Config(signature_version="oauth"),
        endpoint_url=app.config['COS_ENDPOINT'])
    app.config['COS'] = cos
    app.config['COS_BUCKET_NAME'] = os.getenv("COS_BUCKET_NAME")

    # Setup working folders for uploads, extracted audio, and transcripts.
    app.config['BASE'] = os.path.join(os.path.dirname(os.getcwd()), '/app')
    app.config['BASE'] = os.path.join(app.config['BASE'], 'server')
    app.config['UPLOAD_FOLDER'] = os.path.join(app.config['BASE'],
                                               'video_uploads')
    app.config['AUDIO_FOLDER'] = os.path.join(app.config['BASE'],
                                              'audio_extractions')
    app.config['OUTPUT_FOLDER'] = os.path.join(app.config['BASE'],
                                               'output_transcripts')

    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    os.makedirs(app.config['AUDIO_FOLDER'], exist_ok=True)
    os.makedirs(app.config['OUTPUT_FOLDER'], exist_ok=True)

    return
Ejemplo n.º 18
0
 def get_question(self):
     """Transcribe ./file.wav with Watson STT and return the top transcript."""
     stt = SpeechToTextV1(authenticator=self.authenticator)
     stt.set_service_url(
         'https://stream.watsonplatform.net/speech-to-text/api')
     with open('./file.wav', 'rb') as audio_file:
         results = stt.recognize(
             audio=audio_file,
             content_type='audio/wav',
             word_alternatives_threshold=0.9,
             keywords=['if', 'oque e', 'do while'],
             keywords_threshold=0.5).get_result()
     # Best alternative of the first result segment.
     return results.get('results')[0].get('alternatives')[0].get('transcript')
Ejemplo n.º 19
0
 def __init__(self, filepath, callback_fn):
     """Prepare a Watson STT session for *filepath* and remember the callback."""
     self.key = ''
     self.url = ''
     self.filepath = filepath
     # Split the base name into file name and extension.
     basename = os.path.basename(self.filepath)
     self.filename, self.fileext = os.path.splitext(basename)
     self.result_json = {}
     self.result_texts = []
     self.load_key()  # load API credentials (original comment: "api_key読込")
     self.rCallback = self.MyRecognizeCallback(self.save_result)
     # Pre-v4 SDK constructor style (iam_apikey/url kwargs).
     self.speech_to_text = SpeechToTextV1(iam_apikey=self.key, url=self.url)
     self.callback_fn = callback_fn
Ejemplo n.º 20
0
def transcribe_audio(path_to_audio_file):
    """Send a WAV file (path relative to this module) to Watson STT.

    Returns the SDK response object; get_result() is left to the caller.
    """
    speech_to_text = SpeechToTextV1(authenticator=IAMAuthenticator(api))
    speech_to_text.set_service_url(api_url)

    audio_path = join(dirname(__file__), path_to_audio_file)
    with open(audio_path, 'rb') as audio_file:
        return speech_to_text.recognize(audio_file, content_type='audio/wav')
Ejemplo n.º 21
0
def STTFunc(bff):
    """Transcribe the WAV payload *bff* and return the best transcript string."""
    print("inside STT")
    stt = SpeechToTextV1(authenticator=IAMAuthenticator(IAMAuth1))
    stt.set_service_url(service_url1)
    result = stt.recognize(
        audio=bff,
        content_type='audio/wav',
    ).get_result()
    transcript = result["results"][0]["alternatives"][0]["transcript"]
    print(result)
    return transcript
Ejemplo n.º 22
0
def transcribe():
    """Transcribe FILE_NAME with the en-US narrowband model and return the
    concatenated transcript text."""
    print("Transcribing...")
    service = SpeechToTextV1(authenticator=speech_to_text_authenticator)
    # Removed an unused service.get_model(...) call: its result was never
    # read and it cost an extra API round-trip per invocation.

    with open(FILE_NAME, 'rb') as audio_file:
        api_result = service.recognize(
            audio=audio_file,
            content_type='audio/wav',
            model='en-US_NarrowbandModel').get_result()

    print("Finished transcribing...")
    return concatenate_transcription(api_result)
Ejemplo n.º 23
0
def speech_to_text():
    """Transcribe ./output.wav with Watson STT and return the result as a
    pretty-printed JSON string."""
    # Pre-v4 SDK constructor style (iam_apikey/url kwargs).
    stt = SpeechToTextV1(
        iam_apikey='OZeac4xlSe-tv1wP1rsyV0BsIYzEmTyp83H_pwdzQPFl',
        url='https://gateway-lon.watsonplatform.net/speech-to-text/api')
    audio_path = join(dirname(__file__), './.', 'output.wav')
    with open(audio_path, 'rb') as audio_file:
        recognition = stt.recognize(
            audio=audio_file,
            content_type='audio/wav',
            word_alternatives_threshold=0.9,
            keywords=['colorado', 'tornado', 'tornadoes'],
            keywords_threshold=0.5).get_result()
        return json.dumps(recognition, indent=2)
Ejemplo n.º 24
0
def parse_audio(path):
    """Stream <path>/recording.mp3 to Watson STT over a websocket and dump
    the callback payloads to <path>/sample.json.

    Returns:
        list: The remaining queued payloads. NOTE(review): the queue is
        drained while writing the file, so this list is typically empty —
        confirm the intended return value.
    """
    audio = path + '/recording.mp3'
    # Bug fix: 'print audio' is Python 2 statement syntax and a SyntaxError
    # under Python 3; use the print() function.
    print(audio)
    CHUNK = 1024
    # Note: It will discard if the websocket client can't consumme fast enough
    # So, increase the max size as per your choice
    BUF_MAX_SIZE = CHUNK * 10

    # NOTE(review): the URL includes '/v1/recognize'; the SDK expects the
    # service base URL — confirm.
    speech_to_text = SpeechToTextV1(
        iam_apikey='9e0ri-mtT_R8DicTjLTNkRe9T1WJFxHdkFBYobAmlxp2',
        url=
        'https://gateway-wdc.watsonplatform.net/speech-to-text/api/v1/recognize'
    )

    # SECURITY(review): SSL verification is disabled — confirm necessary.
    speech_to_text.disable_SSL_verification()
    jsonresult = ""
    q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))

    class MyRecognizeCallback(RecognizeCallback):
        """Collects websocket recognition payloads onto the shared queue."""

        def __init__(self):
            RecognizeCallback.__init__(self)

        def on_data(self, data):
            q.put(data)

        def on_error(self, error):
            print('Error received: {}'.format(error))

        def on_inactivity_timeout(self, error):
            print('Inactivity timeout: {}'.format(error))

    myRecognizeCallback = MyRecognizeCallback()

    # read input audio file
    with open(audio, 'rb') as audio_file:
        audio_source = AudioSource(audio_file)
        speech_to_text.recognize_using_websocket(
            audio=audio_source,
            content_type='audio/mp3',
            recognize_callback=myRecognizeCallback,
            model='en-US_BroadbandModel',
            speaker_labels=True)

    # write to raw transcript (this drains the queue)
    with open(path + '/sample.json', 'w+') as f:
        while not q.empty():
            f.write(json.dumps(q.get()))
    return list(q.queue)
Ejemplo n.º 25
0
def main(args):
    """Cloud Functions entry point: decode a multipart-encoded FLAC upload,
    transcribe it with Watson STT (pt-BR), and return the transcript.

    Args:
        args (dict): Action params including '__ow_headers' and base64
            '__ow_body'.

    Returns:
        dict: {'transcript': <best transcript of the first result>}.
    """
    # Parse incoming request headers
    _c_type, p_dict = parse_header(args['__ow_headers']['content-type'])

    # Decode body (base64)
    decoded_string = b64decode(args['__ow_body'])

    # Set Headers for multipart_data parsing
    p_dict['boundary'] = bytes(p_dict['boundary'], "utf-8")
    p_dict['CONTENT-LENGTH'] = len(decoded_string)

    # Parse incoming request data
    multipart_data = parse_multipart(BytesIO(decoded_string), p_dict)

    # Build flac file from stream of bytes; the context manager guarantees
    # the handle is closed even if the write raises.
    with open("audio_sample.flac", 'wb') as fo:
        fo.write(multipart_data.get('audio')[0])

    # SECURITY(review): API key is hard-coded in source; move to env/config.
    stt_authenticator = BasicAuthenticator(
        'apikey', 'zlgvP1jXnCCBMsNIK76OFagmlJcnlEC_BnmptKYXun3u')

    # Construct a Watson STT client with the authentication object
    stt = SpeechToTextV1(authenticator=stt_authenticator)

    # Set the URL endpoint for your Watson STT client
    stt.set_service_url(
        'https://api.us-south.speech-to-text.watson.cloud.ibm.com')

    # Read audio file and call Watson STT API:
    # https://cloud.ibm.com/apidocs/speech-to-text?code=python#recognize
    with open(
            os.path.join(os.path.dirname(__file__), './.',
                         'audio_sample.flac'), 'rb') as audio_file:
        stt_result = stt.recognize(audio=audio_file,
                                   content_type='audio/flac',
                                   model='pt-BR_BroadbandModel').get_result()

    # Print STT API call results
    print(json.dumps(stt_result, indent=2))

    # Return a dictionary with the transcribed text
    return {
        "transcript": stt_result['results'][0]['alternatives'][0]['transcript']
    }
Ejemplo n.º 26
0
def transcribe_sync(audio_filepath: str) -> DetailedResponse:
    """Synchronously transcribe *audio_filepath* with Watson STT.

    Returns the raw DetailedResponse from recognize().
    """
    with open(audio_filepath, "rb") as audiofile:
        stt = SpeechToTextV1(authenticator=IAMAuthenticator(APIKEY))
        # Opt out of IBM's data collection for this session.
        stt.set_default_headers({"X-Watson-Learning-Opt-Out": "true"})
        stt.set_service_url(APIURL)
        click.echo("Transcribing audio, this may take a while...")
        return stt.recognize(
            audiofile,
            word_confidence=True,
            end_of_phrase_silence_time=30.0,
            profanity_filter=False,
        )
Ejemplo n.º 27
0
def main():
    """ROS node: stream audio from *input_topic* to Watson STT over a
    websocket; MyRecognizeCallback publishes transcripts."""
    rospy.init_node('s2t_rt', anonymous=True)
    # Get parameters
    input_topic = rospy.get_param('~input_topic')
    credentials_path = rospy.get_param('~credentials_path')
    format = rospy.get_param('~format', 'PCM')

    # Get credentials
    with open(credentials_path) as cf:
        credentials = yaml.safe_load(cf)

    speech_to_text = SpeechToTextV1(iam_apikey=credentials['apikey'],
                                    url=credentials['url'])
    queue = Queue(maxsize=10)
    audio_source = AudioSource(queue, is_recording=True, is_buffer=True)
    recognize_callback = MyRecognizeCallback('~transcript', '~interim')

    # Peek one message to derive the audio content type.
    msg = rospy.wait_for_message(input_topic, AudioData)
    if format == 'FLAC':
        content_type = 'audio/flac'
    else:
        # Get content type from message
        endianness = 'big-endian' if msg.is_bigendian else 'little-endian'
        content_type = """audio/l16; rate={}; channels={};
        endianness={}""".format(msg.sample_rate, msg.num_channels, endianness)
    recognizer = speech_to_text.recognize_using_websocket(
        audio=audio_source,
        content_type=content_type,
        recognize_callback=recognize_callback,
        interim_results=True,
        inactivity_timeout=-1)
    recognize_thread = threading.Thread(target=recognizer.start, args=())
    recognize_thread.daemon = True
    recognize_thread.start()

    def callback(msg):
        if format == 'FLAC':
            dtype = width_to_dtype(msg.sample_width)
            # np.fromstring is deprecated for binary input; frombuffer is the
            # supported equivalent.
            data = np.frombuffer(msg.data, dtype)
            with io.BytesIO() as flac_file:
                sf.write(flac_file, data, msg.sample_rate, format=format)
                # Bug fix: the original referenced an undefined 'ogg_file'
                # (NameError at runtime); the buffer is flac_file.
                queue.put(str(flac_file.getvalue()))
        else:
            queue.put(str(msg.data))

    rospy.Subscriber(input_topic, AudioData, callback)
    rospy.spin()
    recognizer.close()
Ejemplo n.º 28
0
    def process_file(self, path):
        """Transcribe a WAV file, run NLU entity extraction on the transcript,
        and return {'intent', 'slots', 'transcript'} (cached next to the wav).

        Returns None when STT produced no results.
        """
        cache_path = path.replace('.wav', '.watson')

        # Serve a previously computed result when the cache file exists.
        if os.path.exists(cache_path):
            with open(cache_path) as cached:
                return json.load(cached)

        # --- Speech to Text ---
        stt_service = SpeechToTextV1(
            authenticator=IAMAuthenticator(self._stt_apikey))
        stt_service.set_service_url(self._stt_url)

        with open(path, 'rb') as audio_file:
            stt_results = stt_service.recognize(
                audio=audio_file,
                content_type='audio/wav',
                language_customization_id=self._custom_id
            ).get_result()['results']

        if not stt_results:
            return None
        transcript = stt_results[0]['alternatives'][0]['transcript'].lower()

        # --- Natural Language Understanding ---
        nlu_service = NaturalLanguageUnderstandingV1(
            authenticator=IAMAuthenticator(self._nlu_apikey),
            version='2018-03-16')
        nlu_service.set_service_url(self._nlu_url)

        entities = nlu_service.analyze(
            features=Features(entities=EntitiesOptions(model=self._model_id)),
            text=transcript,
            language='en').get_result()['entities']

        # Fold the extracted entities into an intent plus slot values.
        intent = None
        slots = dict()
        for entity in entities:
            if entity['type'] == 'orderDrink':
                intent = 'orderDrink'
            else:
                slots[entity['type']] = entity['text']

        result = dict(intent=intent, slots=slots, transcript=transcript)

        # Persist for next time.
        with open(cache_path, 'w') as out:
            json.dump(result, out, indent=2)

        return result
Ejemplo n.º 29
0
def post_audio_to_speech_to_textAPI(filename):
    """Transcribe a Japanese MP3 with timestamps and speaker labels.

    Returns the full Watson recognize() result dict.
    """
    # NOTE(review): API key is hard-coded in source.
    stt = SpeechToTextV1(authenticator=IAMAuthenticator(
        'VNYNGcQrwZaumnTJMani2qbBa8veDOfjXQBRXsr3l5rX'))
    stt.set_service_url(
        'https://gateway-tok.watsonplatform.net/speech-to-text/api')
    with open(filename, 'rb') as audio_file:
        return stt.recognize(
            audio=audio_file,
            model="ja-JP_BroadbandModel",
            content_type='audio/mp3',
            timestamps=True,
            speaker_labels=True,
        ).get_result()
Ejemplo n.º 30
0
def speech_to_text(file):
    """Transcribe an uploaded pt-BR FLAC file, or return a mock when USE_MOCK.

    Returns None if the Watson API raises an ApiException.
    """
    try:
        if USE_MOCK:
            return speech_mock()
        audio_path = os.path.join(UPLOAD_FOLDER, file.filename)
        with open(audio_path, 'rb') as audio_file:
            service = SpeechToTextV1()
            return service.recognize(audio=audio_file,
                                     content_type='audio/flac',
                                     model='pt-BR_NarrowbandModel',
                                     max_alternatives=0).get_result()

    except ApiException as e:
        print(e.global_transaction_id)
        return None