from __future__ import print_function
from os.path import join, dirname
from ibm_watson import TextToSpeechV1
from ibm_watson import SpeechToTextV1
from ibm_watson import LanguageTranslatorV3
from playsound import playsound

# --- IBM Watson service clients (legacy, pre-4.0 SDK constructor style) ---
# NOTE(review): the `url`/`iam_apikey` constructor kwargs belong to old
# ibm-watson SDK releases; 4.x+ uses IAMAuthenticator + set_service_url.
# WARNING: live-looking API keys are committed in source — rotate them and
# load from the environment instead.
t2s = TextToSpeechV1(
    url='https://stream-fra.watsonplatform.net/text-to-speech/api',
    iam_apikey='Ia5Hm2hmHyofPhZw0iY6ZIehbE7miqUm1abf6SlTJ-7N')
s2t = SpeechToTextV1(
    url='https://stream-fra.watsonplatform.net/speech-to-text/api',
    iam_apikey='KChSwZGxpjGPTQr2uXOPjEEj2G0IbaOK2FZe78eAuaG5')
translator = LanguageTranslatorV3(
    version='2018-05-01',
    url='https://gateway-fra.watsonplatform.net/language-translator/api',
    iam_apikey='XWy87z-yWfCgk1fqRtpnntitN4N016pP0hox2g1BerCL')


def loose_in_translation(text, voice):
    """Synthesize `text` with the given Watson voice, save it as a WAV under
    resources/, and play it back.

    NOTE(review): this snippet is truncated — the speech-to-text step that
    follows the final print is missing from this file.
    """
    print('text to speech')
    # Write the synthesized audio next to this script as resources/<voice>.wav.
    with open(join(dirname(__file__), 'resources', voice + '.wav'),
              'wb') as audio_file:
        response = t2s.synthesize(text, accept='audio/wav',
                                  voice=voice).get_result()
        audio_file.write(response.content)

    print('playing result...')
    playsound(join(dirname(__file__), 'resources', voice + '.wav'), )

    print('speech to text')
Beispiel #2
0
import os
import sys
import pyaudio
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from threading import Thread
from queue import Queue, Full

# Flag checked elsewhere (presumably by the recording thread) to stop capture.
stop_now = False

# retrieve key from bash environment variable
iam_apikey = os.environ['IAM_APIKEY']

# set up an instance of the STT service
# NOTE(review): `url`/`iam_apikey` kwargs are the pre-4.0 SDK interface.
service = SpeechToTextV1(
    url='https://gateway-lon.watsonplatform.net/speech-to-text/api',
    iam_apikey=iam_apikey)

# Frames per microphone buffer read.
CHUNK = 1024
# Note: audio is discarded if the websocket client can't consume fast enough,
# so increase the max size as needed.
BUF_MAX_SIZE = CHUNK * 10
# Bounded buffer between the recorder and the websocket client.
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))

# Create an instance of AudioSource backed by the queue (blocking reads).
audio_source = AudioSource(q, True, True)


class MyRecognizeCallback(RecognizeCallback):
    def __init__(self):
Beispiel #3
0
import json
from os.path import join, dirname
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
import threading
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from utils import Utils

# Project-specific YAML config holding the Watson API key and service URL.
config = Utils.readYaml("config.yaml")

# initialize speech to text service (modern SDK: authenticator + URL)
authenticator = IAMAuthenticator(config['watson']['API_KEY'])
service = SpeechToTextV1(authenticator=authenticator)
service.set_service_url(config['watson']['URL'])

# List every speech model the service instance offers.
models = service.list_models().get_result()
print(json.dumps(models, indent=2))

# Inspect one specific model.
model = service.get_model('en-US_BroadbandModel').get_result()
print(json.dumps(model, indent=2))

# Transcribe a local WAV with word timestamps and per-word confidences.
with open(join(dirname(__file__), 'speech.wav'), 'rb') as audio_file:
    print(
        json.dumps(service.recognize(audio=audio_file,
                                     content_type='audio/wav',
                                     timestamps=True,
                                     word_confidence=True).get_result(),
                   indent=2))


# Example using websockets
Beispiel #4
0
from ibm_watson import SpeechToTextV1
from os.path import join, dirname
import json

# Legacy (pre-4.0 SDK) client construction; the API key is committed in
# source — rotate it and load from the environment instead.
speech_to_text = SpeechToTextV1(
    iam_apikey='PUfqdSXO-sKGCShdkJ_SX55gY35bm42s-aZqbbksSQUP',
    url='https://gateway-lon.watsonplatform.net/speech-to-text/api')

# Transcribe a FLAC file, spotting three keywords (0.5 threshold) and
# reporting word alternatives with confidence >= 0.9.
with open(join(dirname(__file__), './.', 'audio-file2.flac'),
          'rb') as audio_file:
    speech_recognition_results = speech_to_text.recognize(
        audio=audio_file,
        content_type='audio/flac',
        word_alternatives_threshold=0.9,
        keywords=['colorado', 'tornado', 'tornadoes'],
        keywords_threshold=0.5).get_result()
print(json.dumps(speech_recognition_results, indent=2))
#print(speech_recognition_results['results'][0]['alternatives'][0]['transcript'])
Beispiel #5
0
from ibm_watson import SpeechToTextV1
import time
import json
import io

# Legacy (pre-4.0 SDK) client; replace the placeholder credentials before
# running this maintenance script.
speech_to_text = SpeechToTextV1(
    iam_apikey="api_key",
    url="https://stream.watsonplatform.net/speech-to-text/api")

# Remove every existing custom language model on the instance.
language_models = speech_to_text.list_language_models().get_result()

customizations = language_models["customizations"]

for customization in customizations:
    # Renamed from `id` so the builtin id() is not shadowed.
    custom_id = customization["customization_id"]
    print("deleting customization:", custom_id)
    speech_to_text.delete_language_model(custom_id)

# Create a fresh Brazilian-Portuguese custom language model...
language_model = speech_to_text.create_language_model(
    'Education language model',
    'pt-BR_BroadbandModel',
    description='An education language model to use on distance learning'
).get_result()

customization_id = language_model["customization_id"]
print("creating customization:", customization_id)

# ...and read back its current training status.
language_model = speech_to_text.get_language_model(
    customization_id).get_result()
status = language_model["status"]
Beispiel #6
0
from ibm_watson import SpeechToTextV1
from os.path import join, dirname
import json

# Legacy (pre-4.0 SDK) client; the API key is committed in source — rotate
# it and load from the environment instead.
speech_to_text = SpeechToTextV1(
    iam_apikey='mRku2G3EtmnHGccaOihFJHvUI0VkhW07EEXij4HQtk5g',
    url='https://gateway-syd.watsonplatform.net/speech-to-text/api')


def speechToText(file_name):
    """Transcribe an MP3 located next to this script via Watson STT.

    Returns the parsed JSON recognition result.
    """
    audio_path = join(dirname(__file__), './.', file_name)
    with open(audio_path, 'rb') as audio_stream:
        result = speech_to_text.recognize(
            audio=audio_stream,
            content_type='audio/mp3',
        ).get_result()
    return result
Beispiel #7
0
# Service credentials come from a project-local `config` module (defined in
# a part of the file not shown here).
T2S_TOKEN = config.T2S_TOKEN
T2S_URL = config.T2S_URL
S2T_TOKEN = config.S2T_TOKEN
S2T_URL = config.S2T_URL

logger = logging.getLogger('TelegramBot')

# Modern (4.x+) SDK style: authenticator objects + explicit service URLs.
t2sauth = IAMAuthenticator(T2S_TOKEN)
s2tauth = IAMAuthenticator(S2T_TOKEN)

text2speech = TextToSpeechV1(authenticator=t2sauth)

text2speech.set_service_url(T2S_URL)

speech2text = SpeechToTextV1(authenticator=s2tauth)

speech2text.set_service_url(S2T_URL)


def convert_voice(audio_file):
    """Transcribe an OGG voice message with the pt-BR narrowband model and
    return the top transcript (also logged)."""
    recognition = speech2text.recognize(audio=audio_file,
                                        content_type='audio/ogg',
                                        model='pt-BR_NarrowbandModel')
    payload = recognition.get_result()
    transcript = payload['results'][0]['alternatives'][0]['transcript']
    logger.info('Detectada frase: ' + transcript)
    return transcript


def convert_text(message):
Beispiel #8
0
    def transcription(self):
        '''Transcribe every file in ``self.audio`` with Watson STT and split
        the words into sentence-like phrases using word timestamps.

        A gap of 0.45 s or more between consecutive words is treated as a
        sentence boundary.  Returns a list of
        ``[speaker_index, text, start_time, end_time]`` entries sorted by
        start time.  ``WATSON_API_KEY`` and ``SERVICE_URL`` are module-level
        constants defined elsewhere in this file.
        '''
        authenticator = IAMAuthenticator(WATSON_API_KEY)
        speech_to_text = SpeechToTextV1(authenticator=authenticator)
        speech_to_text.set_service_url(SERVICE_URL)

        results = []
        #Iterating through all inputted files; content type is guessed from
        #the file name, defaulting to FLAC.
        for item in self.audio:
            file_type = "audio/flac"
            if ".wav" in item:
                file_type = "audio/wav"
            elif ".mpeg" in item:
                file_type = "audio/mpeg"
            elif ".mp3" in item:
                file_type = "audio/mp3"
            # NOTE(review): handle is never closed — consider `with open(...)`.
            file = open(item, "rb")
            #API CAlL
            response = speech_to_text.recognize(file,
                                                content_type=file_type,
                                                smart_formatting=True,
                                                timestamps=True,
                                                inactivity_timeout=90)
            results.append(response.get_result())

        phrase = []    # finished [speaker, text, start, end] entries
        t_string = ""  # sentence currently being assembled
        t_start = 0    # start time of the current sentence
        temp = []      # word carried over between result segments

        # Obtain the timestamps of each word to include periods.
        # Each `word` entry from Watson is [token, start_sec, end_sec].
        for speaker, item in enumerate(results):
            temp = []
            for r_index, i in enumerate(item['results']):
                for j in i['alternatives']:
                    for index, word in enumerate(j['timestamps']):
                        #Take into account seperation in transcript from Watson IBM
                        if index == 0:
                            if temp:
                                if (word[1]) - (temp[1][2]) < 0.45:
                                    t_string += " " + str(word[0])
                                else:
                                    t_string += ". "
                                    phrase.append([
                                        speaker, t_string, t_start, temp[1][2]
                                    ])
                                    t_string = ""
                                    t_start = word[1]
                                    t_string += str(word[0])
                                    temp = []
                                continue
                            if t_string:
                                t_string += ". "
                                phrase.append([
                                    speaker, t_string, t_start,
                                    j['timestamps'][index - 1][2]
                                ])
                                t_string = ""
                                t_start = word[1]
                                t_string += str(word[0])
                                continue
                            t_string += word[0]
                            t_start = word[1]
                        # The amount of time to determine when a period is placed is decided here.
                        elif (word[1]) - (j['timestamps'][index -
                                                          1][2]) < 0.45:
                            t_string += " " + str(word[0])
                            if index == len(j['timestamps']) - 1:
                                if (len(results)
                                        == 1) or (r_index
                                                  == len(item['results']) - 1):
                                    t_string += ". "
                                    phrase.append(
                                        [speaker, t_string, t_start, word[2]])
                                    t_string = ""
                                else:
                                    temp = [index, word, speaker]
                        else:
                            t_string += ". "
                            phrase.append([
                                speaker, t_string, t_start,
                                j['timestamps'][index - 1][2]
                            ])
                            t_string = ""
                            t_start = word[1]
                            t_string += str(word[0])

        # Flush the final carried-over sentence, if any.
        if temp:
            t_string += ". "
            phrase.append([temp[2], t_string, t_start, temp[1][2]])

        #Sort the phrases from all the audio transcriptions in chronological order
        phrase.sort(key=lambda x: x[2])
        return phrase
Beispiel #9
0
from ibm_watson import TextToSpeechV1, SpeechToTextV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
import json

# Synthesis (text-to-speech)
# WARNING: API keys are committed in source — rotate them and load from the
# environment instead.
tts_authenticator = IAMAuthenticator(
    '16brHHpIvclEZodkhLMGcoq4FKN7_dTh601Gh9HauYRH')
tts = TextToSpeechV1(authenticator=tts_authenticator)
tts.set_service_url('https://stream.watsonplatform.net/text-to-speech/api')

# Say "Hello world" into a local WAV file.
with open('hello_world.wav', 'wb') as audio_file:
    audio_file.write(
        tts.synthesize('Hello world',
                       voice='en-US_AllisonVoice',
                       accept='audio/wav').get_result().content)

# Recognition (speech-to-text): round-trip the file we just synthesized.
stt_authenticator = IAMAuthenticator(
    'bXmuzE7OGUPlDTgQfqt_0RlmlfwcZSmVbpsGi8_7KEQ0')
stt = SpeechToTextV1(authenticator=stt_authenticator)
stt.set_service_url('https://stream.watsonplatform.net/speech-to-text/api')

with open('hello_world.wav', 'rb') as audio_file:
    print(
        json.dumps(stt.recognize(audio=audio_file,
                                 content_type='audio/wav',
                                 timestamps=True,
                                 word_confidence=True).get_result(),
                   indent=2))
from __future__ import print_function
import json
from os.path import join, dirname
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
import threading

# If service instance provides API key authentication
# NOTE(review): legacy (pre-4.0) SDK constructor; replace 'YOUR APIKEY'.
service = SpeechToTextV1(
    ## url is optional, and defaults to the URL below. Use the correct URL for your region.
    url='https://stream.watsonplatform.net/speech-to-text/api',
    iam_apikey='YOUR APIKEY')

# service = SpeechToTextV1(
#     username='******',
#     password='******',
#     url='https://stream.watsonplatform.net/speech-to-text/api')

models = service.list_models().get_result()
print(json.dumps(models, indent=2))

model = service.get_model('en-US_BroadbandModel').get_result()
print(json.dumps(model, indent=2))

# NOTE(review): this snippet is truncated — the recognize() call below is
# cut off mid-argument list in this file.
with open(join(dirname(__file__), '../resources/speech.wav'),
          'rb') as audio_file:
    print(json.dumps(
        service.recognize(
            audio=audio_file,
            content_type='audio/wav',
            timestamps=True,
# Extract the soundtrack from a previously loaded `video` clip.
# NOTE(review): `video` is defined in a part of the script not shown here.
audio = video.audio
audio.write_audiofile('./test.wav')

from pydub import AudioSegment
import os
# Convert the WAV to FLAC for upload; the absolute path is machine-specific
# and should be parameterized.
song = AudioSegment.from_wav(
    "C:/Users/Nimish/Documents/Flasktut/hirebot/test.wav")
song.export("test.flac", format="flac")

from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from os.path import join, dirname
import json

# Legacy (pre-4.0 SDK) client; fill in a real API key before running.
speech_to_text = SpeechToTextV1(
    iam_apikey='API_KEY',
    url='https://gateway-lon.watsonplatform.net/speech-to-text/api')

class MyRecognizeCallback(RecognizeCallback):
    """Websocket callback that dumps recognition events to stdout."""

    def __init__(self):
        RecognizeCallback.__init__(self)

    def on_data(self, data):
        # Pretty-print each recognition payload as it arrives.
        print(json.dumps(data, indent=2))

    def on_error(self, error):
        print('Error received: {}'.format(error))

    def on_inactivity_timeout(self, error):
        print('Inactivity timeout: {}'.format(error))
import speech_recognition as sr
from ibm_watson import SpeechToTextV1
import json

# Capture one utterance from the microphone with SpeechRecognition, then
# send the WAV bytes to Watson STT.
r = sr.Recognizer()
speech = sr.Microphone()

# Legacy (pre-4.0 SDK) constructor; substitute real credentials.
speech_to_text = SpeechToTextV1(iam_apikey="YOUR_API_KEY", url="YOUR_URL")

print('speech_to_text: ' + str(speech_to_text))

with speech as source:
    print("say something!!...")
    # NOTE(review): the first assignment's value is immediately overwritten —
    # adjust_for_ambient_noise is called only for its calibration side effect.
    audio_file = r.adjust_for_ambient_noise(source)
    audio_file = r.listen(source)
print('audio_file: ' + str(audio_file))

speech_recognition_results = speech_to_text.recognize(
    audio=audio_file.get_wav_data(), content_type='audio/wav').get_result()
print(json.dumps(speech_recognition_results, indent=2))
Beispiel #13
0
def upload_audio():
    """Flask view: accept an uploaded audio clip, transcribe it with Watson
    STT, and look up songs whose lyrics match the best transcript.

    Returns the rendered result page on success, or a 400 response for a
    bad MIME type, an unsupported language model, or clips longer than 15 s.
    (User-facing strings are intentionally left in Indonesian.)
    """
    audio = request.files["audio"]
    language = request.form["language"]
    allowed_mimes = ["audio/mp3", "audio/mpeg", "audio/ogg", "audio/wav"]
    allowed_language_models = ["en-US_BroadbandModel"]
    songs = [] # song hits to render on the result page
    
    # Validate the MIME type
    #return audio.content_type.split("/",1)[1]
    if audio.content_type not in allowed_mimes:
        return "File Harus {0}".format(",".join(allowed_mimes)), 400
        
    # Validate the language model
    if language not in allowed_language_models:
        return "Maaf Bahasa Tidak Disupport", 400    
    
    # Save audio file
    filename = secure_filename(audio.filename)
    save_path = os.path.join(settings.UPLOAD_FOLDER, filename) # save into the uploads folder
    audio.save(save_path)
    
    # Manipulate audio (format derived from the MIME subtype)
    song = AudioSegment.from_file(save_path, format=audio.content_type.split("/",1)[1])
    song_duration_in_seconds = len(song) / 1000
    # Reject clips longer than 15 seconds
    if int(song_duration_in_seconds) > 15:
        os.remove(save_path) # delete the uploaded file
        return "Durasi maksimal audio adalah 15 detik", 400
    # Normalize audio volume
    save_path = os.path.join(settings.UPLOAD_FOLDER, "normalize-" + filename)
    normalizing_song = song.normalize()
    normalizing_song.export(save_path, format=audio.content_type.split("/",1)[1])
        
    # Send the normalized audio to the IBM Speech to Text API
    # Authentication
    authenticator = IAMAuthenticator(settings.WATSON_API_KEY)
    speech_to_text = SpeechToTextV1(
        authenticator=authenticator
    )
    speech_to_text.set_service_url(settings.WATSON_SERVICE_URL)
    # Sending/Recognizing
    with open(save_path, "rb") as audio_file:
        speech_recognition_results = speech_to_text.recognize(
            audio=audio_file,
            content_type=audio.content_type,
            model=language,
            speech_detector_sensitivity=0.5,
            background_audio_suppression=0.5,
            smart_formatting=True,
            profanity_filter=False
        ).get_result()
        logging.error(json.dumps(speech_recognition_results, indent=2))
        
        # Did we get any transcript back?
        if len(speech_recognition_results["results"]) > 0:
            # Keep the top alternative of each segment
            transcripts = {"data": []}
            for transcript in speech_recognition_results["results"]:
                transcripts["data"].append(transcript["alternatives"][0])
            max_confidence_transcript = max(transcripts["data"], key=lambda x:x["confidence"]) # take the highest-confidence transcript
            # Find songs whose lyrics match the best transcript
            for song in Song.query.filter(Song.lyrics.match(max_confidence_transcript["transcript"])).limit(10).all():
                logging.error(song)
                songs.append({"artist": song.artist, "releases": song.releases, "title": song.title}) # collect song data
    
    return render_template("ResultPage.html", songs=songs), 200 # render the result
Beispiel #14
0
    return dialog_counter


# Initialize Watson Speech to Text Service

# WARNING: a complete service credential set (including the IAM API key) is
# committed here — rotate the key and load these values from the environment.
speech_to_text_credentials = {
    "apikey": "UF2I5P9NXt1HXnmye2fkDeP2kxp_tb9VVSWF3i5qjuZ3",
    "iam_apikey_description": "Auto-generated for key 16171d30-82c7-4b51-8f0e-fe656da5cdcd",
    "iam_apikey_name": "wdp-writer",
    "iam_role_crn": "crn:v1:bluemix:public:iam::::serviceRole:Writer",
    "iam_serviceid_crn": "crn:v1:bluemix:public:iam-identity::a/eed7635af9de4ec1a02ed80b7edae9dc::serviceid:ServiceId-445f4b12-5e0d-45d4-a284-7f2a7138cf52",
    "url": "https://api.us-east.speech-to-text.watson.cloud.ibm.com/instances/687866d5-4515-46ca-8789-9191fb6e41e3",
}

# Modern (4.x+) SDK style: authenticator + explicit service URL.
speech_to_text_authenticator = IAMAuthenticator(speech_to_text_credentials['apikey'])
speech_to_text_service = SpeechToTextV1(authenticator=speech_to_text_authenticator)
speech_to_text_service.set_service_url(speech_to_text_credentials['url'])


def speech_to_text(file_name):
    """Transcribe a WAV file with Watson STT and return the top transcript.

    Returns None when the service produced no results or the response is
    missing the expected fields.  Network/API errors raised by recognize()
    still propagate to the caller, as before.
    """
    with open(file_name, "rb") as audio_file:
        # NOTE(review): `continuous` was removed from the Watson STT API;
        # recent SDKs may reject or ignore it — confirm against the SDK version.
        result = speech_to_text_service.recognize(
            audio_file, content_type="audio/wav",
            continuous=True, timestamps=False,
            max_alternatives=1
        )
    try:
        return result.result['results'][0]['alternatives'][0]['transcript']
    except (KeyError, IndexError, TypeError):
        # Narrowed from a bare `except:` so unrelated failures (e.g.
        # KeyboardInterrupt, SystemExit) are no longer silently swallowed.
        return None
import sys
import json
import time
import os.path
from os import path
from beeply import notes
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtWidgets import QApplication, QWidget, QInputDialog, QLineEdit, QFileDialog, QMainWindow,QListWidget,QListWidgetItem
from PyQt5.QtGui import QIcon
from PyQt5.QtCore import Qt
from ibm_watson import TextToSpeechV1
from ibm_watson import SpeechToTextV1
from ibm_watson import ApiException
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
# Watson credentials and service URLs are masked with x's — supply your own
# API keys and instance URLs before running.
api=IAMAuthenticator("xxxxxxxxxxxxxxxxxxxxxxxxxxx")  #your authenticator id
speech2 = SpeechToTextV1(authenticator=api)
speech2.set_service_url("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx") # your service url
api1=IAMAuthenticator("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")                                       #your authenticator id
text2 = TextToSpeechV1(authenticator=api1)
text2.set_service_url("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")  # your service url
# beeply helper for an audible cue; 200 is presumably the beep frequency or
# duration — confirm against the beeply docs.
mybeep=notes.beeps(200)
class MainWindow(QMainWindow):
    fileName=""
    def __init__(self):
        super(MainWindow,self).__init__()
        self.setObjectName("MainWindow")
        self.setWindowModality(QtCore.Qt.ApplicationModal)
        self.setFixedSize(590, 597)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
Beispiel #16
0
import messager
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import zone

# Use the application default credentials
cred = credentials.Certificate('./cred1.json')
firebase_admin.initialize_app(cred)

db = firestore.client()

# Keyword watch-list used to flag alert-worthy transcripts (passed to the
# recognizer / matched against keywords_result downstream).
hot = ["intoxicated", "intoxicate", "toxic", "shooting","shoot","shot","blood","bleeding","virus","killed","kill","murder","murdering","murdered","injured","injury","harm","harmed","harming","attacker","offender","armed","arms","gun","steal","stole","robbery","punching","fired","fire","infected","STI","sexually","rape","raping","raped","explosion","explode","exploded","food-borne","illness","salmonella","ebola","coli","gunned","loose","disaster","tornado","hurricane","storm","sex","harrassment","harrasser","offender","killer","serial","bomb","bombing","threat","threatened","threatening","closed","close","fled","flee","escaped","flood","flooding","contaminated","contamination","contaminate","exposing","danger","lose","fire","naked","broke","substance","fight", "stolen"]

# Legacy (pre-4.0 SDK) client; the API key is committed in source — rotate it.
# NOTE(review): SpeechToTextV1 is not imported in the visible part of this
# snippet; the import presumably lives above — verify.
speech_to_text = SpeechToTextV1(
    iam_apikey='V_g8OgIsLNpHPQ9PBTF1i_0LnflSXmsiJiMQOZ6HOjTH',
    url='https://stream.watsonplatform.net/speech-to-text/api'
)

def getTextAndKeywords(json):
    """Collapse a Watson STT response into (full_transcript, keyword_list).

    `json` is the parsed recognize() result.  The top alternative of each
    result segment is kept; stripped transcripts are joined with single
    spaces, and every spotted keyword is collected in order.
    """
    pieces = []
    keywords = []
    for segment in json['results']:
        pieces.append(segment['alternatives'][0]['transcript'].strip())
        if 'keywords_result' in segment:
            keywords.extend(segment['keywords_result'].keys())
    return (' '.join(pieces).strip(), keywords)

while(True):
    filenames = sorted([f for f in listdir('audio') if not f.startswith('.')], key= lambda x : int(x.split('.')[0]))
    if (len(filenames) > 1): # if there is only one file, it is probably being currently written to
Beispiel #17
0
import assistant_setup
# One time initialization of the local TTS engine
engine = pyttsx3.init()

# Set properties _before_ you add things to say
engine.setProperty('rate', 150)  # Speed percent (can go over 100)
engine.setProperty('volume', 0.5)  # Volume 0-1

# Pull IBM credentials from a .env file / the environment.
load_dotenv()

# Prefer an 'assistant' service entry, falling back to legacy 'conversation'.
authenticator = (get_authenticator_from_environment('assistant')
                 or get_authenticator_from_environment('conversation'))
assistant = AssistantV1(version="2019-11-06", authenticator=authenticator)
workspace_id = assistant_setup.init_skill(assistant)

# No explicit credentials: the SDK discovers them from the environment.
speech_to_text = SpeechToTextV1()
language = 'en'


def play_mp3(path):
    """Play an MP3 with the external `mpg123` player, blocking until done."""
    player = subprocess.Popen(['mpg123', '-q', path])
    player.wait()


def record_audio():
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 44100
    RECORD_SECONDS = 5
    WAVE_OUTPUT_FILENAME = "record.wav"
Beispiel #18
0
from __future__ import print_function
import json
from os.path import join, dirname
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
import threading

# If service instance provides API key authentication
# NOTE(review): legacy (pre-4.0) SDK constructor; the API key is committed
# in source and should be rotated.
service = SpeechToTextV1(
    ## url is optional, and defaults to the URL below. Use the correct URL for your region.
    url='https://stream.watsonplatform.net/speech-to-text/api',
    iam_apikey='DBxOesEcwYTQK9-dvcaxTwBICWk0s3RwwEW6m-2eppDn')

# service = SpeechToTextV1(
#     username='******',
#     password='******',
#     url='https://stream.watsonplatform.net/speech-to-text/api')

models = service.list_models().get_result()
print(json.dumps(models, indent=2))

model = service.get_model('en-US_BroadbandModel').get_result()
print(json.dumps(model, indent=2))

# NOTE(review): this snippet is truncated — the print/json.dumps call below
# is missing its closing parentheses in this file.
with open(join(dirname(__file__), '../resources/speech.wav'),
          'rb') as audio_file:
    print(
        json.dumps(service.recognize(audio=audio_file,
                                     content_type='audio/wav',
                                     timestamps=True,
                                     word_confidence=True).get_result(),
Beispiel #19
0
def main(args):
    """Cloud-function entry point: accepts a multipart request carrying an
    audio sample (or a plain-text field) plus a chosen car, runs Watson STT
    and NLU entity/sentiment analysis, and returns a car recommendation.

    NOTE(review): credentials are the literal placeholder string 'apikey';
    real values must be injected before deployment.  The audio/text branch
    is selected by whether writing the uploaded audio succeeds (`teste`).
    """

    # Parse incoming request headers
    _c_type, p_dict = parse_header(
        args['__ow_headers']['content-type']
    )
    
    # Decode body (base64)
    decoded_string = b64decode(args['__ow_body'])

    # Set Headers for multipart_data parsing
    p_dict['boundary'] = bytes(p_dict['boundary'], "utf-8")
    p_dict['CONTENT-LENGTH'] = len(decoded_string)
    
    # Parse incoming request data
    multipart_data = parse_multipart(
        BytesIO(decoded_string), p_dict
    )
   

    try:
        # Build flac file from stream of bytes; failure (no audio part)
        # switches the function into text-only mode below.
        fo = open("audio_sample.flac", 'wb')
        fo.write(multipart_data.get('audio')[0])
        fo.close()
        teste=False
    except: 
        teste=True
    #teste = multipart_data.items
    # Grab the selected car from the form data
    carro=multipart_data.get('car')[0]
    
    if teste == False: 
         
         # Basic Authentication with Watson STT API
        stt_authenticator = BasicAuthenticator(
        'apikey',
        'apikey'
        )

         # STT authentication
        # Construct a Watson STT client with the authentication object
        stt = SpeechToTextV1(authenticator=stt_authenticator)

     # Set the URL endpoint for your Watson STT client
        stt.set_service_url('https://api.us-south.speech-to-text.watson.cloud.ibm.com')

        # Read audio file and call Watson STT API:
        with open(
            os.path.join(
                os.path.dirname(__file__), './.',
                'audio_sample.flac'
            ), 'rb'
        ) as audio_file:
            # Transcribe the audio.flac with Watson STT
            # Recognize method API reference: 
            # https://cloud.ibm.com/apidocs/speech-to-text?code=python#recognize
            stt_result = stt.recognize(
                audio=audio_file,
                content_type='audio/flac',
                model='pt-BR_BroadbandModel'
            ).get_result()

        authenticator_nlu = BasicAuthenticator(
        'apikey',
        'apikey'
        )
        natural_language_understanding = NaturalLanguageUnderstandingV1(version='2020-09-16',authenticator=authenticator_nlu)
        natural_language_understanding.set_service_url('https://api.us-south.natural-language-understanding.watson.cloud.ibm.com')

        # Run entity + sentiment extraction over the transcript
        texto_stt=stt_result['results'][0]['alternatives'][0]['transcript']
        try:
            nlu_resp = natural_language_understanding.analyze(text=texto_stt,features=Features(entities=EntitiesOptions(sentiment=True, model ='54f2d12a-54fb-4683-b89f-c76c8b93de3f'))).get_result()
        except ApiException as ex:
            print ("Method failed with status code " + str(ex.code) + ": " + ex.message)

    elif teste == True : 
        
        # Text-only path: no audio upload, take the text field instead
        texto=multipart_data.get('text')[0]
        carro=multipart_data.get('car')[0]

        authenticator_nlu = BasicAuthenticator(
        'apikey',
        'apikey'
        )
        natural_language_understanding = NaturalLanguageUnderstandingV1(version='2020-09-16',authenticator=authenticator_nlu)
        natural_language_understanding.set_service_url('https://api.us-south.natural-language-understanding.watson.cloud.ibm.com')

        # Define the NLU features (entities + sentiment, custom WKS model)
        try:
            nlu_resp = natural_language_understanding.analyze(text=texto,features=Features(entities=EntitiesOptions(sentiment=True, model ='54f2d12a-54fb-4683-b89f-c76c8b93de3f'))).get_result()
        except ApiException as ex:
            print ("Method failed with status code " + str(ex.code) + ": " + ex.message)

 
    sent_rec=[]
    sent_json=[]
    score_rec=[]
    score_json=[]
    ent_rec=[]
    ent_json=[]
    ment_json=[]
    # Collect per-entity sentiment; the *_rec lists keep only negatives
    try:
        for x in range(50):
            aux=nlu_resp['entities'][x]['sentiment']['label']
            sent_json.append(nlu_resp['entities'][x]['sentiment']['label'])
            score_json.append(nlu_resp['entities'][x]['sentiment']['score'])
            ent_json.append(nlu_resp['entities'][x]['type'])
            ment_json.append(nlu_resp['entities'][x]['text'])
        
            #print(aux)
            if  aux != 'neutral':
                if aux !='positive':
                    sent_rec.append(nlu_resp['entities'][x]['sentiment']['label'])
                    score_rec.append(nlu_resp['entities'][x]['sentiment']['score'])
                    ent_rec.append(nlu_resp['entities'][x]['type'])
                    #print("entrou")
        
    except:
        # NOTE(review): the bare except doubles as the loop terminator
        # (IndexError past the last entity) — fragile, left as-is here.
        saiu=1

    # Candidate cars and their fallback (second option) per category
    lista= ["FIAT 500","DUCATO","ARGO","FIORINO","MAREA","RENEGADE","CRONOS"]
    lista_seg_op=["TORO","ARGO","DUCATO","FIAT 500","CRONOS","CRONOS","ARGO"]
    lista_prioridade=["SEGURANCA","CONSUMO","DESEMPENHO","MANUTENCAO","CONFORTO","DESIGN","ACESSORIOS"]

    # If the user already owns a listed car, swap in its second option
    for x in range(len(lista)):
        if carro == lista[x]:
            lista[x]=lista_seg_op[x]

    # Pick the entity with the most negative score to drive the suggestion
    if sent_rec !=[]:

        #entidade.append("MANUTENCAO")
        #Sentimento.append(-1)
        #cont=0
        entidade_aux=0
        sent_aux=0

        for x in range(len(score_rec)):
            dif=abs(sent_aux-score_rec[x])

            if dif > 0.1:
                if score_rec[x] < sent_aux:
                    sent_aux= score_rec[x]
                    entidade_aux=ent_rec[x]
                    print(sent_aux,entidade_aux)
            elif dif < 0.1:
            # Tie-break using the category priority order
                #print("aqui")
                for y in range(len(lista)):
                    if entidade_aux == lista_prioridade[y]:
                        sent_aux=sent_aux
                        entidade_aux=entidade_aux
                    elif ent_rec[x] == lista_prioridade[y]:
                        sent_aux= score_rec[x]
                        entidade_aux=ent_rec[x]
        
        for x in range(len(lista)):
            if lista_prioridade[x] == entidade_aux:
                sugest=lista[x]
    else:
        sugest=""

    # Shape the per-entity results for the JSON response
    list_json=[]
    for x in range(len(sent_json)):
        list_json.append({"entity":ent_json[x], "sentiment": score_json[x],"mention": ment_json[x]})

    return {
        "recommendation":sugest,
        "entities":list_json
        
        
    }
Beispiel #20
0
def instantiate_stt(api_key, url_service):
    """Build a Watson Speech-to-Text client bound to `url_service`.

    `api_key` is the IAM API key for the STT instance.
    """
    client = SpeechToTextV1(authenticator=IAMAuthenticator(api_key))
    client.set_service_url(url_service)
    return client
Beispiel #21
0
def main(args):
    """Cloud Functions entry point.

    Decodes a base64 multipart request, optionally transcribes an uploaded
    FLAC clip with Watson Speech to Text, then runs Watson NLU entity and
    sentiment analysis over the transcribed (or provided) text.

    Args:
        args: IBM Cloud Functions web-action dict; request headers live in
            args['__ow_headers'] and the base64-encoded body in
            args['__ow_body'].

    Returns:
        The NLU ``analyze`` response as a plain dict.
    """
    # Parse incoming request headers to recover the multipart boundary.
    _c_type, p_dict = parse_header(args['__ow_headers']['content-type'])

    # Decode body (base64).
    decoded_string = b64decode(args['__ow_body'])

    # Set headers for multipart_data parsing.
    p_dict['boundary'] = bytes(p_dict['boundary'], "utf-8")
    p_dict['CONTENT-LENGTH'] = len(decoded_string)

    # Parse incoming request data.
    multipart_data = parse_multipart(BytesIO(decoded_string), p_dict)

    # The 'audio' part is optional: callers may send plain text instead.
    # (Bug fix: the original indexed multipart_data.get('audio')[0]
    # unconditionally, then tested an undefined name `audio`.)
    audio = multipart_data.get('audio')
    if audio:
        # Build a flac file from the uploaded stream of bytes.
        with open("audio_sample.flac", 'wb') as fo:
            fo.write(audio[0])

    car = multipart_data.get('car')[0]  # currently unused downstream
    text = multipart_data.get('text')[0]

    # NLU service credentials and custom Knowledge Studio model id.
    # NOTE(review): hard-coded credentials should move to action
    # parameters / environment variables.
    nlu_apikey = "R5Kq3Z4sJbPaepfWCC1d3iYch2kIEHJkF1sqnHZTC-C3"
    nlu_service_url = "https://api.us-south.natural-language-understanding.watson.cloud.ibm.com/instances/d26c8f6f-666f-44eb-a631-cb8b161f0c48"
    nlu_entity_model = "a52546bf-6061-4fd0-a3ec-f2e6aa6d19b9"

    # Instantiate the NLU service with its credentials.
    nlu_authenticator = IAMAuthenticator(apikey=nlu_apikey)
    nlu_service = NaturalLanguageUnderstandingV1(
        version='2018-03-16', authenticator=nlu_authenticator)
    nlu_service.set_service_url(nlu_service_url)

    # STT service credentials.
    stt_apikey = "-pCzIHgC12ljTpVXELSfx71BAP2yUmAlacQaD1YXdZqM"
    stt_service_url = "https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/2dda5ef8-4933-4096-8fb6-ad817e0e105c"

    stt_authenticator = IAMAuthenticator(apikey=stt_apikey)
    stt_service = SpeechToTextV1(authenticator=stt_authenticator)
    stt_service.set_service_url(stt_service_url)

    if audio:
        # Read audio file and call Watson STT API:
        with open(
                os.path.join(os.path.dirname(__file__), './.',
                             'audio_sample.flac'), 'rb') as audio_file:
            # Recognize method API reference:
            # https://cloud.ibm.com/apidocs/speech-to-text?code=python#recognize
            # (Bug fix: was `stt.recognize(...)` on an undefined name.)
            stt_result = stt_service.recognize(
                audio=audio_file,
                content_type='audio/flac',
                model='pt-BR_BroadbandModel').get_result()

        # JSON round-trip kept from the original code (effectively a deep
        # copy of the response dict). Bug fix: was `stt_results`, undefined.
        results_stt = json.loads(
            json.dumps(stt_result, indent=2, ensure_ascii=False))

        # The transcription overrides any text part supplied by the caller.
        text = results_stt['results'][0]['alternatives'][0]['transcript']

    # Entity + sentiment analysis with the custom model.
    nlu_response = nlu_service.analyze(
        text=text,
        features=Features(
            entities=EntitiesOptions(model=nlu_entity_model, sentiment=True)),
        language='pt').get_result()

    results_nlu = json.loads((json.dumps(nlu_response,
                                         indent=2,
                                         ensure_ascii=False)))

    return results_nlu
Beispiel #22
0

# In[11]:


# pip install ibm-cloud-sdk-core   <- shell command from a notebook cell; run as "!pip install ibm-cloud-sdk-core", not as Python


# In[21]:


from ibm_watson import SpeechToTextV1
from ibm_watson import LanguageTranslatorV3
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
# NOTE(review): despite its name, this variable holds an IAMAuthenticator
# object, not an API key string.
iam_apikey_s2t = IAMAuthenticator('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
s2t=SpeechToTextV1(authenticator=iam_apikey_s2t)
s2t.set_service_url("https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
# Transcribe a local WAV file with Watson STT.
filename='whatstheweatherlike.wav'
with open(filename,mode="rb") as wav:
    response = s2t.recognize(audio=wav,content_type="audio/wav")
response.result  # bare expression: displays output only in a notebook/REPL


# In[22]:


# First alternative of the first recognized segment.
recognized_text=response.result['results'][0]["alternatives"][0]["transcript"]
recognized_text  # bare expression: notebook display only


# In[33]:
Beispiel #23
0
from ibm_watson import SpeechToTextV1

url_s2t = "https://stream.watsonplatform.net/speech-to-text/api"

iam_apikey_s2t = "..."

# create a speech-to-text adapter object
# NOTE(review): the iam_apikey=/url= constructor kwargs belong to the old
# (pre-v4) ibm_watson SDK; current SDK versions take an `authenticator`
# object and set_service_url() instead.
s2t = SpeechToTextV1(iam_apikey=iam_apikey_s2t, url=url_s2t)

filename = "hello_this_is_python.wav"
# read the file in binary format
with open(filename, mode="rb") as wav:
    response = s2t.recognize(audio=wav, content_type='audio/wav')

# response.result
# {'results': [{'alternatives': [{'confidence': 0.91, 'transcript': 'hello this is python'}],
# 'final': True}], 'result_index': 0}

# First alternative of the first recognized segment.
recognized_text = response.result['results'][0]["alternatives"][0][
    "transcript"]
# recognized_text : 'hello this is python'
import speech_recognition as sr
from ibm_watson import SpeechToTextV1
import json

# Microphone capture via the speech_recognition package.
r = sr.Recognizer()
speech = sr.Microphone()

# NOTE(review): old (pre-v4) ibm_watson constructor style; current SDK
# versions take an `authenticator` object instead of iam_apikey/url kwargs.
speech_to_text = SpeechToTextV1(
    iam_apikey="0uxeZa-MaLmGWOXhOSZOB5SuNHQlxXqW-f60vqb34h62",
    url="https://gateway-syd.watsonplatform.net/speech-to-text/api")

print('speech_to_text: ' + str(speech_to_text))

with speech as source:
    print("say something!!...")
    # Fetched model metadata is never used afterwards.
    speech_model = speech_to_text.get_model(
        'pt-BR_NarrowbandModel').get_result()
    # This first binding is immediately overwritten by listen() below.
    audio_file = r.adjust_for_ambient_noise(source)
    audio_file = r.listen(source)
print('audio_file: ' + str(audio_file))

# Send the captured audio as WAV bytes to Watson STT and pretty-print it.
speech_recognition_results = speech_to_text.recognize(
    audio=audio_file.get_wav_data(), content_type='audio/wav').get_result()
print(json.dumps(speech_recognition_results, indent=2))
Beispiel #25
0
def main(args):
    """Cloud Functions entry point for the car-recommendation action.

    Extracts entities and per-entity sentiment from an uploaded FLAC clip
    (transcribed via Watson STT) or from a plain-text part, then recommends
    the car mapped to the most negative entity.

    Args:
        args: IBM Cloud Functions web-action dict ('__ow_headers' /
            '__ow_body').

    Returns:
        dict with 'recommendation' (str) and 'entities' (list).
    """
    model_id = '956978fa-b5e6-4108-96ad-3367bde3478b'

    # NLU client.
    # NOTE(review): hard-coded credentials should be action parameters.
    authenticatorNLU = IAMAuthenticator('bbTi93KoBLq60M_Lj5fMpXInVoYI_CJFp66VBBTtsmhE')
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2020-08-01', authenticator=authenticatorNLU)
    natural_language_understanding.set_service_url(
        'https://api.us-south.natural-language-understanding.watson.cloud.ibm.com/instances/340adba1-4277-46f0-aca3-412077e9b53d')

    # Recover the multipart boundary from the request headers.
    _c_type, p_dict = parse_header(args['__ow_headers']['content-type'])

    decoded_string = b64decode(args['__ow_body'])

    p_dict['boundary'] = bytes(p_dict['boundary'], "utf-8")
    p_dict['CONTENT-LENGTH'] = len(decoded_string)

    multipart_data = parse_multipart(BytesIO(decoded_string), p_dict)

    # Random file name so concurrent invocations do not clobber each other.
    name_audio = uuid.uuid4().hex.upper()[0:50] + '.flac'

    # The 'audio' part is absent for text-only requests: .get('audio') is
    # then None and indexing it raises TypeError. (Was a bare `except:`,
    # which also hid unrelated failures; the file is now closed via `with`.)
    try:
        with open(name_audio, 'wb') as fo:
            fo.write(multipart_data.get('audio')[0])
        has_audio = True
    except (TypeError, IndexError, OSError):
        has_audio = False

    if has_audio:  # transcribe the uploaded audio first
        stt_authenticator = BasicAuthenticator(
            'apikey',
            'MaKHsSDKPKgfvQRPDfbFhXSMfvY-JtogeRyQIZn6WPem'
        )
        stt = SpeechToTextV1(authenticator=stt_authenticator)
        stt.set_service_url('https://api.us-south.speech-to-text.watson.cloud.ibm.com')
        with open(
            os.path.join(
                os.path.dirname(__file__), './.',
                name_audio
            ), 'rb'
        ) as audio_file:
            stt_result = stt.recognize(
                audio=audio_file,
                content_type='audio/flac',
                model='pt-BR_BroadbandModel'
            ).get_result()
        transcript_audio = stt_result['results'][0]['alternatives'][0]['transcript']
        entities = getEntities(model_id, natural_language_understanding, transcript_audio)
    else:
        text = multipart_data.get('text')[0]
        entities = getEntities(model_id, natural_language_understanding, text)

    # NOTE(review): `general_sentiment` and `recommendations` are not
    # defined in this function — they must be module-level globals set
    # elsewhere in the file. Confirm before deploying.
    if general_sentiment > 0:
        # Overall positive feedback: nothing to recommend.
        return {"recommendation": "", "entities": entities}
    elif general_sentiment < 0:
        nums = [item['sentiment'] for item in entities]
        definidos = []
        min_sentiment = min(nums)
        if len(nums) == len(set(nums)):
            # No duplicate scores: take the entity holding the single
            # lowest sentiment. (Bug fix: the original appended the raw
            # float here, making `definidos[0]['entity']` below raise
            # TypeError whenever all sentiments were distinct.)
            definidos.append(entities[nums.index(min_sentiment)])
        else:
            # Duplicates exist: collect every index tied at the minimum
            # and break the tie by first occurrence.
            repetidos = [idx for idx, sentiment in enumerate(nums)
                         if sentiment == min_sentiment]
            if len(repetidos) > 1:
                definidos.append(entities[min(repetidos)])
            elif len(repetidos) == 1:
                definidos.append(entities[repetidos[0]])

        # Map the chosen entity to its recommended car.
        recommendation = ""
        for item in recommendations:
            if item['entity'] == definidos[0]['entity']:
                recommendation = item['car']
        return {
            'recommendation': recommendation,
            'entities': entities
        }

    # Neutral overall sentiment: entities only, no recommendation.
    return {
        "recommendation": "",
        "entities": entities
    }
Beispiel #26
0
def get_service():
    """Return a Speech-to-Text client authenticated with the module-level
    API_KEY and pointed at the module-level URL."""
    stt = SpeechToTextV1(authenticator=IAMAuthenticator(API_KEY))
    stt.set_service_url(URL)
    return stt
import os
from os.path import join, dirname
from dotenv import load_dotenv
from ibm_watson import SpeechToTextV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Read the .env file sitting next to this script.
dotenv_path = join(dirname(__file__), '.env')
load_dotenv(dotenv_path)

# IBM Watson integration: build a Speech to Text client from env vars.
ibm_token = os.environ.get("API_KEY_IBM")  # None if the variable is unset
api = IAMAuthenticator(ibm_token)
speech_to_text = SpeechToTextV1(authenticator=api)
url_service_ibm = os.environ.get("URL_IBM")
speech_to_text.set_service_url(url_service_ibm)

def Audio_To_Text(fileName):
    """Transcribe a local MP3 file with the module-level Watson STT client.

    Args:
        fileName: path to an MP3 audio file.

    Returns:
        The raw recognition result dict from Watson STT.
        (Previously the result was only printed and then discarded,
        making it unusable by callers; the print is kept for
        backward compatibility.)
    """
    with open(fileName, 'rb') as audio_file:
        result = speech_to_text.recognize(
            audio=audio_file, content_type="audio/mp3"
        ).get_result()
    print(result)
    return result
from ibm_watson import SpeechToTextV1
from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions
from os.path import join, dirname
import json

# NOTE(review): pre-v4 ibm_watson constructor style (iam_apikey/url kwargs);
# newer SDK versions require an authenticator object and set_service_url().
speech_to_text = SpeechToTextV1(
    iam_apikey='****',
    url='https://stream.watsonplatform.net/speech-to-text/api')

def get_per_user_transcript(fileLocation):
	"""Transcribe a WAV file with speaker labels and build a map from each
	word's start time to the word itself.

	NOTE(review): the computed `start_to_wordMap` is never returned or
	stored, so this snippet looks truncated — confirm against the original
	source before relying on it.
	"""
	with open(join(dirname(__file__), './.', fileLocation), 'rb') as audio_file:
		result = speech_to_text.recognize(
			audio=audio_file,
			content_type='audio/wav',
			speaker_labels='true'
		).get_result()

		# Flatten per-segment word timestamps into one list. Entries look
		# like [word, start, ...]: t[0] gets '.' appended below and t[1]
		# keys the map — TODO confirm against the STT response schema.
		results = result['results']
		timestamps = []

		for res in results:
			current_timestamps = res['alternatives'][0]['timestamps']
			# Mark the last word of each segment with a trailing '.'.
			current_timestamps[-1][0] += '.'
			for t in current_timestamps:
				timestamps.append(t)

		# Map word start time -> word text.
		start_to_wordMap = {}
		for stamp in timestamps:
			start_to_wordMap[stamp[1]] = stamp[0]

from dotenv import load_dotenv
from pathlib import Path  # python3 only
import os

# Load the Watson credentials file into the process environment.
env_path = Path('./resources/ibm-credentials-s2t.env')
print(env_path.absolute())
load_dotenv(dotenv_path=env_path)

url_s2t = os.getenv("SPEECH_TO_TEXT_URL")
iam_apikey_s2t = os.getenv("SPEECH_TO_TEXT_IAM_APIKEY")
# NOTE(review): printing the API key leaks a secret to stdout/logs.
print(url_s2t, iam_apikey_s2t)

# Create a Speech To Text adapter object; the parameters are the endpoint and API key.
# http://watson-developer-cloud.github.io/python-sdk/v0.25.0/apis/watson_developer_cloud.speech_to_text_v1.html
# NOTE(review): IAMAuthenticator / SpeechToTextV1 are not imported in this
# snippet — they must come from earlier in the file.
authenticator = IAMAuthenticator(iam_apikey_s2t)
s2t = SpeechToTextV1(authenticator=authenticator)
s2t.set_service_url(url_s2t)

# the audio file that we will use to convert into text.
audioFilePath = './resources/PolynomialRegressionandPipelines.mp3'

# "rb" is similar to read mode, but it ensures the file is opened in binary mode.
# We use the method <code>recognize</code> to return the recognized text.
with open(audioFilePath, mode="rb") as wav:
    response = s2t.recognize(audio=wav, content_type='audio/mp3')

# The attribute result contains a dictionary that includes the transcription:
print(response.result)

from pandas import json_normalize
Beispiel #30
0
from ibm_watson import SpeechToTextV1
from os.path import join, dirname
import json
from ibm_watson.websocket import RecognizeCallback, AudioSource

# NOTE(review): pre-v4 ibm_watson constructor style; newer SDK versions take
# an authenticator object and set_service_url() instead.
speech_to_text = SpeechToTextV1(
    iam_apikey='GFT9-N0g7zSU9FIM1YLNL7ZyzLdIJ1s_EkluUjYK8B1s',
    url='https://gateway-lon.watsonplatform.net/speech-to-text/api')

# Commented-out example: attach an audio resource to a custom language model.
#with open(join(dirname(__file__), './.', 'audio1.wav'),
#               'rb') as audio_file:
#    speech_to_text.add_audio(
#        '{customization_id}',
#        'audio1',
#        audio_file,
#        content_type='audio/wav'
#    )
# Poll for audio status.

class MyRecognizeCallback(RecognizeCallback):
    def __init__(self):
        RecognizeCallback.__init__(self)

    def on_data(self, data):
        print(
            json.dumps(data['results'][0]['alternatives'][0]['transcript'],
                       indent=2))

    def on_error(self, error):
        print('Error received: {}'.format(error))