def __init__(self, username, password):
    """Create a SpeechToText wrapper object.

    username -- username for the Watson STT service
    password -- password for the Watson STT service
    """
    self.user = username
    self.pas = password
    # Opt out of IBM's data-collection by default.
    self.speech_to_text = SpeechToTextV1(
        username=username,
        password=password,
        x_watson_learning_opt_out=True,
    )
Beispiel #2
0
def main(args):
    """Transcribe every not-yet-processed .wav file in a category directory.

    For each wav file without a companion .json, look up its row in the
    category spreadsheet, send the audio to Watson STT, attach the row's
    link to the result, and save it as JSON next to the audio file.

    args -- namespace with `videos_dir` and `category` attributes.
    """
    directory = args.videos_dir
    selected_cat = args.category

    speech_to_text = SpeechToTextV1(username=IBM_USERNAME,
                                    password=IBM_PASSWORD)

    # Collect .wav files that do not yet have a companion .json result.
    audio_names = []
    for video_file in os.listdir(os.path.join(directory, selected_cat)):
        if video_file.endswith(".wav") and not os.path.isfile(
                os.path.join(directory, selected_cat,
                             video_file[0:-4] + '.json')):
            audio_names.append(video_file)
    audio_names = natsorted(audio_names)

    num_files = len(audio_names)
    print('found', num_files, 'files')

    # Read the spreadsheet; expected columns include 'Video' and 'Link'.
    df = pd.read_excel(os.path.join(directory, selected_cat + '.xlsx'))

    for audio_name in audio_names:
        # Find the row whose 'Video' cell mentions this file's stem.
        # na=False makes missing 'Video' cells non-matching; the original
        # `== True` comparison achieved the same by filtering out NaN.
        data = df[df['Video'].str.contains(audio_name[:-4], na=False)]

        link = ''

        if data.shape[0] == 0:
            print('Not found in spreadsheet:', audio_name)
        else:
            link = data.iloc[0]['Link']

            # Extract detailed text (timestamps + confidences) using Watson.
            print('Extracting detailed text using Watson for', audio_name)
            audio_path = os.path.join(directory, selected_cat, audio_name)

            with open(audio_path, "rb") as audio_file:
                result = speech_to_text.recognize(
                    audio_file,
                    content_type="audio/wav",
                    model='es-ES_BroadbandModel',
                    timestamps=True,
                    word_confidence=True,
                ).get_result()

            # Add the spreadsheet link to the results.
            result['link'] = link

            # Save the enriched result next to the audio file.
            out_json_path = audio_path[:-4] + '.json'
            with open(out_json_path, 'w') as outfile:
                json.dump(result, outfile)
Beispiel #3
0
def get_txt(filename):
    """Record audio into *filename*, transcribe it with Watson STT and
    return the first transcript string."""
    record(filename)
    service = SpeechToTextV1(
        username='******',
        password='******')
    with open(filename, 'rb') as audio_file:
        response = service.recognize(audio_file,
                                     content_type='audio/wav',
                                     smart_formatting='true')
    return response['results'][0]['alternatives'][0]['transcript']
Beispiel #4
0
def sendToSTT():
    """Transcribe sample.wav with Watson STT and return the transcript text.

    Relies on module-level USER, PSWD, CONT_TYPE and LANG settings.
    Returns one transcript line (newline-terminated) per result segment.
    """
    stt = SpeechToTextV1(username=USER, password=PSWD)
    # `with` guarantees the file handle is closed (the original leaked it).
    with open("sample.wav", "rb") as audio_file:
        result = stt.recognize(audio=audio_file,
                               content_type=CONT_TYPE,
                               model=LANG)
        result_dict = result.get_result()
    # Best alternative of every recognized segment, one per line.
    return ''.join(segment["alternatives"][0]["transcript"] + '\n'
                   for segment in result_dict["results"])
Beispiel #5
0
def stt(filename):
    """Transcribe *filename* (relative to this module) with Watson STT and
    return the raw recognition response."""
    service = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False
    )

    audio_path = join(dirname(__file__), filename)
    with open(audio_path, 'rb') as audio_file:
        response = service.recognize(
            audio_file,
            content_type='audio/wav',
            timestamps=False,
            word_confidence=False,
        )
    return response
 def __init__(self):
     """Start microphone streaming and bring up the Watson STT pipeline.

     Reads credentials via get_key_and_pass(), builds the SDK client and
     enters the processing loop (spin()).
     """
     # NOTE: the original read `threshold=4  000`, a syntax error;
     # restored to the intended 4000.
     self.swrap = SWRAP(threshold=4000, mode=SWRAP.STREAM)
     self.swrap.stream_start()
     self.get_key_and_pass()
     self.result = WatsonPrediction()
     self.watsonstt = SpeechToTextV1(
         username=self.username,
         password=self.password,
         x_watson_learning_opt_out=False
     )
     log.info("Watson activated")
     self.spin()
def recognize_speech_ibm_plus(yourkey, url_address, audio_input, csv_output):
    """Transcribe *audio_input* with IBM Watson STT and write a Datavyu-style
    CSV of (onset, offset, word) rows (times in milliseconds) to *csv_output*.

    yourkey     -- IBM IAM API key
    url_address -- IBM service URL
    audio_input -- path to a wav file
    csv_output  -- path for the resulting CSV

    NOTE(review): the intermediate JSON is written to a hardcoded
    C:/Users/taotao/... path; this only works on that machine — should be
    parameterized or use a temp file.
    """
    # set your authorization
    speech_to_text = SpeechToTextV1(iam_apikey=yourkey, url=url_address)

    # use IBM API to recognize your audio
    files = audio_input
    with open(files, 'rb') as audio_file:
        speech_recognition_results = speech_to_text.recognize(
            audio=audio_file,
            model="en-US_NarrowbandModel",
            # there are 8 models in the IMB API, check their website and get the most suitable one
            content_type=
            'audio/wav',  # the format of your audio and wav is a recommended one
            timestamps=
            True,  # choose if you want to get the onset and offset of your transcription
        ).get_result()

    # Persist the raw result as JSON, then read it back below.
    with open("C:/Users/taotao/Desktop/research/test/data.json",
              "w") as write_file:
        json.dump(speech_recognition_results, write_file)
    data = r"C:/Users/taotao/Desktop/research/test/data.json"

    # Read and convert json to dictionary
    def js_r(data):
        with open(data, encoding='utf-8') as f_in:
            return (json.load(f_in))

    my_dic_data = js_r(data)

    # make sure you remember the audio you opened and double check
    print(audio_input)

    # Flatten per-segment timestamp triples [word, start, end] into one list.
    dict_step1 = my_dic_data['results']
    words = []
    for n in dict_step1:
        sentence = n['alternatives'][0]['timestamps']
        words = words + sentence

    # Build a table and convert seconds -> milliseconds.
    list_headline = ['word', 'onset', 'offset']
    table = pd.DataFrame(columns=list_headline, data=words)
    table[['onset']] = table[['onset']] * 1000
    table[['offset']] = table[['offset']] * 1000

    # to fit this table with datavyu, we need to change the order of the columns
    table = table[['onset', 'offset', 'word']]
    print(table)

    # store the table in a csv file
    table.to_csv(csv_output)
Beispiel #8
0
 def _get_client(self, client_type):
     """Build a Watson SDK client from the YAML configuration.

     client_type -- "stt" yields a SpeechToTextV1 client; any other
     value yields a ToneAnalyzerV3 client.
     """
     if client_type == "stt":
         cfg = self.yml.get("speech_to_text")
         return SpeechToTextV1(url=cfg.get("endpoint"),
                               username=cfg.get("username"),
                               password=cfg.get("password"))
     cfg = self.yml.get("tone_analyzer")
     return ToneAnalyzerV3(version=cfg.get("version"),
                           username=cfg.get("username"),
                           password=cfg.get("password"))
def sendToSTT():
    """Transcribe sample.wav with Watson STT (IAM auth) and return the text.

    Relies on module-level KEY, URL, CONT_TYPE and LANG settings.
    Returns one transcript line (newline-terminated) per result segment.
    """
    stt = SpeechToTextV1(iam_apikey=KEY, url=URL)
    # `with` guarantees the file handle is closed (the original leaked it).
    with open("sample.wav", "rb") as audio_file:
        result = stt.recognize(audio=audio_file,
                               content_type=CONT_TYPE,
                               model=LANG)
        result_dict = result.get_result()
    print(result_dict)
    text = ""
    for segment in result_dict["results"]:
        text += segment["alternatives"][0]["transcript"] + '\n'
    return text
Beispiel #10
0
def call_to_watson_speech_to_text(filepath):
    """Transcribe the wav at *filepath* with timestamps, word confidences
    and speaker labels; returns the raw recognition response."""
    service = SpeechToTextV1(username=WATSON_SPT_SERVICE_USERNAME,
                             password=WATSON_SPT_SERVICE_PASSWORD,
                             x_watson_learning_opt_out=False)
    # Kept for their service round-trips; return values are unused.
    models = service.models()
    us_model = service.get_model('en-US_BroadbandModel')
    with open(filepath, 'rb') as audio_file:
        return service.recognize(audio_file,
                                 content_type='audio/wav',
                                 timestamps=True,
                                 word_confidence=True,
                                 speaker_labels=True)
Beispiel #11
0
    def __init__(self, key, url):
        """Call super constructors and auth to IBM sdk by creating sdk stt interface

        Args:
            url (str): The IBM API url can be found in service credentials of stt service
            key (str): The IBM API key can be found in service credentials of stt service

        """
        ProcessService.__init__(self)
        RecognizeCallback.__init__(self)
        self.key = key
        self.url = url
        # Authenticated SDK client used for all recognition calls.
        self.interface = SpeechToTextV1(iam_apikey=key, url=url)
def send_to_watson():
    """Transcribe sample.wav with Watson STT and return the concatenated text.

    Relies on module-level USER, PSWD, CONT_TYPE and LANG settings.
    """
    stt = SpeechToTextV1(username=USER, password=PSWD)
    # `with` guarantees the file handle is closed (the original leaked it).
    with open("sample.wav", "rb") as audio_file:
        result = stt.recognize(audio=audio_file,
                               content_type=CONT_TYPE,
                               model=LANG)
        result_dict = result.get_result()

    # Concatenate the best alternative of every recognized segment.
    text = ""
    for segment in result_dict['results']:
        text += segment['alternatives'][0]['transcript']

    return text
Beispiel #13
0
def texttospeech(request):
    """Django view: transcribe the uploaded 'audio' wav, then run Personality
    Insights on the text in file.txt and return facet percentiles as JSON.

    Renders vfl/ser1.html on any WatsonApiException.
    """
    # SECURITY: credentials are hardcoded in source; move them to settings
    # or environment variables before shipping.
    speech_to_text = SpeechToTextV1(
        iam_apikey='ivcz4sw1451NvNDgU_9Jfc9y4EqIpo4Qmy8iW4X8x-xX',
        url='https://stream.watsonplatform.net/speech-to-text/api')
    try:
        speech_recognition_results = speech_to_text.recognize(
            audio=request.FILES["audio"],
            content_type='audio/wav').get_result()
        data = speech_recognition_results
        # NOTE: `text` is extracted but not used below (preserved behavior).
        text = data.get("results")[0].get("alternatives")[0].get("transcript")

        personality_insights = PersonalityInsightsV3(
            version='2018-09-20',
            iam_apikey='JoSYNcMGd-pWBUQV289Fv8gh0kFpH5_SDCENobZTruqA',
            url='https://gateway.watsonplatform.net/personality-insights/api')

        try:
            # `with` closes the handle even on errors (the original leaked it).
            with open("file.txt", "r") as text_file:
                profile = personality_insights.profile(
                    content=text_file.read(),
                    accept='application/json',
                    content_type='text/plain').get_result()

            results = {}

            # The Big Five traits arrive in a fixed order.
            personality = profile.get("personality")
            openness = personality[0].get("children")
            conscientiousness = personality[1].get("children")
            extraversion = personality[2].get("children")
            agreeableness = personality[3].get("children")
            neuroticism = personality[4].get("children")
            traits = [
                openness, conscientiousness, extraversion, agreeableness,
                neuroticism
            ]

            # Each trait has five facet children; map name -> percentile.
            for trait in traits:
                for x in range(5):
                    name = trait[x].get("name")
                    percentile = trait[x].get("percentile")
                    results[name] = percentile

            return HttpResponse(json.dumps(results))

        except WatsonApiException as ex:
            print("Method failed: " + ex.message + ": " + str(ex.code))
            return render(request, 'vfl/ser1.html')

    except WatsonApiException as ex:
        print("Method failed with status code " + ex.message)
        return render(request, 'vfl/ser1.html')
Beispiel #14
0
def convert_audio_to_text(file):
    """Transcribe the FLAC audio at *file* and return the first transcript."""
    service = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False)

    with open(file, 'rb') as audio:
        output = service.recognize(audio,
                                   content_type='audio/flac',
                                   timestamps=True,
                                   word_confidence=True)

    # Best alternative of the first recognized segment.
    return output['results'][0]['alternatives'][0]['transcript']
Beispiel #15
0
def speech_to_text(filename, model_id):
    """Transcribe the wav at *filename* using the given Watson model id and
    return the first transcript string."""
    client = SpeechToTextV1(iam_apikey=keys.speech_to_text_key)

    # recognize() returns a DetailedResponse; get_result() yields its JSON:
    # https://cloud.ibm.com/apidocs/speech-to-text/speech-to-text?code=python#response-details
    with open(filename, 'rb') as audio_file:
        response = client.recognize(audio=audio_file,
                                    content_type='audio/wav',
                                    model=model_id).get_result()

    return response['results'][0]['alternatives'][0]['transcript']
Beispiel #16
0
def receive_audio(speech_file):
    """Transcribe *speech_file* (wav, relative to this module) and return the
    recognition response pretty-printed as a JSON string."""
    service = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False)

    audio_path = join(dirname(__file__), speech_file)
    with open(audio_path, 'rb') as audio_file:
        recognition = service.recognize(audio_file,
                                        content_type='audio/wav',
                                        timestamps=True,
                                        word_confidence=True)
        text = json.dumps(recognition, indent=2)

    return text
 def __init__(self):
     """Set up Watson TTS/STT clients and default audio-capture settings."""
     super(IBMWatsonUtility, self).__init__()
     # Both services share the same credential set.
     creds = dict(username='******',
                  password='******',
                  x_watson_learning_opt_out=False)
     self.TTS = TextToSpeechV1(**creds)
     self.STT = SpeechToTextV1(**creds)
     # Capture defaults: silence threshold, buffer size, 16-bit samples.
     self.threshold = 500
     self.chunk_size = 1024
     self.format = pyaudio.paInt16
     self.rate = 44100
Beispiel #18
0
    def __init__(self, ):
        """Set up the mock-interview bot: canned remarks, the question bank,
        and Watson TTS/STT plus NLU clients (credentials come from the
        environment — no explicit username/password here)."""
        # Answers shorter than this many words get a neutral reaction.
        self.MIN_ANSWER_LEN = 5
        # Minimum NLU sentiment confidence before reacting pos/neg.
        self.MIN_CONFIDENCE = 0.60
        self.SMALL_TALK = ['I see.', 'Got it.', 'Ok', 'Interesting']
        self.POSITIVE_REMARK = [
            "Good.", "Excellent!", "Sounds great!", "That's awesome!",
            "Wonderful!"
        ]
        self.NEGATIVE_REMARK = [
            "I'm sad to hear that.", "That doesn't sound very good.",
            "I'm sad to hear that.", "ah",
            "Someone forgot to have their coffee today"
        ]
        # Interview question bank, asked in order or sampled by the caller.
        self.questions = [
            'Tell me about yourself',
            'Tell me about a recent project that you worked on',
            'What are your greatest weaknesses?',
            'What did you dislike the most about your last job?',
            'If you were an animal, which one would you want to be?',
            'What are your hobbies?',
            'What is your greatest professional achievement?',
            'Why do you want to work here?', 'What are your strengths?',
            'Where do you see yourself in five years?',
            'What type of work environment do you prefer?',
            "What's a time you disagreed with a decision that was made at work?",
            'Why was there a gap in your employment?',
            'Can you explain why you changed career paths?',
            'How do you deal with pressure or stressful situations?',
            'What would your first 30, 60, or 90 days look like in this role?',
            'What are your salary requirements?',
            'How many tennis balls can you fit into a limousine?',
            'Are you planning on having children?',
            'How many ping pong balls fit on a 737?',
            'Describe a difficult work situation / project and how you overcame it',
            'How are you different from the competition?',
            'Do you take work home with you?',
            'How do you view yourself? Whom do you compare yourself to?',
            'What motivates you',
            'What did you like most about your last job?',
            'What did you dislike most about your last job?',
            'Why should I take a risk on you?'
        ]
        self.text_to_speech = TextToSpeechV1(
            x_watson_learning_opt_out=True)  # Optional flag
        self.speech_to_text = SpeechToTextV1(x_watson_learning_opt_out=False)
        self.nlu = NLU(version='2017-02-27')

        # Scratch wav file for recorded answers.
        self.TEMPFILE = './temp/output.wav'
        # Per-question answer transcripts and their sentiment scores.
        self.answers, self.sentiments = [], []
Beispiel #19
0
def get_sound_text(name):
    '''
        This function accepts one string name and returns the string that the audio file refers to.
        name can be any 8 values below:
        name_list = ['airplane','ball','book','helicopter','laptop','ocean','strawberry','train']
    '''
    import json
    from os.path import join, dirname
    from watson_developer_cloud import SpeechToTextV1

    name_list = [
        'airplane', 'ball', 'book', 'helicopter', 'laptop', 'ocean',
        'strawberry', 'train'
    ]
    if name not in name_list:
        return 'You gave "name" a wrong value, it is not in our list'

    speech_to_text = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False)
    # NOTE(review): the second argument to join() is absolute, so join()
    # discards dirname(__file__) entirely — the hardcoded /home/steve path
    # is what is actually opened.
    with open(
            join(
                dirname(__file__),
                '/home/steve/Documents/Presentation/request_sound/' + name +
                '.mp3'), 'rb') as audio_file:
        # The full response is serialized to an indented JSON string and
        # then parsed back by line below.
        watson_result = json.dumps(speech_to_text.recognize(
            audio_file,
            content_type='audio/mp3',
            timestamps=False,
            word_confidence=True),
                                   indent=2)

    print('Watson\'s sound guessing result is: ')
    print(watson_result
          )  # Here we get watson's result and we print watson's result

    resultstr = str(
        watson_result
    )  # Below we slice the result and keep the string that the sound refers to
    # Scan the pretty-printed JSON line by line for the transcript entry.
    # NOTE(review): if no line contains "transcript", termstr is never
    # assigned and the code below raises NameError — TODO confirm intended.
    resultlist = resultstr.splitlines()
    for i in resultlist:
        if "transcript" in i:
            termstr = i  # termstr contains "transcript": "correct_word"(it's our word)
    start_index = termstr.find(': "')
    rm_start_str = termstr[start_index + 3:]
    end_index = rm_start_str.find('"')
    # NOTE(review): end_index - 1 drops the character before the closing
    # quote — presumably the trailing space Watson appends to transcripts;
    # verify against actual service output.
    return rm_start_str[:end_index -
                        1]  # The return value is exactly the string that the sound refers to
Beispiel #20
0
def speechToText(filePath):
    """Transcribe the wav at *filePath* and dump the recognition JSON into a
    .txt file with the same stem; returns the text file's path."""
    modified_file_path = filePath[0:len(filePath) - 3] + 'txt'
    speech_to_text = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False
    )
    # Both handles are managed by `with` so they close even on errors; the
    # original left the txt handle open (and created an empty file) when
    # recognition failed.
    with open(join(dirname(__file__), filePath), 'rb') as audio_file, \
            open(modified_file_path, 'w') as txt_file:
        txt_file.write(json.dumps(speech_to_text.recognize(
            audio_file, content_type='audio/wav', timestamps=True,
            word_confidence=True),
            indent=2))
    return modified_file_path
Beispiel #21
0
    def calc_watson_STT(self, afile):
        """Recognize the wav at *afile* with Watson STT.

        Prints the en-US broadband model description, then returns the
        recognition response (with word timestamps).
        """
        service = SpeechToTextV1(
            username="******",
            password="******",
            x_watson_learning_opt_out=False)
        model_info = service.get_model('en-US_BroadbandModel')
        print(json.dumps(model_info, indent=2))

        with open(afile, 'rb') as audio_file:
            response = service.recognize(audio_file,
                                         content_type='audio/wav',
                                         timestamps=True,
                                         word_confidence=False)
        return response
Beispiel #22
0
def TranscodeFromFile(path, sample_rate):
    """Transcribe the audio file at *path* via Watson STT.

    sample_rate is currently unused (kept for interface compatibility).
    Returns the first segment's transcript, or "" on any failure
    (best-effort contract preserved from the original).
    """
    try:
        with io.open(path, 'rb') as audio_file:
            # watson connection
            stt = SpeechToTextV1(
                iam_apikey=model.key.WATSON_APIKEY, url=model.key.WATSON_URL)
            response = stt.recognize(
                audio=audio_file, content_type=cont_type, model=lang)
            result_json = response.result
            for i in range(len(result_json["results"])):
                logger.debug(
                    result_json["results"][i]["alternatives"][0]["transcript"])
            return result_json["results"][0]["alternatives"][0]["transcript"]
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed SystemExit and
        # KeyboardInterrupt) and made observable instead of silent.
        logger.exception("speech recognition failed for %s", path)
        return ""
Beispiel #23
0
def spe2tex(directory):
    """Transcribe the mp3 at *directory* (a file path, despite the name) and
    return the recognition response pretty-printed as a JSON string."""
    speech_to_text = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False)
    with open(directory, 'rb') as audio_file:
        result = speech_to_text.recognize(
            audio_file,
            content_type='audio/mp3',
            timestamps=True,
            model='en-US_BroadbandModel',
            word_confidence=True)
    # The original passed encoding='UTF-8' to json.dumps; that keyword only
    # exists in Python 2 and raises TypeError on Python 3, so it is dropped.
    # ensure_ascii=False preserves the original non-ASCII passthrough.
    return json.dumps(result, indent=2, ensure_ascii=False)
Beispiel #24
0
def audio2text(config,url):  # return the response from IBM for speech recognition
    """Recognize the audio behind *url* with IBM Watson STT.

    config -- mapping with 'username' and 'password' credentials.
    Reference: ibm.com/watson/developercloud/speech-to-text/api/v1/python.html
    (sessionless recognize).
    """
    service = SpeechToTextV1(
        username=config['username'],
        password=config['password'],
        url='https://stream.watsonplatform.net/speech-to-text/api')
    with open(get_path(url), 'rb') as audio_file:
        return service.recognize(
            audio=audio_file,              # file object
            content_type='audio/wav',      # audio format; wav is recommended
            model='en-US_BroadbandModel',  # speech recognition model
            smart_formatting=False,        # proper-noun formatting off
            timestamps=True,               # per-word onset/offset times
            max_alternatives=1)            # number of alternative guesses
def getTextFromFile(audio_file):
    """Return the transcript of the first recognized segment of *audio_file*.

    audio_file -- an open binary file object containing wav audio.
    Uses module-level `api_key` and `url` credentials.
    """
    speech_to_text = SpeechToTextV1(iam_apikey=api_key, url=url)
    speech_to_text.set_detailed_response(True)

    speech_recognition_results = speech_to_text.recognize(
        audio=audio_file,
        content_type='audio/wav',
        timestamps=True
    ).get_result()['results']

    # The original wrapped the alternative dict in SpeechRecognitionAlternative
    # and read `.transcript['transcript']` — that only worked because the dict
    # was passed through unchanged. Index the dict directly instead.
    return speech_recognition_results[0]['alternatives'][0]['transcript']
Beispiel #26
0
    def __init__(self):
        """Wire up microphone capture, Watson TTS/STT clients (configured via
        the project's Config store) and a ROS node for speech processing."""
        # Audio chunking: queue buffers up to BUF_MAX_SIZE bytes of capture.
        self.CHUNK = 1024
        self.BUF_MAX_SIZE = self.CHUNK * 10
        self.q = Queue(maxsize=int(round(self.BUF_MAX_SIZE / self.CHUNK)))
        self.audio_source = AudioSource(self.q, True, True)
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 44100

        # Watson credentials and endpoints from the Config store.
        self.__apikey_stt = Config().Get("SpeechToText", "WatsonSTTAPIKey")
        self.__url_stt = Config().Get("SpeechToText", "WatsonSTTUrl")

        self.__apikey_tts = Config().Get("TextToSpeech", "WatsonTTSAPIKey")
        self.__url_tts = Config().Get("TextToSpeech", "WatsonTTSUrl")

        self.__voiceName = Config().Get("TextToSpeech", "WatsonVoiceName")

        # Language codes, e.g. 2-letter "en" and 4-letter "en-US" variants.
        self.__language_2letter_cc = Config().Get("SpeechToText",
                                                  "CountryCode2Letter")
        self.__language_4letter_cc = Config().Get("SpeechToText",
                                                  "CountryCode4Letter")
        # Shell command template; '{0}' is replaced with the audio file path.
        self.__audioPlayer = Config().Get("TextToSpeech",
                                          "AudioPlayer") + " '{0}'"

        self.text_to_speech = TextToSpeechV1(url=self.__url_tts,
                                             iam_apikey=self.__apikey_tts)
        # Opt out of IBM data collection via request header.
        self.text_to_speech.set_default_headers(
            {'x-watson-learning-opt-out': "true"})

        self.speech_to_text = SpeechToTextV1(url=self.__url_stt,
                                             iam_apikey=self.__apikey_stt)
        self.speech_to_text.set_default_headers(
            {'x-watson-learning-opt-out': "true"})

        self.audio = pyaudio.PyAudio()

        # Open the input stream using a callback; started later on demand.
        self.stream = self.audio.open(format=self.FORMAT,
                                      channels=self.CHANNELS,
                                      rate=self.RATE,
                                      input=True,
                                      frames_per_buffer=self.CHUNK,
                                      stream_callback=self.pyaudio_callback,
                                      start=False)
        # NOTE(review): bare except deliberately tolerates a ROS node that is
        # already initialized; it would also hide unrelated init failures.
        try:
            rospy.init_node('STT_watson_node', anonymous=True)
        except:
            FileLogger().Info('already initialized')
def speech_2_text(file_name):
    """Transcribe the wav at *file_name* and return all segment transcripts
    concatenated with trailing spaces."""
    service = SpeechToTextV1(username='',
                             password='',
                             x_watson_learning_opt_out=False)
    # Kept for its service round-trip; the return value is unused.
    service.get_model('en-US_BroadbandModel')
    with open(file_name, 'rb') as audio_file:
        response = service.recognize(audio_file,
                                     content_type='audio/wav',
                                     timestamps=True,
                                     word_confidence=True)
        transcript = ''
        for segment in response["results"]:
            transcript += segment["alternatives"][0]["transcript"] + ' '

        return transcript
Beispiel #28
0
def submit(request):
    """Django view: transcribe ../speech.wav and render it on about.html.

    Also prints the available STT models and the en-US broadband model
    description for debugging.
    """
    info = request.POST['info']
    print("Submit worked")
    service = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False)

    print(json.dumps(service.models(), indent=2))
    print(json.dumps(service.get_model('en-US_BroadbandModel'),
                     indent=2))

    text = transcribe_audio('../speech.wav')
    print(text)
    return render(request, "about.html", {"text": text})
Beispiel #29
0
def speech_to_text(path):
    """Transcribe the wav at *path*, run sentiment analysis on the transcript
    and store the raw recognition JSON in MongoDB.

    Uses module-level `analyze` and `db`. The original body mixed tabs and
    spaces in its indentation (a TabError on Python 3); normalized to spaces.
    """
    speech_to_text = SpeechToTextV1(
        username="******",
        password="******",
        x_watson_learning_opt_out=False
    )
    with open(join(dirname(__file__), path), 'rb') as audio_file:
        json_text = ((speech_to_text.recognize(
            audio_file, content_type='audio/wav', timestamps=True,
            word_confidence=False)))
        # End time (seconds) of the last word's [word, start, end] triple.
        sec = int(json_text['results'][0]['alternatives'][0]['timestamps'][-1][2])
    json_analysis = analyze(json_text['results'][0]['alternatives'][0]['transcript'], sec)
    print(json_analysis)
    # Send to server
    db['Text'].insert_one(json_text)
def text_json(out_f="out1.mp3", lang_k="ja-JP_BroadbandModel"):
    """Transcribe *out_f* with Watson STT and save the result to result.json.

    out_f  -- audio file path; its extension determines the content type.
    lang_k -- Watson language model id.
    """
    user = '******'
    pswd = 'パスワード'
    # Derive "audio/<ext>" from the filename extension.
    ext = os.path.splitext(out_f)[1][1:]
    cont_type = "audio/" + ext
    print(cont_type)
    lang = lang_k
    # Send the audio to Watson and receive the transcription result.
    stt = SpeechToTextV1(username=user, password=pswd)
    # `with` guarantees the file handle is closed (the original leaked it).
    with open(out_f, "rb") as audio_file:
        result_json = stt.recognize(audio=audio_file,
                                    content_type=cont_type,
                                    model=lang).get_result()
    # Save the result to disk.
    with open("result.json", "w") as f:
        json.dump(result_json, f, ensure_ascii=False, indent=2)