def interpret_dication(wave_filepath):
    ap = AddressProvider()
    address = ap.get("dictation")
    dc = DictationClient(address)

    # Read wave file
    audio = get_audio(wave_filepath)

    # Run dictation
    try:
        results = dc.recognize(method="sync", audio=audio)
    except grpc.RpcError as e:
        error_log(
            "[Server-side error] Received following RPC error from the Dictation service:",
            str(e))
        import sys
        sys.exit(1)

    transcription = ""
    for idx, response in enumerate(results):
        if not len(response):
            log("No phrases detected.")
        else:
            transcription += "\"{}\"".format(response['transcript'])
    return transcription
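
# A minimal usage sketch for interpret_dication() above; the wave path is an
# assumption (it only needs to point at a mono 8 kHz or 16 kHz recording), and the
# AddressProvider / DictationClient / get_audio imports are expected to be present
# as in the standalone dictation script further below.
if __name__ == '__main__':
    transcription = interpret_dication("waves/example_atm.wav")
    print("Transcription:", transcription)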
def run(output_wave_filename, input_text):
    # Config:
    output_wave_file = output_wave_filename
    ap = AddressProvider()
    address = ap.get("trybun")
    sampling_rate = 44100

    # Establish gRPC channel
    channel = grpc.insecure_channel(address)
    stub = TTS_pb2.TTSStub(channel)

    # Synthesis request
    config = TTS_pb2.SynthesizeConfig(sample_rate_hertz=sampling_rate)
    request = TTS_pb2.SynthesizeRequest(text=input_text, config=config)

    ws = WaveSaver()
    for response in stub.Synthesize(request):
        if response.HasField('error'):
            print("Error [" + str(response.error.code) + "]: " + response.error.description)
            break
        else:
            if ws._samplerate:
                if ws._samplerate != response.audio.sample_rate_hertz:
                    raise RuntimeError("Sample rate does not match previously received")
            else:
                ws.setFrameRate(response.audio.sample_rate_hertz)
            ws.append(response.audio.content)
            if response.audio.end_of_stream:
                ws.save(output_wave_file)
                ws.clear()
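
# A minimal usage sketch for run() above, assuming the TTS service registered under
# the "trybun" key is reachable; the output filename and text are placeholders.
if __name__ == '__main__':
    run("tts_output.wav", "Test.")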
def __init__(self, wav_filepath=None, ssldir=None):
    if wav_filepath:
        # Path to wave file with speech to be recognized. Should be mono, 8kHz or 16kHz.
        self.wave = op.join(wav_filepath)
    else:
        self.wave = None
    if ssldir:
        assert op.isdir(ssldir) and \
            op.isfile(op.join(ssldir, "client.crt")) and \
            op.isfile(op.join(ssldir, "client.key")) and \
            op.isfile(op.join(ssldir, "ca.crt"))
    self.ssl_directory = ssldir
    ap = AddressProvider()
    self.address = ap.get("dictation")
    # Timeout in milliseconds used to set the gRPC deadline - how long the client is willing
    # to wait for a reply from the server. If not specified, the service will set the
    # deadline to a very large number.
    self.grpc_timeout = 0
    self.interim_results = False  # If set - messages with temporary results will be shown.
    self.mic = False  # Use microphone as an audio source (instead of wave file).
    self.no_input_timeout = 5000  # MRCP v2 no input timeout [ms].
    self.recognition_timeout = 15000  # MRCP v2 recognition timeout [ms].
    self.session_id = None  # Session ID to be passed to the service. If not specified, the service will generate a default session ID itself.
    self.single_utterance = False  # If set - the recognizer will detect a single spoken utterance.
    self.speech_complete_timeout = 5000  # MRCP v2 speech complete timeout [ms].
    self.speech_incomplete_timeout = 6000  # MRCP v2 speech incomplete timeout [ms].
    self.time_offsets = False  # If set - the recognizer will also return word time offsets.
    self.context_phrase = ""  # Specifies which context model to use.
def recognize(filename):
    ap = AddressProvider()
    wave_file = filename
    grammar_file = "Grammatic_Frames/Command_structure.abnf"
    address = ap.get("sarmata")
    audio = load_wave(wave_file)

    settings = SarmataSettings()
    session_id = os.path.basename(wave_file)
    settings.set_session_id(session_id)
    settings.load_grammar(grammar_file)

    recognizer = SarmataRecognizer(address)
    results = recognizer.recognize(audio, settings)

    tmp = print_again(results)
    semantic = tmp[0]
    if semantic == "NO_MATCH":
        log("Unrecognized command.")
    else:
        log("Recognized command: ", semantic)

    result = semantic.split(" ")
    if len(result) == 1:
        return result[0], None
    else:
        return result[0], result[1]
def __init__(self, wav_filepath=None, grammar=None):
    ap = AddressProvider()
    if grammar:
        self.grammar = grammar
    if wav_filepath:
        self.wave = opjoin(wav_filepath)
    self.address = ap.get("sarmata")
def text_to_wave(self, text):
    # Config:
    output_wave_file = "waves\output6."
    ap = AddressProvider()
    address = ap.get("tribune")
    sampling_rate = 44100
    input_text = text
    call_synthesize(address, input_text, output_wave_file, sampling_rate)
def say_something(self, text):
    # Config:
    output_wave_file = 'tts_output.wav'
    ap = AddressProvider()
    address = ap.get("tribune")
    sampling_rate = 44100
    input_text = text
    call_synthesize(address, input_text, output_wave_file, sampling_rate)
    self.playWave()
def get_results(wave_file, grammar_file):
    ap = AddressProvider()
    address = ap.get("sarmata")
    audio = load_wave(wave_file)

    settings = SarmataSettings()
    session_id = os.path.basename(wave_file)
    settings.set_session_id(session_id)
    settings.load_grammar(grammar_file)

    recognizer = SarmataRecognizer(address)
    results = recognizer.recognize(audio, settings)
    return results
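
# A minimal usage sketch for get_results(); the wave and grammar paths below are
# borrowed from other examples in this collection and are assumptions here.
if __name__ == '__main__':
    results = get_results("waves/answer.wav", "grammars/movie_grammar.abnf")
    for response in results:
        print(response)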
def getReply(canRaise):
    from techmo_sarmata_pyclient.utils.wave_loader import load_wave
    from techmo_sarmata_pyclient.service.sarmata_settings import SarmataSettings
    from techmo_sarmata_pyclient.service.sarmata_recognize import SarmataRecognizer
    from address_provider import AddressProvider
    import os

    WAVE_OUTPUT_FILENAME = "temp.wav"
    DISTRUST_FACTOR = 0.1

    record(WAVE_OUTPUT_FILENAME, True)

    # Analyze the recording with the Sarmata recognizer.
    ap = AddressProvider()
    if canRaise:
        grammar_file = "grammars/grammar_canraise.abnf"
    else:
        grammar_file = "grammars/grammar_cannotraise.abnf"
    address = ap.get("sarmata")
    audio = load_wave(WAVE_OUTPUT_FILENAME)

    settings = SarmataSettings()
    session_id = os.path.basename(WAVE_OUTPUT_FILENAME)
    settings.set_session_id(session_id)
    settings.load_grammar(grammar_file)

    recognizer = SarmataRecognizer(address)
    results = recognizer.recognize(audio, settings)

    player_answer_list = get_results(results)
    length = len(player_answer_list[1])
    if length == 0:
        return 'NO COMMAND DETECTED'

    # Sort hypotheses by their score (second element) in descending order and
    # discard low-confidence or ambiguous results.
    sortedAnswerList = sorted(player_answer_list[1], key=takeSecond, reverse=True)
    if sortedAnswerList[0][1] < 2 * DISTRUST_FACTOR:
        return 'NO COMMAND DETECTED'
    if length >= 2:
        if sortedAnswerList[0][1] - DISTRUST_FACTOR < sortedAnswerList[1][1]:
            return 'NO COMMAND DETECTED'

    if canRaise:
        words = sortedAnswerList[0][2].split()
        if words[0] == 'Stawiam':
            # Sum the numeric tokens following the keyword, e.g. "Stawiam 10 5" -> "Stawiam 15".
            total = 0
            for word in words[1:]:
                total += int(word)
            return 'Stawiam ' + str(total)

    return sortedAnswerList[0][2]
def __init__(self, wav_filepath=None):
    if wav_filepath:
        # Path to wave file with speech to be recognized. Should be mono, 8kHz or 16kHz.
        self.wave = opjoin(wav_filepath)
    else:
        self.wave = None
    ap = AddressProvider()
    self.address = ap.get("dictation")
    self.interim_results = False  # If set - messages with temporary results will be shown.
    self.mic = True  # Use microphone as an audio source (instead of wave file).
    self.no_input_timeout = 5000  # MRCP v2 no input timeout [ms].
    self.recognition_timeout = 15000  # MRCP v2 recognition timeout [ms].
    self.session_id = None  # Session ID to be passed to the service. If not specified, the service will generate a default session ID itself.
    self.single_utterance = True  # If set - the recognizer will detect a single spoken utterance.
    self.speech_complete_timeout = 2000  # MRCP v2 speech complete timeout [ms].
    self.speech_incomplete_timeout = 6000  # MRCP v2 speech incomplete timeout [ms].
    self.time_offsets = False  # If set - the recognizer will also return word time offsets.
def recognize_numbers(wave_file):
    ap = AddressProvider()
    grammar_file = "grammars/cyfry.abnf"
    address = ap.get("sarmata")
    audio = load_wave(wave_file)

    settings = SarmataSettings()
    session_id = os.path.basename(wave_file)
    settings.set_session_id(session_id)
    settings.load_grammar(grammar_file)

    recognizer = SarmataRecognizer(address)
    results = recognizer.recognize(audio, settings)

    # print_results(results)
    pass_words = glue_numbers(results)
    return pass_words
def run():
    ap = AddressProvider()
    wave_file = "waves/answer.wav"
    grammar_file = "grammars/movie_grammar.abnf"
    address = ap.get("sarmata")
    audio = load_wave(wave_file)

    settings = SarmataSettings()
    session_id = os.path.basename(wave_file)
    settings.set_session_id(session_id)
    settings.load_grammar(grammar_file)

    recognizer = SarmataRecognizer(address)
    results = recognizer.recognize(audio, settings)

    info_info = print_results2(results)
    informacja_semantyczna = info_info[0]
    print(informacja_semantyczna + " sarmata ")
    return informacja_semantyczna
def find_keywords(phrases, wave_filename):
    ap = AddressProvider()
    pathfinder_address = ap.get("pathfinder")
    pf = PathfinderClient(pathfinder_address)

    # Read wave file
    with wave.open(wave_filename) as f:
        fs = f.getframerate()
        signal = f.readframes(f.getnframes())

    # Run Pathfinder
    try:
        response_iterator = pf.RunPathinder(phrases=phrases,
                                            audio=signal,
                                            sampling_rate=fs,
                                            method="sync")
    except grpc.RpcError as e:
        print(
            "[Server-side error] Received following RPC error from the Pathfinder service:",
            str(e))
        import sys
        sys.exit(1)

    spotted_phrases = []
    for idx, response in enumerate(response_iterator):
        if not len(response.phrases):
            print("No phrases detected.")
        else:
            print("Found:")
            spotted_phrases = sorted(({
                'start': spotted.start,
                'end': spotted.end,
                'phrase': spotted.phrase,
                'score': spotted.score
            } for spotted in response.phrases),
                                     key=lambda k: (k['start'], k['score']))
            for idx, spotted in enumerate(spotted_phrases):
                print("{} - [ {} ms - {} ms ]\t{}".format(
                    idx, spotted['start'], spotted['end'], spotted['phrase']))
    return spotted_phrases
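
# A minimal usage sketch for find_keywords(); the phrase list and wave path mirror the
# standalone Pathfinder script below and are assumptions here.
if __name__ == '__main__':
    spotted = find_keywords(['handel', 'weekend'], "waves/example.wav")
    print("Spotted {} phrase(s).".format(len(spotted)))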
def sarmata(__name__):
    ap = AddressProvider()
    wave_file = "waves/chess_command.wav"  # recording
    grammar_file = "grammars/chess.abnf"  # grammar
    address = ap.get("sarmata")
    audio = load_wave(wave_file)

    settings = SarmataSettings()
    session_id = os.path.basename(wave_file)
    settings.set_session_id(session_id)
    settings.load_grammar(grammar_file)

    recognizer = SarmataRecognizer(address)
    results = recognizer.recognize(audio, settings)

    # Save the recognized move (our move); print_results returns the recognized
    # command as a string that still contains spaces.
    move = print_results(results)
    if move is None:
        move = ""

    # Remove the spaces from the recognized command.
    i = 0
    move_bez_spacji = ""
    while i < len(move):
        if move[i] != " ":
            move_bez_spacji = move_bez_spacji + move[i]
        i = i + 1
    return move_bez_spacji
from techmo_dictation_pathfinder_pyclients.audio_provider import get_audio
from techmo_dictation_pathfinder_pyclients.dictation_client import DictationClient
from address_provider import AddressProvider
import grpc

if __name__ == '__main__':
    ap = AddressProvider()
    address = ap.get("dictation")
    dc = DictationClient(address)

    # Read wave file
    wave_filepath = "waves/example_atm.wav"
    audio = get_audio(wave_filepath)

    # Run dictation
    try:
        results = dc.recognize(method="sync", audio=audio)
    except grpc.RpcError as e:
        print(
            "[Server-side error] Received following RPC error from the Dictation service:",
            str(e))
        import sys
        sys.exit(1)

    for idx, response in enumerate(results):
        if not len(response):
            print("No phrases detected.")
        else:
            print("Transcription:")
            print("\"{}\"".format(response['transcript']))
#!/usr/bin/env python3
# coding=utf-8
from techmo_dictation_pathfinder_pyclients.pathfinder_client import PathfinderClient
from address_provider import AddressProvider
import wave
import grpc

if __name__ == '__main__':
    ap = AddressProvider()
    pathfinder_address = ap.get("pathfinder")
    pf = PathfinderClient(pathfinder_address)

    # Define phrases you want to spot
    phrases = ['handel', 'weekend']

    # Read wave file
    wave_filename = "waves/example.wav"
    with wave.open(wave_filename) as f:
        fs = f.getframerate()
        signal = f.readframes(f.getnframes())

    # Run Pathfinder
    try:
        response_iterator = pf.RunPathinder(phrases=phrases,
                                            audio=signal,
                                            sampling_rate=fs,
                                            method="sync")
    except grpc.RpcError as e:
        print(
            "[Server-side error] Received following RPC error from the Pathfinder service:",
            str(e))
        import sys
        sys.exit(1)
#!/usr/bin/env python3
# coding=utf-8
from tts.call_synthesize import call_synthesize
from address_provider import AddressProvider

if __name__ == '__main__':
    # Config:
    output_wave_file = 'tts_output.wav'
    ap = AddressProvider()
    address = ap.get("tts")
    sampling_rate = 44100
    input_text = "Ala ma kota i chciałaby zaliczyć Technologię Mowy w dwa tysiące dwudziestym roku na ocenę 5.0"
    call_synthesize(address, input_text, output_wave_file, sampling_rate)
print("[ERROR]: {}".format(response.error)) for n, res in enumerate(response.results): transcript = " ".join([word.transcript for word in res.words]) print("[{}.] {} /{}/ ({})".format(n, transcript, res.semantic_interpretation, res.confidence)) if n == 0: print("hallo, dalej przekazuję:", res.semantic_interpretation) move = res.semantic_interpretation return move if __name__ == '__main__': ap = AddressProvider() # wave_file = "waves/example_cyfry.wav" wave_file = "waves/chess_command.wav" #nagranie grammar_file = "grammars/chess.abnf" # gramatyka address = ap.get("sarmata") audio = load_wave(wave_file) settings = SarmataSettings() session_id = os.path.basename(wave_file) settings.set_session_id(session_id) settings.load_grammar(grammar_file) recognizer = SarmataRecognizer(address) results = recognizer.recognize(audio, settings)
def __init__(self, wav_filepath=None):
    ap = AddressProvider()
    if wav_filepath:
        self.wave = opjoin(wav_filepath)
    self.address = ap.get("dictation")
#!/usr/bin/env python3
# coding=utf-8
from tribune.call_synthesize import call_synthesize
from address_provider import AddressProvider

if __name__ == '__main__':
    # Config:
    output_wave_file = "test.wav"
    ap = AddressProvider()
    address = ap.get("tribune")
    sampling_rate = 44100
    input_text = "Test."
    call_synthesize(address, input_text, output_wave_file, sampling_rate)