# Example #1
def interpret_dication(wave_filepath):
    """Recognize speech in *wave_filepath* via the dictation service.

    Returns the concatenation of every quoted transcript; exits the
    process with status 1 on a gRPC error.
    """
    provider = AddressProvider()
    client = DictationClient(provider.get("dictation"))

    # Load the audio samples to be recognized.
    audio = get_audio(wave_filepath)

    # Call the recognizer synchronously.
    try:
        responses = client.recognize(method="sync", audio=audio)
    except grpc.RpcError as rpc_err:
        error_log(
            "[Server-side error] Received following RPC error from the Pathfinder service:",
            str(rpc_err))
        import sys
        sys.exit(1)

    # Collect quoted transcripts and join once at the end.
    parts = []
    for response in responses:
        if len(response):
            parts.append("\"{}\"".format(response['transcript']))
        else:
            log("No phrases detected.")
    return "".join(parts)
# Example #2
def run(output_wave_filename, input_text):
    """Synthesize *input_text* via the "trybun" TTS service into a wave file.

    Streams synthesis responses, checks that the sample rate stays constant
    across chunks, and saves the collected audio to *output_wave_filename*
    once the stream signals end-of-stream.

    :param output_wave_filename: Path of the wave file to write.
    :param input_text: Text to synthesize.
    :raises RuntimeError: If a chunk arrives with a different sample rate
        than previously received.
    """
    ap = AddressProvider()
    address = ap.get("trybun")
    sampling_rate = 44100  # requested output sample rate [Hz]

    # Establish gRPC channel to the TTS service.
    channel = grpc.insecure_channel(address)
    stub = TTS_pb2.TTSStub(channel)

    # Build the synthesis request. Use the configured rate instead of a
    # second hard-coded 44100 literal so the two values cannot drift apart.
    config = TTS_pb2.SynthesizeConfig(sample_rate_hertz=sampling_rate)
    request = TTS_pb2.SynthesizeRequest(text=input_text, config=config)
    ws = WaveSaver()
    for response in stub.Synthesize(request):
        if response.HasField('error'):
            print("Error [" + str(response.error.code) + "]: " +
                  response.error.description)
            break
        else:
            if ws._samplerate:
                # Every chunk must share the sample rate of the first one.
                if ws._samplerate != response.audio.sample_rate_hertz:
                    raise RuntimeError(
                        "Sample rate does not match previously received")
            else:
                ws.setFrameRate(response.audio.sample_rate_hertz)
            ws.append(response.audio.content)
            if response.audio.end_of_stream:
                ws.save(output_wave_filename)
    ws.clear()
 def __init__(self, wav_filepath=None, ssldir=None):
     """Collect dictation-client connection settings.

     :param wav_filepath: Path to wave file with speech to be recognized.
                          Should be mono, 8kHz or 16kHz.
     :param ssldir: Optional directory expected to contain client.crt,
                    client.key and ca.crt; validated eagerly.
     """
     if wav_filepath:
         self.wave = op.join(wav_filepath)
     else:
         self.wave = None
     if ssldir:
         # Short-circuit `and` (instead of the original bitwise `&`) skips
         # the per-file checks as soon as the directory test fails.
         assert op.isdir(ssldir) and \
                op.isfile(op.join(ssldir, "client.crt")) and \
                op.isfile(op.join(ssldir, "client.key")) and \
                op.isfile(op.join(ssldir, "ca.crt"))
     self.ssl_directory = ssldir
     ap = AddressProvider()
     self.address = ap.get("dictation")
     # Timeout [ms] used to set the gRPC deadline - how long the client is
     # willing to wait for a reply from the server. If not specified, the
     # service will set the deadline to a very large number.
     self.grpc_timeout = 0
     self.interim_results = False  # If set - messages with temporal results will be shown.
     self.mic = False  # Use microphone as an audio source (instead of wave file).
     self.no_input_timeout = 5000  # MRCP v2 no input timeout [ms].
     self.recognition_timeout = 15000  # MRCP v2 recognition timeout [ms].
     self.session_id = None  # Session ID passed to the service; the service generates a default one when unset.
     self.single_utterance = False  # If set - the recognizer will detect a single spoken utterance.
     self.speech_complete_timeout = 5000  # MRCP v2 speech complete timeout [ms].
     self.speech_incomplete_timeout = 6000  # MRCP v2 speech incomplete timeout [ms].
     self.time_offsets = False  # If set - the recognizer will return also word time offsets.
     self.context_phrase = ""  # Specifies which context model to use.
def recognize(filename):
    """Run grammar-based recognition on *filename* via the Sarmata service.

    Returns a two-tuple (first_word, second_word_or_None) built from the
    semantic interpretation of the recognition result.
    """
    provider = AddressProvider()
    grammar_file = "Grammatic_Frames/Command_structure.abnf"
    service_address = provider.get("sarmata")

    audio = load_wave(filename)

    # Session id is derived from the wave file name.
    settings = SarmataSettings()
    settings.set_session_id(os.path.basename(filename))
    settings.load_grammar(grammar_file)
    recognizer = SarmataRecognizer(service_address)
    results = recognizer.recognize(audio, settings)

    semantic = print_again(results)[0]
    if semantic == "NO_MATCH":
        log("Unrecognized command.")
    else:
        log("Recognized command: ", semantic)

    words = semantic.split(" ")
    if len(words) == 1:
        return words[0], None
    return words[0], words[1]
# Example #5
 def __init__(self, wav_filepath=None, grammar=None):
     """Store Sarmata client settings.

     :param wav_filepath: Optional path to the wave file to recognize.
     :param grammar: Optional grammar to use for recognition.
     """
     ap = AddressProvider()
     # Always define both attributes so later reads cannot raise
     # AttributeError when the optional arguments are omitted.
     self.grammar = grammar if grammar else None
     self.wave = opjoin(wav_filepath) if wav_filepath else None
     self.address = ap.get("sarmata")
# Example #6
    def text_to_wave(self, text):
        """Synthesize *text* to a wave file via the "tribune" TTS service.

        :param text: Text to synthesize.
        """
        # Config. Raw string keeps the literal backslash without the invalid
        # "\o" escape (SyntaxWarning in modern Python); value is unchanged.
        output_wave_file = r"waves\output6."
        # NOTE(review): the filename ends with a bare "." — presumably a
        # ".wav" extension was intended; confirm against the consumer.
        ap = AddressProvider()
        address = ap.get("tribune")
        sampling_rate = 44100

        call_synthesize(address, text, output_wave_file, sampling_rate)
# Example #7
    def say_something(self, text):
        """Synthesize *text* to 'tts_output.wav' and play it back."""
        # Config:
        wave_path = 'tts_output.wav'
        provider = AddressProvider()
        tts_address = provider.get("tribune")
        rate = 44100

        call_synthesize(tts_address, text, wave_path, rate)
        self.playWave()
# Example #8
def get_results(wave_file, grammar_file):
    """Recognize *wave_file* against *grammar_file* on the Sarmata service.

    Returns the raw recognition results from the recognizer.
    """
    address = AddressProvider().get("sarmata")
    audio = load_wave(wave_file)

    # Session id is the wave file's basename.
    settings = SarmataSettings()
    settings.set_session_id(os.path.basename(wave_file))
    settings.load_grammar(grammar_file)

    return SarmataRecognizer(address).recognize(audio, settings)
# Example #9
def getReply(canRaise):
    """Record a spoken reply, recognize it and return the best command.

    :param canRaise: Selects which grammar to load (raise allowed or not).
    :return: The recognized command string, a normalized "Stawiam <sum>"
             for raise commands, or 'NO COMMAND DETECTED' when no
             sufficiently confident hypothesis exists.
    """
    from techmo_sarmata_pyclient.utils.wave_loader import load_wave
    from techmo_sarmata_pyclient.service.sarmata_settings import SarmataSettings
    from techmo_sarmata_pyclient.service.sarmata_recognize import SarmataRecognizer
    from address_provider import AddressProvider
    import os

    WAVE_OUTPUT_FILENAME = "temp.wav"
    DISTRUST_FACTOR = 0.1  # minimum confidence margin between hypotheses

    record(WAVE_OUTPUT_FILENAME, True)

    # Pick the grammar based on whether raising is currently allowed.
    ap = AddressProvider()
    if canRaise:  # truthiness test instead of "== True"
        grammar_file = "grammars/grammar_canraise.abnf"
    else:
        grammar_file = "grammars/grammar_cannotraise.abnf"
    address = ap.get("sarmata")

    audio = load_wave(WAVE_OUTPUT_FILENAME)

    settings = SarmataSettings()
    settings.set_session_id(os.path.basename(WAVE_OUTPUT_FILENAME))
    settings.load_grammar(grammar_file)

    recognizer = SarmataRecognizer(address)
    results = recognizer.recognize(audio, settings)
    player_answer_list = get_results(results)

    length = len(player_answer_list[1])
    if length == 0:
        return 'NO COMMAND DETECTED'
    sortedAnswerList = sorted(player_answer_list[1],
                              key=takeSecond,
                              reverse=True)
    # Reject low-confidence hypotheses outright.
    if sortedAnswerList[0][1] < 2 * DISTRUST_FACTOR:
        return 'NO COMMAND DETECTED'
    if length >= 2:
        # Reject ambiguous results: the runner-up is too close to the best.
        if sortedAnswerList[0][1] - DISTRUST_FACTOR < sortedAnswerList[1][1]:
            return 'NO COMMAND DETECTED'
        if canRaise:
            words = sortedAnswerList[0][2].split()
            if words[0] == 'Stawiam':
                # Sum the spoken amounts with the builtin (the original
                # shadowed builtin `sum` with a local of the same name).
                total = sum(int(word) for word in words[1:])
                return 'Stawiam ' + str(total)
    return sortedAnswerList[0][2]
# Example #10
 def __init__(self, wav_filepath=None):
     """Collect dictation-client settings, defaulting to microphone input."""
     # Path to wave file with speech to be recognized; should be mono,
     # 8kHz or 16kHz. None when recognizing from the microphone.
     self.wave = opjoin(wav_filepath) if wav_filepath else None
     self.address = AddressProvider().get("dictation")
     self.interim_results = False  # If set - messages with temporal results will be shown.
     self.mic = True  # Use microphone as an audio source (instead of wave file).
     self.no_input_timeout = 5000  # MRCP v2 no input timeout [ms].
     self.recognition_timeout = 15000  # MRCP v2 recognition timeout [ms].
     self.session_id = None  # Session ID passed to the service; the service generates a default one when unset.
     self.single_utterance = True  # If set - the recognizer will detect a single spoken utterance.
     self.speech_complete_timeout = 2000  # MRCP v2 speech complete timeout [ms].
     self.speech_incomplete_timeout = 6000  # MRCP v2 speech incomplete timeout [ms].
     self.time_offsets = False  # If set - the recognizer will return also word time offsets.
# Example #11
def recognize_numbers(wave_file):
    """Recognize spoken digits in *wave_file* using the digit grammar."""
    provider = AddressProvider()
    digits_grammar = "grammars/cyfry.abnf"
    service_address = provider.get("sarmata")

    audio = load_wave(wave_file)

    # Session id is the wave file's basename.
    settings = SarmataSettings()
    settings.set_session_id(os.path.basename(wave_file))
    settings.load_grammar(digits_grammar)

    recognizer = SarmataRecognizer(service_address)
    results = recognizer.recognize(audio, settings)
    # Join the recognized digit words into the pass phrase.
    return glue_numbers(results)
# Example #12
def run():
    """Recognize the answer recording against the movie grammar.

    Prints and returns the semantic interpretation of the best result.
    """
    provider = AddressProvider()
    answer_wave = "waves/answer.wav"
    movie_grammar = "grammars/movie_grammar.abnf"
    sarmata_address = provider.get("sarmata")

    audio = load_wave(answer_wave)

    settings = SarmataSettings()
    settings.set_session_id(os.path.basename(answer_wave))
    settings.load_grammar(movie_grammar)
    recognizer = SarmataRecognizer(sarmata_address)
    results = recognizer.recognize(audio, settings)

    informacja_semantyczna = print_results2(results)[0]
    print(informacja_semantyczna + " sarmata ")
    return informacja_semantyczna
# Example #13
def find_keywords(phrases, wave_filename):
    """Spot *phrases* in *wave_filename* via the Pathfinder service.

    Prints every spotted phrase and returns the sorted list of spotted-
    phrase dicts from the first non-empty response (None when nothing is
    found). Exits the process with status 1 on a gRPC error.
    """
    provider = AddressProvider()
    client = PathfinderClient(provider.get("pathfinder"))

    # Read the raw samples and their rate from the wave file.
    with wave.open(wave_filename) as wav:
        rate = wav.getframerate()
        samples = wav.readframes(wav.getnframes())

    # Run Pathfinder synchronously.
    try:
        responses = client.RunPathinder(phrases=phrases,
                                        audio=samples,
                                        sampling_rate=rate,
                                        method="sync")
    except grpc.RpcError as rpc_err:
        print(
            "[Server-side error] Received following RPC error from the Pathfinder service:",
            str(rpc_err))
        import sys

        sys.exit(1)

    for response in responses:
        if not len(response.phrases):
            print("No phrases detected.")
            continue
        print("Found:")
        # Sort hits by start time, then score, before reporting them.
        spotted_phrases = sorted(({
            'start': hit.start,
            'end': hit.end,
            'phrase': hit.phrase,
            'score': hit.score
        } for hit in response.phrases),
                                 key=lambda k: (k['start'], k['score']))
        for position, hit in enumerate(spotted_phrases):
            print("{} - [ {} ms - {} ms ]\t{}".format(
                position, hit['start'], hit['end'], hit['phrase']))

        return spotted_phrases
# Example #14
def sarmata(__name__):
    """Recognize a chess command from a fixed recording.

    :param __name__: Unused; kept only for backward compatibility with
        existing callers.
    :return: The recognized move with all spaces removed ("" when nothing
        was recognized).
    """
    ap = AddressProvider()

    wave_file = "waves/chess_command.wav"  # recording
    grammar_file = "grammars/chess.abnf"  # grammar
    address = ap.get("sarmata")

    audio = load_wave(wave_file)

    settings = SarmataSettings()
    session_id = os.path.basename(wave_file)
    settings.set_session_id(session_id)
    settings.load_grammar(grammar_file)

    recognizer = SarmataRecognizer(address)
    results = recognizer.recognize(audio, settings)

    move = print_results(results)  # recognized move (string with spaces)

    if move is None:  # identity check instead of "== None"
        move = ""

    # Strip all spaces in one pass instead of the original index loop.
    return move.replace(" ", "")
from techmo_dictation_pathfinder_pyclients.audio_provider import get_audio
from techmo_dictation_pathfinder_pyclients.dictation_client import DictationClient
from address_provider import AddressProvider
import grpc

if __name__ == '__main__':
    # Connect to the dictation service.
    ap = AddressProvider()
    dc = DictationClient(ap.get("dictation"))

    # Read wave file
    wave_filepath = "waves/example_atm.wav"
    audio = get_audio(wave_filepath)

    # Run the synchronous recognition.
    try:
        results = dc.recognize(method="sync", audio=audio)
    except grpc.RpcError as e:
        print(
            "[Server-side error] Received following RPC error from the Pathfinder service:",
            str(e))
        import sys
        sys.exit(1)

    # Print each non-empty transcript.
    for response in results:
        if len(response):
            print("Transcription:")
            print("\"{}\"".format(response['transcript']))
        else:
            print("No phrases detected.")
#!/usr/bin/env python3
# coding=utf-8

from techmo_dictation_pathfinder_pyclients.pathfinder_client import PathfinderClient
from address_provider import AddressProvider
import wave
import grpc

if __name__ == '__main__':
    # Connect to the Pathfinder keyword-spotting service.
    ap = AddressProvider()
    pathfinder_address = ap.get("pathfinder")
    pf = PathfinderClient(pathfinder_address)

    # Define phrases you want to spot
    phrases = ['handel', 'weekend']

    # Read wave file
    wave_filename = "waves/example.wav"
    with wave.open(wave_filename) as f:
        fs = f.getframerate()  # sampling rate [Hz]
        signal = f.readframes(f.getnframes())  # raw audio bytes

    # Run Pathfinder
    # NOTE(review): this example is truncated below — the except body is
    # cut off mid-statement in this source; restore it from the original
    # example before running.
    try:
        response_iterator = pf.RunPathinder(phrases=phrases,
                                            audio=signal,
                                            sampling_rate=fs,
                                            method="sync")
    except grpc.RpcError as e:
        print(
            "[Server-side error] Received following RPC error from the Pathfinder service:",
# Example #17
#!/usr/bin/env python3
# coding=utf-8

from tts.call_synthesize import call_synthesize
from address_provider import AddressProvider

if __name__ == '__main__':
    # Synthesize a fixed Polish sentence to 'tts_output.wav'.
    output_wave_file = 'tts_output.wav'
    provider = AddressProvider()
    tts_address = provider.get("tts")
    rate = 44100
    text = "Ala ma kota i chciałaby zaliczyć Technologię Mowy w dwa tysiące dwudziestym roku na ocenę 5.0"

    call_synthesize(tts_address, text, output_wave_file, rate)
# Example #18
            print("[ERROR]: {}".format(response.error))

        for n, res in enumerate(response.results):
            transcript = " ".join([word.transcript for word in res.words])
            print("[{}.] {} /{}/ ({})".format(n, transcript,
                                              res.semantic_interpretation,
                                              res.confidence))

            if n == 0:
                print("hallo, dalej przekazuję:", res.semantic_interpretation)
                move = res.semantic_interpretation
                return move


if __name__ == '__main__':
    # Recognize a recorded chess command against the chess grammar.
    ap = AddressProvider()
    # wave_file = "waves/example_cyfry.wav"

    wave_file = "waves/chess_command.wav"  # recording
    grammar_file = "grammars/chess.abnf"  # grammar
    address = ap.get("sarmata")

    audio = load_wave(wave_file)

    settings = SarmataSettings()
    # Use the wave file's basename as the recognition session id.
    session_id = os.path.basename(wave_file)
    settings.set_session_id(session_id)
    settings.load_grammar(grammar_file)

    recognizer = SarmataRecognizer(address)
    results = recognizer.recognize(audio, settings)
    # NOTE(review): this example appears truncated here — `results` is
    # produced but never consumed in the visible source.
 def __init__(self, wav_filepath=None):
     """Store dictation connection settings.

     :param wav_filepath: Optional path to the wave file to recognize.
     """
     ap = AddressProvider()
     # Always define self.wave so later reads cannot raise AttributeError
     # when no wave path is supplied (matches the sibling client classes).
     self.wave = opjoin(wav_filepath) if wav_filepath else None
     self.address = ap.get("dictation")
#!/usr/bin/env python3
# coding=utf-8

from tribune.call_synthesize import call_synthesize
from address_provider import AddressProvider

if __name__ == '__main__':
    # Synthesize a short test utterance to "test.wav".
    output_wave_file = "test.wav"
    provider = AddressProvider()
    tribune_address = provider.get("tribune")
    rate = 44100
    text = "Test."

    call_synthesize(tribune_address, text, output_wave_file, rate)