Code example #1
    def __init__(self, config, *args, **kwargs):
        """
        Open a streaming channel to the Riva server for ASR.  This establishes a connection over GRPC 
        and sends/recieves the requests and responses asynchronously.  Incoming audio samples get put
        into a request queue that GRPC picks up, and a thread waits on responses to come in.
        """
        super(RivaASRService, self).__init__(config, *args, **kwargs)

        self.config.setdefault('server', 'localhost:50051')
        self.config.setdefault('sample_rate', 16000)
        self.config.setdefault('frame_length', 1.0)
        # request_timeout: how long to wait for new audio to come in
        # response_timeout: how long to wait for results from riva
        self.config.setdefault('request_timeout', 2.0)
        self.config.setdefault('response_timeout', 0.05)
        self.config.setdefault('language_code', 'en-US')
        self.config.setdefault('enable_automatic_punctuation', True)
        self.config.setdefault('top_k', 1)

        logging.info(f'Riva ASR service config:\n{self.config}')

        self.channel = grpc.insecure_channel(self.config.server)
        self.client = rasr_srv.RivaSpeechRecognitionStub(self.channel)

        self.recognition_config = rasr.RecognitionConfig(
            encoding=ra.AudioEncoding.LINEAR_PCM,
            sample_rate_hertz=self.config.sample_rate,
            language_code=self.config.language_code,
            max_alternatives=self.config.top_k,
            enable_word_time_offsets=True,
            enable_automatic_punctuation=self.config.enable_automatic_punctuation)

        self.streaming_config = rasr.StreamingRecognitionConfig(
            config=self.recognition_config, interim_results=True)

        self.request_queue = queue.Queue()
        # the first request on the stream must carry the streaming config;
        # subsequent requests carry audio samples
        self.request_queue.put(
            rasr.StreamingRecognizeRequest(
                streaming_config=self.streaming_config))

        # the service object itself is the request iterator handed to gRPC
        self.responses = self.client.StreamingRecognize(self)
        self.responses_queue = queue.Queue()

        # background thread that drains the response stream into responses_queue
        self.response_thread = threading.Thread(target=self.recieve_responses)
        self.response_thread.start()
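
The constructor passes the service object itself to client.StreamingRecognize() and starts a thread whose target is not shown in this excerpt. Below is a minimal sketch of those missing pieces, assuming the class implements the iterator protocol over request_queue and that the thread simply drains the server's response stream into responses_queue (the method keeps the recieve_responses spelling referenced above; the timeout handling is an assumption, not the original implementation):

    def __iter__(self):
        # gRPC iterates over this object to pull outgoing requests
        return self

    def __next__(self):
        # request_queue already holds StreamingRecognizeRequest objects: the
        # config request queued in __init__ first, then audio requests added
        # by the capture callback. End the stream if no audio arrives in time.
        try:
            return self.request_queue.get(timeout=self.config.request_timeout)
        except queue.Empty:
            raise StopIteration

    def recieve_responses(self):
        # runs on self.response_thread: forward each streaming response from
        # the server into responses_queue for the caller to poll
        for response in self.responses:
            self.responses_queue.put(response)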
Code example #2
def main():
    args = get_args()

    if args.list_devices:
        p = pyaudio.PyAudio()
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if info['maxInputChannels'] < 1:
                continue
            print(f"{info['index']}: {info['name']}")
        sys.exit(0)

    channel = grpc.insecure_channel(args.server)
    client = rasr_srv.RivaSpeechRecognitionStub(channel)

    config = rasr.RecognitionConfig(
        encoding=ra.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=RATE,
        language_code="en-US",
        max_alternatives=1,
        enable_automatic_punctuation=True,
    )
    streaming_config = rasr.StreamingRecognitionConfig(config=config,
                                                       interim_results=True)

    with MicrophoneStream(RATE, CHUNK, device=args.input_device) as stream:
        audio_generator = stream.generator()
        requests = (rasr.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        def build_generator(cfg, gen):
            # the first request on the stream carries the streaming config;
            # the audio requests from the generator follow
            yield rasr.StreamingRecognizeRequest(streaming_config=cfg)
            for x in gen:
                yield x

        responses = client.StreamingRecognize(
            build_generator(streaming_config, requests))

        listen_print_loop(responses)
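
listen_print_loop(responses) is called here (and again in code example #4) but is not defined in these excerpts. The sketch below is one plausible implementation, assuming the usual streaming response layout (results, alternatives, transcript, is_final); it is illustrative rather than the original helper:

def listen_print_loop(responses):
    # print interim transcripts in place, then commit the line once the
    # result is flagged as final
    for response in responses:
        if not response.results:
            continue
        result = response.results[0]
        if not result.alternatives:
            continue
        transcript = result.alternatives[0].transcript
        if result.is_final:
            print(transcript)
        else:
            print(transcript, end='\r', flush=True)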
Code example #3
def asr_client(
    id,
    output_file,
    input_file,
    num_iterations,
    simulate_realtime,
    riva_uri,
    max_alternatives,
    automatic_punctuation,
    word_time_offsets,
    verbatim_transcripts,
):

    CHUNK = 1600
    channel = grpc.insecure_channel(riva_uri)
    wf = wave.open(input_file, 'rb')

    frames = wf.getnframes()
    rate = wf.getframerate()
    duration = frames / float(rate)
    if id == 0:
        print("File duration: %.2fs" % duration)

    client = rasr_srv.RivaSpeechRecognitionStub(channel)
    config = rasr.RecognitionConfig(
        encoding=ra.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=wf.getframerate(),
        language_code="en-US",
        max_alternatives=max_alternatives,
        enable_automatic_punctuation=automatic_punctuation,
        enable_word_time_offsets=word_time_offsets,
        verbatim_transcripts=verbatim_transcripts,
    )

    streaming_config = rasr.StreamingRecognitionConfig(
        config=config, interim_results=True)

    # yield the streaming config, then re-read the input file and send it in
    # CHUNK-frame requests; repeat for num_iterations passes over the file
    def generator(s, num_iterations):
        try:
            for i in range(num_iterations):
                w = wave.open(input_file, 'rb')
                start_time = time.time()
                yield rasr.StreamingRecognizeRequest(streaming_config=s)
                num_requests = 0
                while True:
                    d = w.readframes(CHUNK)
                    if len(d) <= 0:
                        break
                    num_requests += 1
                    if simulate_realtime:
                        # pace the requests so audio is sent no faster than real time
                        time_to_sleep = max(
                            0.0,
                            CHUNK / rate * num_requests - (time.time() - start_time))
                        time.sleep(time_to_sleep)
                    yield rasr.StreamingRecognizeRequest(audio_content=d)
                w.close()
        except Exception as e:
            print(e)

    responses = client.StreamingRecognize(
        generator(streaming_config, num_iterations))
    print_to_file(responses, output_file, max_alternatives, word_time_offsets)
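
print_to_file(...) is also not included in the excerpt. Here is a hedged sketch under the assumption that it writes final transcripts, plus per-word timings when word_time_offsets is set, to the output file; the words/start_time/end_time field names are assumptions about the response message:

def print_to_file(responses, output_file, max_alternatives, word_time_offsets):
    # write each final transcript (and optional word timings) as the
    # streaming responses arrive
    with open(output_file, 'w') as f:
        for response in responses:
            for result in response.results:
                if not result.is_final:
                    continue
                for alternative in result.alternatives[:max_alternatives]:
                    f.write(alternative.transcript + '\n')
                    if word_time_offsets:
                        for word in alternative.words:
                            f.write('  %s: %s -> %s\n' %
                                    (word.word, word.start_time, word.end_time))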
Code example #4

CHUNK = 1024
args = get_args()
wf = wave.open(args.audio_file, 'rb')

channel = grpc.insecure_channel(args.server)
client = rasr_srv.RivaSpeechRecognitionStub(channel)
config = rasr.RecognitionConfig(
    encoding=ra.AudioEncoding.LINEAR_PCM,
    sample_rate_hertz=wf.getframerate(),
    language_code="en-US",
    max_alternatives=1,
    enable_automatic_punctuation=True,
)
streaming_config = rasr.StreamingRecognitionConfig(config=config,
                                                   interim_results=True)


# yield the streaming config first, then the WAV data in CHUNK-frame requests
def generator(w, s):
    yield rasr.StreamingRecognizeRequest(streaming_config=s)
    d = w.readframes(CHUNK)
    while len(d) > 0:
        yield rasr.StreamingRecognizeRequest(audio_content=d)
        d = w.readframes(CHUNK)


responses = client.StreamingRecognize(generator(wf, streaming_config))
listen_print_loop(responses)
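
get_args() is referenced in code examples #2 and #4 but not shown. For example #4, a minimal argparse sketch covering the two attributes actually used (args.audio_file and args.server) could look like this; the flag names and defaults are assumptions:

import argparse

def get_args():
    # hypothetical parser matching the attributes used in code example #4
    parser = argparse.ArgumentParser(
        description='Stream a WAV file to a Riva ASR server')
    parser.add_argument('--audio-file', required=True,
                        help='path to a 16-bit PCM WAV file to transcribe')
    parser.add_argument('--server', default='localhost:50051',
                        help='Riva server address (host:port)')
    return parser.parse_args()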