Example #1
import grpc
import time
import wave

# Jarvis ASR API protos and gRPC stubs
import jarvis_api.jarvis_asr_pb2 as jasr
import jarvis_api.jarvis_asr_pb2_grpc as jasr_srv
import jarvis_api.audio_pb2 as ja


def asr_client(
    id,
    output_file,
    input_file,
    num_iterations,
    simulate_realtime,
    jarvis_uri,
    max_alternatives,
    automatic_punctuation,
    word_time_offsets,
):

    CHUNK = 1600
    channel = grpc.insecure_channel(jarvis_uri)
    wf = wave.open(input_file, 'rb')

    frames = wf.getnframes()
    rate = wf.getframerate()
    duration = frames / float(rate)
    if id == 0:
        print("File duration: %.2fs" % duration)

    client = jasr_srv.JarvisASRStub(channel)
    config = jasr.RecognitionConfig(
        encoding=ja.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=wf.getframerate(),
        language_code="en-US",
        max_alternatives=max_alternatives,
        enable_automatic_punctuation=automatic_punctuation,
        enable_word_time_offsets=word_time_offsets,
    )

    streaming_config = jasr.StreamingRecognitionConfig(
        config=config, interim_results=True)

    # Read the audio file in CHUNK-sized pieces and stream it to the server,
    # optionally throttled so it is sent no faster than real time.
    def generator(w, s, num_iterations, output_file):
        try:
            for i in range(num_iterations):
                # Re-open the file each iteration so every pass streams it from the start.
                w = wave.open(input_file, 'rb')
                start_time = time.time()
                yield jasr.StreamingRecognizeRequest(streaming_config=s)
                num_requests = 0
                while True:
                    d = w.readframes(CHUNK)
                    if len(d) <= 0:
                        break
                    num_requests += 1
                    if simulate_realtime:
                        # Sleep for the difference between the audio duration
                        # sent so far and the wall-clock time elapsed.
                        time_to_sleep = max(
                            0.0, CHUNK / rate * num_requests -
                            (time.time() - start_time))
                        time.sleep(time_to_sleep)
                    yield jasr.StreamingRecognizeRequest(audio_content=d)
                w.close()
        except Exception as e:
            print(e)

    responses = client.StreamingRecognize(
        generator(wf, streaming_config, num_iterations, output_file))
    print_to_file(responses, output_file, max_alternatives, word_time_offsets)
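# Usage sketch (not part of the original example): one way asr_client above
# might be driven. print_to_file is a helper from the same sample and is not
# shown here; the file paths below are placeholders, not real files.
if __name__ == "__main__":
    asr_client(
        id=0,
        output_file="transcript.txt",                 # consumed by print_to_file (assumed)
        input_file="audio_samples/en-US_sample.wav",  # placeholder path to a mono PCM WAV file
        num_iterations=1,
        simulate_realtime=False,
        jarvis_uri="localhost:50051",
        max_alternatives=1,
        automatic_punctuation=True,
        word_time_offsets=False,
    )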
Example #2
import sys

import grpc
import pyaudio

import jarvis_api.jarvis_asr_pb2 as jasr
import jarvis_api.jarvis_asr_pb2_grpc as jasr_srv
import jarvis_api.audio_pb2 as ja


def main():
    # get_args, RATE, CHUNK, MicrophoneStream and listen_print_loop are helpers
    # defined elsewhere in this sample (a sketch of listen_print_loop follows
    # this example).
    args = get_args()

    if args.list_devices:
        p = pyaudio.PyAudio()
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if info['maxInputChannels'] < 1:
                continue
            print(f"{info['index']}: {info['name']}")
        sys.exit(0)

    channel = grpc.insecure_channel(args.server)
    client = jasr_srv.JarvisASRStub(channel)

    config = jasr.RecognitionConfig(
        encoding=ja.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=RATE,
        language_code="en-US",
        max_alternatives=1,
        enable_automatic_punctuation=True,
    )
    streaming_config = jasr.StreamingRecognitionConfig(config=config,
                                                       interim_results=True)

    with MicrophoneStream(RATE, CHUNK, device=args.input_device) as stream:
        audio_generator = stream.generator()
        requests = (jasr.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        def build_generator(cfg, gen):
            yield jasr.StreamingRecognizeRequest(streaming_config=cfg)
            for x in gen:
                yield x

        responses = client.StreamingRecognize(
            build_generator(streaming_config, requests))

        listen_print_loop(responses)
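# Sketch (not part of the original example): a minimal listen_print_loop,
# assuming the streaming response layout (results / alternatives / transcript /
# is_final); the real helper in the sample may differ.
def listen_print_loop(responses):
    for response in responses:
        if not response.results:
            continue
        result = response.results[0]
        if not result.alternatives:
            continue
        transcript = result.alternatives[0].transcript
        if result.is_final:
            print(transcript)                # finalized segment
        else:
            print(transcript, end='\r')      # interim hypothesis, overwritten in place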
Example #3
import grpc
import numpy as np
from flask import Flask

# NLP proto
import jarvis_api.jarvis_nlp_pb2_grpc as jnlp_srv

# ASR proto
import jarvis_api.jarvis_asr_pb2 as jasr
import jarvis_api.jarvis_asr_pb2_grpc as jasr_srv

# TTS proto
import jarvis_api.jarvis_tts_pb2 as jtts
import jarvis_api.jarvis_tts_pb2_grpc as jtts_srv
import jarvis_api.audio_pb2 as ja

import json

app = Flask(__name__)

channel = grpc.insecure_channel('localhost:50051')
jarvis_asr = jasr_srv.JarvisASRStub(channel)
jarvis_nlp = jnlp_srv.JarvisNLPStub(channel)
jarvis_tts = jtts_srv.JarvisTTSStub(channel)


def TTS(text):
    req = jtts.SynthesizeSpeechRequest()
    req.text = text
    req.language_code = "en-US"  # currently required to be "en-US"
    req.encoding = ja.AudioEncoding.LINEAR_PCM  # Supports LINEAR_PCM, FLAC, MULAW and ALAW audio encodings
    req.sample_rate_hz = 22050  # ignored; returned audio is 22.05 kHz
    req.voice_name = "ljspeech"  # ignored

    resp = jarvis_tts.Synthesize(req)
    audio_samples = np.frombuffer(resp.audio, dtype=np.float32)
    return {'content': audio_samples.tolist(), 'sr': 22050}
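# Sketch (not part of the original example): one way the TTS() helper above
# could be exposed through the Flask app created earlier; the /tts route and
# the "text" JSON field are assumptions for illustration.
from flask import jsonify, request

@app.route("/tts", methods=["POST"])
def tts_endpoint():
    text = request.get_json().get("text", "")
    return jsonify(TTS(text))  # {'content': [float samples], 'sr': 22050}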
Example #4
                        default="localhost:50051",
                        type=str,
                        help="URI to GRPC server endpoint")
    parser.add_argument("--audio-file",
                        required=True,
                        help="path to local file to stream")
    return parser.parse_args()


args = get_args()

wf = wave.open(args.audio_file, 'rb')  # opened only to read the sample rate
with open(args.audio_file, 'rb') as fh:
    data = fh.read()

channel = grpc.insecure_channel(args.server)
client = jasr_srv.JarvisASRStub(channel)
config = jasr.RecognitionConfig(
    encoding=ja.AudioEncoding.LINEAR_PCM,
    sample_rate_hertz=wf.getframerate(),
    language_code="en-US",
    max_alternatives=1,
    enable_automatic_punctuation=False,
    audio_channel_count=1,
)

request = jasr.RecognizeRequest(config=config, audio=data)

response = client.Recognize(request)
print(response)
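# Sketch (not part of the original example): pulling just the top transcript
# out of the offline response, assuming the results / alternatives / transcript
# layout used by the streaming examples above.
final_transcript = "".join(
    result.alternatives[0].transcript
    for result in response.results
    if result.alternatives
)
print("Transcript:", final_transcript)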