def main():
    """Stream the audio source selected on the command line to the STT service.

    The arguments come from ``StreamingRecognitionParser``. The opened audio
    reader is turned into a request stream by ``stt_generate_requests`` and the
    resulting response stream is handed to
    ``print_streaming_recognition_responses``.
    """
    cli = StreamingRecognitionParser().parse_args()
    audio_source = audio_open_read(
        cli.audio_file,
        cli.encoding,
        cli.rate,
        cli.num_channels,
        cli.chunk_size,
        cli.pyaudio_max_seconds,
    )
    with audio_source as reader:
        client = stt_pb2_grpc.SpeechToTextStub(make_channel(cli))
        auth = authorization_metadata(cli.api_key, cli.secret_key, "tinkoff.cloud.stt")
        request_stream = stt_generate_requests(cli, reader)
        print_streaming_recognition_responses(
            client.StreamingRecognize(request_stream, metadata=auth)
        )
def speech2text(binary_file):
    """Recognize speech in ``binary_file`` and return the combined transcript.

    Args:
        binary_file: Audio payload, passed unchanged to ``build_request``.

    Returns:
        str: The transcripts of every alternative of every result, joined in
        response order without a separator (empty string if there are none).
    """
    # Use the channel as a context manager so the connection is closed once
    # the unary call has returned instead of being leaked.
    with grpc.secure_channel(endpoint, grpc.ssl_channel_credentials()) as channel:
        stub = stt_pb2_grpc.SpeechToTextStub(channel)
        metadata = authorization_metadata(api_key, secret_key, "tinkoff.cloud.stt")
        response = stub.Recognize(build_request(binary_file), metadata=metadata)

    print_recognition_response(response)

    # Join once instead of growing a string inside the nested loops.
    return "".join(
        str(alternative.transcript)
        for result in response.results
        for alternative in result.alternatives
    )
def main():
    """Send the audio source selected on the command line for unary recognition.

    The arguments come from ``BaseRecognitionParser``. The single ``Recognize``
    response is handed to ``print_recognition_response``.

    Raises:
        ValueError: If the requested encoding is ``stt_pb2.RAW_OPUS``.
    """
    cli = BaseRecognitionParser().parse_args()
    if cli.encoding == stt_pb2.RAW_OPUS:
        raise ValueError("RAW_OPUS encoding is not supported by this script")

    audio_source = audio_open_read(
        cli.audio_file,
        cli.encoding,
        cli.rate,
        cli.num_channels,
        cli.chunk_size,
        cli.pyaudio_max_seconds,
    )
    with audio_source as reader:
        client = stt_pb2_grpc.SpeechToTextStub(make_channel(cli))
        auth = authorization_metadata(cli.api_key, cli.secret_key, "tinkoff.cloud.stt")
        recognition_request = build_recognition_request(cli, reader)
        print_recognition_response(
            client.Recognize(recognition_request, metadata=auth)
        )
def __init__(self, recognize):
    """Open a streaming recognition call configured for ``recognize``.

    Builds the initial ``StreamingRecognizeRequest`` (LINEAR16, 16000 Hz,
    one channel, denormalization and automatic punctuation enabled, single
    utterance, VAD silence threshold 1.20) and boosts every entry of
    ``recognize.context_words`` with a score of 10.0. The response stream
    is stored in ``self._responses``.

    Args:
        recognize: Object exposing ``context_words``, an iterable of phrases.
    """
    self._recognize = recognize

    first_request = stt_pb2.StreamingRecognizeRequest()
    streaming_config = first_request.streaming_config
    recognition_config = streaming_config.config

    recognition_config.encoding = stt_pb2.AudioEncoding.LINEAR16
    recognition_config.sample_rate_hertz = 16000
    recognition_config.num_channels = 1
    recognition_config.enable_denormalization = True
    recognition_config.enable_automatic_punctuation = True
    recognition_config.vad_config.silence_duration_threshold = 1.20
    streaming_config.single_utterance = True

    # One speech context holding all boosted phrases.
    boosted_phrases = []
    for phrase in self._recognize.context_words:
        boosted_phrases.append(stt_pb2.SpeechContextPhrase(text=phrase, score=10.0))
    recognition_config.speech_contexts.append(
        stt_pb2.SpeechContext(phrases=boosted_phrases)
    )

    auth = authorization_metadata(API_KEY, SECRET_KEY, "tinkoff.cloud.stt")
    channel = grpc.secure_channel(ENDPOINT, grpc.ssl_channel_credentials())
    client = stt_pb2_grpc.SpeechToTextStub(channel)
    self._responses = client.StreamingRecognize(
        self.requests(first_request), metadata=auth
    )
def main():
    """Register an incoming call, announce it, and stream its audio to STT.

    Steps:
      1. Read caller id and unique id from the AGI environment (or fall back
         to an empty caller id and a random UUID when no AGI session exists).
      2. Insert a row into ``calls`` and remember its id.
      3. Publish a ``call`` event on ``cent_channel``.
      4. Start ``StreamingRecognize`` and hand the response stream to
         ``save_streaming_recognition_responses``.
    """
    agi = AGI()
    # Debug toggle: assign ``agi = None`` here to run without an AGI session.
    if agi is not None:
        agi.verbose("EAGI script started...")
        ani = agi.env['agi_callerid']
        uid = agi.env['agi_uniqueid']
        agi.verbose("Call answered from: %s with id %s" % (ani, uid))
    else:
        ani = ""
        uid = str(uuid.uuid4())

    sql = "INSERT INTO calls SET uniqueid=%s,callerid=%s,calldate=NOW()"
    with dbcon.cursor() as cursor:
        cursor.execute(sql, (uid, ani))
        call_id = cursor.lastrowid
    # Commit only after the insert succeeded; a failed insert now propagates
    # its own exception without a commit being attempted on top of it.
    dbcon.commit()

    data = {
        "type": "call",
        # NOTE(review): the key is spelled "unqueid"; kept unchanged because
        # subscribers of this channel may depend on it. Confirm before fixing.
        "unqueid": uid,
        "callerid": ani[-4:],  # only the last four characters are published
        "calldate": time.strftime('%Y-%m-%d %H:%M:%S'),
        "call_id": call_id,
    }
    client.publish(cent_channel, data)
    if agi is None:
        ic(data)

    args = StreamingRecognitionParser().parse_args()
    stub = stt_pb2_grpc.SpeechToTextStub(make_channel(args))
    metadata = authorization_metadata(cfg.api_key, cfg.secret_key, "tinkoff.cloud.stt")
    responses = stub.StreamingRecognize(generate_requests(args, agi), metadata=metadata)
    save_streaming_recognition_responses(responses, agi, ani, uid, call_id)
def main():
    """Recognize the audio source from the command line and print the transcript.

    The ``Recognize`` response is converted to a plain dict (unless it already
    is one) and the ``transcript`` of every alternative of every result is
    concatenated, in order and without a separator, then printed.

    Raises:
        ValueError: If the requested encoding is ``stt_pb2.RAW_OPUS``.
    """
    cli = BaseRecognitionParser().parse_args()
    if cli.encoding == stt_pb2.RAW_OPUS:
        raise ValueError("RAW_OPUS encoding is not supported by this script")

    audio_source = audio_open_read(
        cli.audio_file,
        cli.encoding,
        cli.rate,
        cli.num_channels,
        cli.chunk_size,
        cli.pyaudio_max_seconds,
    )
    with audio_source as reader:
        client = stt_pb2_grpc.SpeechToTextStub(make_channel(cli))
        auth = authorization_metadata(cli.api_key, cli.secret_key, "tinkoff.cloud.stt")
        reply = client.Recognize(build_recognition_request(cli, reader), metadata=auth)

        if not isinstance(reply, dict):
            # https://developers.google.com/protocol-buffers/docs/proto3#json
            reply = MessageToDict(
                reply,
                including_default_value_fields=True,
                preserving_proto_field_name=True,
            )

        transcript = "".join(
            alternative["transcript"]
            for result in reply["results"]
            for alternative in result["alternatives"]
        )
        print(transcript)
def speach2text(url, tmp_dir='./tmp'):
    """Download the audio at ``url``, convert it to WAV and return its transcript.

    The download is stored as ``<timestamp>.oga`` in ``tmp_dir``, converted by
    ffmpeg to ``<timestamp>.wav`` and streamed to the recognizer. Both
    temporary files are removed before returning, including when any step
    fails.

    Args:
        url: Location of the source audio.
        tmp_dir: Directory for the two temporary files; it must already exist.

    Returns:
        The transcript of the first alternative of the first result in the
        first streaming response.

    Raises:
        StopIteration: If the service returns no response at all.
        IndexError: If the first response has no results or alternatives.
    """
    ts = int(time.time() * 10**6)  # microsecond timestamp used as file stem
    wav_tmp = os.path.join(tmp_dir, '{}.wav'.format(ts))
    oga_tmp = os.path.join(tmp_dir, '{}.oga'.format(ts))

    try:
        # Close the HTTP response as soon as the payload is on disk.
        with urlopen(url) as remote, open(oga_tmp, 'wb') as f:
            f.write(remote.read())

        stream = ffmpeg.input(oga_tmp)
        stream = ffmpeg.output(stream, wav_tmp)
        ffmpeg.run(stream, overwrite_output=True)

        # Close the channel once the first response has been read.
        with grpc.secure_channel(endpoint, grpc.ssl_channel_credentials()) as channel:
            stub = stt_pb2_grpc.SpeechToTextStub(channel)
            metadata = authorization_metadata(api_key, secret_key, "tinkoff.cloud.stt")
            responses = stub.StreamingRecognize(generate_requests(wav_tmp), metadata=metadata)
            first_response = next(responses)
            return first_response.results[0].recognition_result.alternatives[0].transcript
    finally:
        # Always clean up; a file may be missing if an earlier step failed.
        for path in (wav_tmp, oga_tmp):
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
# NOTE(review): this line starts inside a definition whose header (and the
# `try:` matching the `except` below) is outside the visible source; the code
# is left untouched. Presumably the enclosing function is `generate_requests`,
# judging by the error message and the call at the end -- confirm.
#
# Visible contents, in order:
#   1. Generator tail: opens "../../audio/sample_5.wav", yields the request
#      built by build_first_request(framerate, channels), then yields one
#      StreamingRecognizeRequest per chunk of `framerate // 10` frames until
#      readframes() returns b''. Any exception is printed and re-raised.
#   2. print_streaming_recognition_responses(responses): for every result of
#      every response, prints the channel, the phrase start/end times as
#      timedeltas, each alternative's transcript in double quotes, and a
#      separator line.
#   3. Script-level statements: create the stub over a secure channel, build
#      the authorization metadata, start StreamingRecognize and print the
#      responses.
with wave.open("../../audio/sample_5.wav") as f: yield build_first_request(f.getframerate(), f.getnchannels()) frame_samples = f.getframerate() // 10 # Send 100ms at a time for data in iter(lambda: f.readframes(frame_samples), b''): request = stt_pb2.StreamingRecognizeRequest() request.audio_content = data yield request except Exception as e: print("Got exception in generate_requests", e) raise def print_streaming_recognition_responses(responses): for response in responses: for result in response.results: print("Channel", result.recognition_result.channel) print("Phrase start:", result.recognition_result.start_time.ToTimedelta()) print("Phrase end: ", result.recognition_result.end_time.ToTimedelta()) for alternative in result.recognition_result.alternatives: print('"' + alternative.transcript + '"') print("------------------") stub = stt_pb2_grpc.SpeechToTextStub( grpc.secure_channel(endpoint, grpc.ssl_channel_credentials())) metadata = authorization_metadata(api_key, secret_key, "tinkoff.cloud.stt") responses = stub.StreamingRecognize(generate_requests(), metadata=metadata) print_streaming_recognition_responses(responses)