-
Notifications
You must be signed in to change notification settings - Fork 0
/
google_speech.py
122 lines (96 loc) · 4.56 KB
/
google_speech.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/python
from __future__ import division
import contextlib
import threading
import pyaudio
from gcloud.credentials import get_credentials
from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
from google.rpc import code_pb2
from grpc.beta import implementations
import rospy
from google_cloud_speech.msg import ResultTranscript
from std_msgs.msg import Empty
# Microphone capture settings: sample rate in Hz, channel count, and the
# number of frames read per buffer (one tenth of a second of audio).
RATE = 16000
CHANNELS = 1
CHUNK = RATE // 10

# Deadline, in seconds, handed to the streaming recognize call (8 hours).
DEADLINE_SECS = 60 * 60 * 8

# OAuth scope applied to the credentials used for the speech channel.
SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
class GoogleSpeech():
    """Stream microphone audio to Google Cloud Speech and publish results.

    Final transcripts are published on ``result_transcript`` as
    ``ResultTranscript`` messages; ``Empty`` messages are published on
    ``start_of_speech`` and ``end_of_speech`` when the recognizer's
    endpointer events indicate that speech began or finished.
    """

    def __init__(self):
        """Create the publishers and start the background recognition thread."""
        self.pub_transcript = rospy.Publisher('result_transcript', ResultTranscript, queue_size=5)
        self.pub_start_speech = rospy.Publisher('start_of_speech', Empty, queue_size=10)
        self.pub_end_speech = rospy.Publisher('end_of_speech', Empty, queue_size=10)

        # State flags driven by the endpointer events in listen_print_loop().
        self.is_start_audio = False
        self.is_start_speech = False
        self.is_stop_audio = True

        # Handed to request_stream(), which currently does not consult it.
        self.stop_audio = threading.Event()

        # NOTE(review): the ``with`` block ends as soon as the thread has been
        # started, while the worker keeps using ``self.service`` -- confirm the
        # stub remains usable after its ``__exit__`` has run.
        with cloud_speech.beta_create_Speech_stub(
                self.make_channel('speech.googleapis.com', 443)) as self.service:
            self.t1 = threading.Thread(target=self.listen_print_loop)
            self.t1.start()

    def request_stream(self, stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
        """Yield the streaming requests for one recognition session.

        The first request carries only the streaming configuration; every
        following request carries one buffer of recorded audio. The generator
        finishes when ROS is shutting down or the audio stream returns no data.

        Args:
            stop_audio: accepted for interface compatibility; currently unused.
            channels: number of input channels to record.
            rate: sample rate, used for both recording and the recognizer.
            chunk: number of frames read from the microphone per request.
        """
        recognition_config = cloud_speech.RecognitionConfig(
            encoding='LINEAR16',
            sample_rate=rate,
            language_code='ko-KR',
        )
        streaming_config = cloud_speech.StreamingRecognitionConfig(
            config=recognition_config,
            interim_results=True,
            single_utterance=False
        )
        yield cloud_speech.StreamingRecognizeRequest(streaming_config=streaming_config)

        with self.record_audio(channels, rate, chunk) as audio_stream:
            while not rospy.is_shutdown():
                data = audio_stream.read(chunk)
                if not data:
                    # A plain return ends the generator. Raising StopIteration
                    # here is turned into RuntimeError by PEP 479 and also
                    # bypassed the audio cleanup when leaving the ``with``.
                    return
                yield cloud_speech.StreamingRecognizeRequest(audio_content=data)

    @contextlib.contextmanager
    def record_audio(self, channels, rate, chunk):
        """Open a 16-bit PyAudio input stream and always release it on exit.

        Args:
            channels: number of input channels.
            rate: sample rate passed to PyAudio.
            chunk: frames per buffer.

        Yields:
            The opened PyAudio input stream.
        """
        audio_interface = pyaudio.PyAudio()
        try:
            audio_stream = audio_interface.open(
                format=pyaudio.paInt16,
                channels=channels,
                rate=rate,
                input=True,
                frames_per_buffer=chunk,
            )
            try:
                yield audio_stream
            finally:
                # Runs on errors and on early generator close as well, so the
                # input device is not left open.
                audio_stream.stop_stream()
                audio_stream.close()
        finally:
            audio_interface.terminate()

    def listen_print_loop(self):
        """Consume recognizer responses and publish speech events/transcripts.

        Runs on the worker thread started by ``__init__``.

        Raises:
            RuntimeError: if a response carries a non-OK error code.
        """
        recognize_stream = self.service.StreamingRecognize(
            self.request_stream(self.stop_audio), DEADLINE_SECS)

        for resp in recognize_stream:
            if resp.error.code != code_pb2.OK:
                raise RuntimeError('Server error: ' + resp.error.message)

            # NOTE(review): endpointer_type 1 / 2 are presumably
            # START_OF_SPEECH / END_OF_SPEECH in the v1beta1 API -- confirm.
            # Within this class is_stop_audio starts True and is only ever
            # assigned True, so this guard always passes.
            # NOTE(review): the nesting of this if/elif was reconstructed from
            # an unindented source -- confirm against the original file.
            if self.is_stop_audio:
                if resp.endpointer_type == 1:
                    self.is_start_audio = True
                elif resp.endpointer_type == 2:
                    self.is_start_audio = False
                    self.is_stop_audio = True

            # Responses with endpointer_type 0 trigger the edge notifications:
            # publish once when speech has started, once when it has ended.
            if resp.endpointer_type == 0 and self.is_start_audio:
                if not self.is_start_speech:
                    self.is_start_speech = True
                    self.pub_start_speech.publish()
            elif resp.endpointer_type == 0 and self.is_stop_audio:
                if self.is_start_speech:
                    self.is_start_speech = False
                    self.pub_end_speech.publish()

            # Interim results are requested but only final ones are published.
            for result in resp.results:
                if result.is_final:
                    msg = ResultTranscript()
                    msg.transcript = result.alternatives[0].transcript
                    msg.confidence = result.alternatives[0].confidence
                    self.pub_transcript.publish(msg)

    def make_channel(self, host, port):
        """Build a secure gRPC channel that sends an OAuth bearer token.

        Args:
            host: hostname of the speech service.
            port: port of the speech service.

        Returns:
            The channel created by ``implementations.secure_channel``.
        """
        ssl_channel = implementations.ssl_channel_credentials(None, None, None)

        creds = get_credentials().create_scoped([SPEECH_SCOPE])
        # The access token is fetched once here and reused for every call
        # made on the returned channel.
        auth_header = ('Authorization', 'Bearer ' + creds.get_access_token().access_token)
        auth_plugin = implementations.metadata_call_credentials(
            lambda _, cb: cb([auth_header], None),
            name='google_creds')

        composite_channel = implementations.composite_channel_credentials(ssl_channel, auth_plugin)
        return implementations.secure_channel(host, port, composite_channel)
def main():
    """Initialise the ``google_speech`` node and block until ROS shuts down."""
    rospy.init_node('google_speech', anonymous=False)
    # Constructing the object starts its recognition thread; the reference is
    # held for as long as spin() is running.
    recognizer = GoogleSpeech()
    rospy.spin()


if __name__ == '__main__':
    main()