def do_recognition(self, fname, actual_rate, start_time, end_time):
    """Send one recorded audio file to the recognition service and publish the result.

    The file is POSTed to ``self.url``, deleted afterwards, and - when the
    service reports success and returns a recognised entry - a
    ``SpeechStamped`` message is published on ``self.pub_speech``.
    Whatever happens, the calling thread is removed from
    ``self.threads_list`` before the method returns.

    Args:
        fname: Path of the audio file to upload. The file is opened in
            binary mode and unlinked once the request has been attempted.
        actual_rate: Value substituted for ``{actual_rate}`` in the
            ``content-type`` header template.
        start_time: Used as the message header stamp
            (presumably a ``rospy.Time`` - confirm against the caller).
        end_time: ``end_time - start_time`` becomes the message duration.

    Raises:
        Whatever ``open``, ``requests.request``, ``response.json()`` or the
        indexing of the reply raise; the exception propagates after the
        thread has been deregistered.
    """
    try:
        # Fill the templates into per-call copies. Writing the formatted
        # values back into self.querystring / self.headers would destroy the
        # placeholders, so every later request would silently reuse the first
        # request id, sample rate and OAuth token.
        params = self.querystring.copy()
        params['requestid'] = params['requestid'].format(request_id=uuid.uuid4())

        headers = self.headers.copy()
        headers['content-type'] = headers['content-type'].format(actual_rate=actual_rate)
        # Since token is updated automatically it should always be valid
        headers['authorization'] = headers['authorization'].format(token=self.oauth.token)

        with open(fname, 'rb') as audio:
            try:
                response = requests.request(
                    "POST",
                    self.url,
                    headers=headers,
                    params=params,
                    data=audio,
                )
            finally:
                # The recording is removed whether or not the upload worked.
                os.unlink(fname)

        if response.status_code != 200:
            return

        # Parse the body once instead of once per access.
        reply = response.json()
        if reply['header']['status'] != 'success':
            return

        best = reply['results'][0]
        if 'lexical' not in best:
            # Nothing to report; publishing would need a text that was never set.
            return

        # The 'lexical' key gates the result, but the published text is 'name'.
        text = best['name']
        rospy.loginfo("text: {}".format(text))

        confidence = 0.0
        if 'confidence' in best:
            # Scaled by 100 before publishing, as before.
            confidence = float(best['confidence']) * 100
            rospy.loginfo("confidence: {}".format(confidence))

        msg = SpeechStamped()
        msg.header.stamp = start_time
        msg.header.frame_id = self.frame_id
        msg.duration = end_time - start_time
        msg.text = text
        msg.confidence = confidence
        self.pub_speech.publish(msg)
    finally:
        # Deregister this worker even if the request or the parsing failed,
        # otherwise the entry would stay in the list forever.
        with self.lock:
            self.threads_list.remove(threading.current_thread())
def do_recognition(self):
    """Do speech recognition.

    Runs while ``self.started`` is true. Each iteration:

    1. runs ``self.sox_args`` and waits for it to finish (presumably the
       audio recording step - confirm where ``sox_args`` is built);
    2. reads the audio length from ``self.length_args`` and skips clips
       shorter than ``self.dur_threshold``;
    3. reads the sample rate from ``self.rate_args`` into ``self.actual_rate``;
    4. fills the ``self.wget_cmd`` template, runs it, and parses the reply;
    5. publishes a ``SpeechStamped`` message on ``self.pub_speech`` when a
       transcript was returned.

    A reply that cannot be parsed is logged and skipped so that one bad
    response does not end the recognition loop.
    """
    while self.started:
        # Blocks until the command exits; its exit status is not used.
        subprocess.call(self.sox_args)
        end_time = rospy.Time.now()

        length_out, _dummy_err = subprocess.Popen(
            self.length_args, stdout=subprocess.PIPE
        ).communicate()
        audio_len = float(length_out.strip())
        # The clip ended "now", so its start is one clip length earlier.
        start_time = end_time - rospy.Duration(audio_len)

        if audio_len < self.dur_threshold:
            rospy.logwarn("Recorded audio is too short ({}s < {}s). Ignoring".format(audio_len, self.dur_threshold))
            continue

        rate_out, _dummy_err = subprocess.Popen(
            self.rate_args, stdout=subprocess.PIPE
        ).communicate()
        self.actual_rate = int(rate_out.strip())

        # Format into a local: overwriting self.wget_cmd would erase the
        # placeholders and freeze the first iteration's rate/key/language
        # into every later command.
        wget_cmd = self.wget_cmd.format(
            actual_rate=self.actual_rate,
            api_key=self.api_key,
            lang=self.lang,
        )
        self.wget_args = shlex.split(wget_cmd)
        wget_out, wget_err = subprocess.Popen(
            self.wget_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        ).communicate()

        # Any stderr output, or a reply of 16 bytes or fewer, is treated as
        # "no result" (presumably an empty-result reply - confirm).
        if wget_err or len(wget_out) <= 16:
            continue

        # communicate() returns bytes on Python 3; work on text from here on.
        if isinstance(wget_out, bytes):
            wget_out = wget_out.decode('utf-8')

        # The first line of the reply is discarded; the JSON document that is
        # parsed is whatever follows the first newline.
        _first_line, _newline, json_text = wget_out.partition('\n')
        try:
            best = json.loads(json_text)['result'][0]['alternative'][0]
        except (ValueError, KeyError, IndexError) as err:
            rospy.logwarn("Could not parse recognition reply: {}".format(err))
            continue

        if 'transcript' not in best:
            # Without a transcript there is no text to publish.
            continue

        text = best['transcript']
        rospy.loginfo("text: {}".format(text))

        confidence = 0.0
        if 'confidence' in best:
            # Scaled by 100 before publishing, as before.
            confidence = best['confidence'] * 100
            rospy.loginfo("confidence: {}".format(confidence))

        msg = SpeechStamped()
        msg.header.stamp = start_time
        msg.header.frame_id = "human_frame"
        msg.duration = end_time - start_time
        msg.text = text
        msg.confidence = confidence
        self.pub_speech.publish(msg)