コード例 #1
0
ファイル: msspeech_node.py プロジェクト: bgromov/gspeech
  def do_recognition(self, fname, actual_rate, start_time, end_time):
    """Upload a recorded audio file for recognition and publish the result.

    The file is POSTed to ``self.url``. When the service answers with HTTP 200
    and a ``'success'`` status, the first entry of ``results`` is published on
    ``self.pub_speech`` as a ``SpeechStamped`` message.

    Args:
      fname: Path of the audio file to upload. The file is deleted once the
        upload attempt has finished, whether or not it succeeded.
      actual_rate: Value substituted into the ``content-type`` header template.
      start_time: Stamp of the published message; also the start of the
        reported duration (presumably a ``rospy.Time`` -- confirm with caller).
      end_time: End of the utterance; ``end_time - start_time`` is published
        as the duration.

    The calling thread is always removed from ``self.threads_list`` before
    this method returns or raises.
    """
    try:
      # Fill the placeholders into per-call copies. Formatting the shared
      # dictionaries in place would overwrite the templates, so every later
      # call would silently reuse the first request id, rate and token.
      params = dict(self.querystring)
      params['requestid'] = self.querystring['requestid'].format(request_id = uuid.uuid4())

      headers = dict(self.headers)
      headers['content-type'] = self.headers['content-type'].format(actual_rate = actual_rate)
      # Since token is updated automatically it should always be valid
      headers['authorization'] = self.headers['authorization'].format(token = self.oauth.token)

      try:
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # this worker indefinitely -- consider adding one.
        with open(fname, 'rb') as audio:
          response = requests.request("POST",
            self.url,
            headers = headers,
            params  = params,
            data    = audio
          )
      finally:
        # The recording is single-use; remove it even if the upload failed.
        os.unlink(fname)

      best = None
      if response.status_code == 200:
        try:
          reply = response.json()
          if reply['header']['status'] == 'success':
            best = reply['results'][0]
        except (ValueError, KeyError, IndexError, TypeError) as err:
          rospy.logwarn("Malformed recognition response: {}".format(err))

      if best is not None:
        text = None
        confidence = 0.0

        if 'lexical' in best:
          # The 'name' field is what gets published; 'lexical' only gates it.
          text = best.get('name')
          rospy.loginfo("text: {}".format(text))
        if 'confidence' in best:
          confidence = float(best['confidence']) * 100
          rospy.loginfo("confidence: {}".format(confidence))

        if text is None:
          rospy.logwarn("Recognition result contains no text. Ignoring")
        else:
          msg = SpeechStamped()
          msg.header.stamp = start_time
          msg.header.frame_id = self.frame_id
          msg.duration = end_time - start_time
          msg.text = text
          msg.confidence = confidence
          self.pub_speech.publish(msg)
    finally:
      # Deregister this worker even when the request or parsing raised, so
      # the list does not accumulate dead threads.
      with self.lock:
        self.threads_list.remove(threading.current_thread())
コード例 #2
0
ファイル: gspeech_node.py プロジェクト: bgromov/gspeech
  def do_recognition(self):
    """Record, recognize and publish utterances until ``self.started`` is cleared.

    Each iteration runs the recorder (``self.sox_args``), queries the length
    and sample rate of the recording (``self.length_args``, ``self.rate_args``),
    sends it to the recognition service through the ``self.wget_cmd`` command
    template and publishes the best alternative on ``self.pub_speech`` as a
    ``SpeechStamped`` message. Recordings shorter than ``self.dur_threshold``
    and replies without a usable transcript are skipped.
    """
    while self.started:
      subprocess.call(self.sox_args)
      end_time = rospy.Time.now()
      audio_len, _dummy_err = subprocess.Popen(self.length_args, stdout=subprocess.PIPE).communicate()
      audio_len = float(audio_len.strip())
      start_time = end_time - rospy.Duration(audio_len)

      if audio_len < self.dur_threshold:
        rospy.logwarn("Recorded audio is too short ({}s < {}s). Ignoring".format(audio_len, self.dur_threshold))
        continue

      actual_rate, _dummy_err = subprocess.Popen(self.rate_args, stdout=subprocess.PIPE).communicate()
      self.actual_rate = int(actual_rate.strip())

      # Format into a local so self.wget_cmd keeps its placeholders; writing
      # the result back would freeze the first iteration's rate into the
      # template for every later recording.
      wget_cmd = self.wget_cmd.format(actual_rate=self.actual_rate, api_key=self.api_key, lang=self.lang)
      self.wget_args = shlex.split(wget_cmd)

      wget_out, wget_err = subprocess.Popen(
        self.wget_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
      ).communicate()

      if wget_err or len(wget_out) <= 16:
        continue

      try:
        # Only the part after the first line is parsed. The bytes literal
        # keeps the split valid whether the pipe output is str (Python 2)
        # or bytes (Python 3).
        payload = wget_out.split(b'\n', 1)[1]
        best = json.loads(payload)['result'][0]['alternative'][0]
      except (ValueError, KeyError, IndexError) as err:
        rospy.logwarn("Malformed recognition response: {}".format(err))
        continue

      if 'transcript' not in best:
        # Without this guard the previous utterance's text would be
        # republished (or a NameError raised on the first iteration).
        rospy.logwarn("Recognition result has no transcript. Ignoring")
        continue

      text = best['transcript']
      rospy.loginfo("text: {}".format(text))

      confidence = 0.0
      if 'confidence' in best:
        confidence = best['confidence'] * 100
        rospy.loginfo("confidence: {}".format(confidence))

      msg = SpeechStamped()
      msg.header.stamp = start_time
      msg.header.frame_id = "human_frame"
      msg.duration = end_time - start_time
      msg.text = text
      msg.confidence = confidence
      self.pub_speech.publish(msg)