def transcribe(self, raw_audio_file):
     """Transcribe a raw audio file by decoding it in fixed-size segments.

     The file is split into consecutive chunks of ``segment_length`` bytes,
     each chunk is passed to ``_sphinx3.decode_raw``, and the per-segment
     results are stitched together.

     Args:
         raw_audio_file: Path of the raw audio file to read.

     Returns:
         A ``(transcript, details)`` tuple. ``transcript`` is every segment
         transcript followed by a single space, concatenated in order.
         ``details`` is a list of ``(term, start, end, s1, s2)`` tuples in
         which ``start`` and ``end`` have been divided by 100.0 and shifted
         by the segment's offset (``segment index * segment_duration``).
     """
     segment_duration = 120
     # NOTE(review): presumably 14000 samples/s * 2 bytes per sample. The
     # offsets added below assume one segment spans exactly
     # segment_duration seconds -- confirm the rate matches the audio.
     segment_length = segment_duration * 14000 * 2
     transcript_parts = []
     details = []
     # Binary mode: the payload is audio samples, not text. The context
     # manager closes the handle even if the decoder raises.
     with open(raw_audio_file, 'rb') as raw:
         # Find the size by seeking to the end (whence=2) rather than
         # loading the whole file into memory just to measure it.
         raw.seek(0, 2)
         # Floor division keeps n an int so range() accepts it.
         # NOTE(review): as before, a file whose size is an exact multiple
         # of segment_length yields one trailing empty segment.
         n = 1 + raw.tell() // segment_length
         raw.seek(0)
         for i in range(n):
             # Sequential reads land on the same i * segment_length
             # boundaries that reopening and seeking would.
             segment_data = raw.read(segment_length)
             (transcript_s, details_s) = _sphinx3.decode_raw(segment_data)
             transcript_parts.append(transcript_s + ' ')
             offset = i * segment_duration
             details += [
                 (term, start / 100.0 + offset, end / 100.0 + offset, s1, s2)
                 for (term, start, end, s1, s2) in details_s
             ]
     return (''.join(transcript_parts), details)
 def transcribe(self, raw_audio_file):
     """Decode a raw audio file segment by segment and merge the results.

     The file is consumed in consecutive chunks of ``segment_length``
     bytes; every chunk is handed to ``_sphinx3.decode_raw`` and the
     returned transcript and word details are accumulated.

     Args:
         raw_audio_file: Path of the raw audio file to read.

     Returns:
         A ``(transcript, details)`` tuple. ``transcript`` is the
         concatenation of each segment transcript followed by one space.
         ``details`` is a list of ``(term, start, end, s1, s2)`` tuples
         whose ``start`` and ``end`` were divided by 100.0 and shifted by
         ``segment index * segment_duration``.
     """
     segment_duration = 120
     # NOTE(review): presumably 14000 samples/s * 2 bytes per sample. The
     # offsets below treat each segment as lasting segment_duration
     # seconds -- verify this rate against the recorded audio.
     segment_length = segment_duration * 14000 * 2
     pieces = []
     details = []
     # Open once, in binary mode (audio is not text), and let the context
     # manager release the handle even when decoding fails.
     with open(raw_audio_file, 'rb') as raw:
         # whence=2 seeks relative to the end of the file; tell() then
         # gives the size without reading the contents into memory.
         raw.seek(0, 2)
         total_bytes = raw.tell()
         raw.seek(0)
         # Floor division so the segment count is an int on Python 3 too.
         # NOTE(review): an exact multiple of segment_length still
         # produces one final empty segment, matching the old behavior.
         n = 1 + total_bytes // segment_length
         for i in range(n):
             segment_data = raw.read(segment_length)
             (transcript_s, details_s) = _sphinx3.decode_raw(segment_data)
             pieces.append(transcript_s + ' ')
             shift = i * segment_duration
             for (term, start, end, s1, s2) in details_s:
                 details.append(
                     (term, start / 100.0 + shift,
                      end / 100.0 + shift, s1, s2)
                 )
     return (''.join(pieces), details)
Esempio n. 3
0
 def test_decode_raw(self):
     """Decode the pittsburgh raw-audio fixture and check the transcript."""
     # The context manager closes the fixture file even if read() fails;
     # previously the handle was never closed.
     with open('../model/lm/an4/pittsburgh.littleendian.raw', 'rb') as wav:
         data = wav.read()
     # NOTE(review): 'foo' is presumably an utterance id -- confirm
     # against the _sphinx3 bindings.
     text, segs = _sphinx3.decode_raw(data, 'foo')
     self.assertEqual(text, "P I T G S B U R G H")
Esempio n. 4
0
 def test_decode_raw(self):
     """Check that decode_raw returns the expected text for the fixture."""
     fixture_path = '../model/lm/an4/pittsburgh.littleendian.raw'
     # Read inside a with-block so the handle is released
     # deterministically rather than leaked for the rest of the test run.
     with open(fixture_path, 'rb') as wav:
         data = wav.read()
     # NOTE(review): the second argument looks like an utterance label;
     # verify its meaning in the _sphinx3 bindings.
     text, segs = _sphinx3.decode_raw(data, 'foo')
     self.assertEqual(text, "P I T G S B U R G H")
Esempio n. 5
0
import _sphinx3, sys, os, osc

# OSC destination for every message this script sends.
host = "localhost"
portout = 9000

# Send "/ready" before the recognizer is configured and initialised.
osc.init()
osc.sendMsg("/ready", "1", host, portout)

# All model files are looked up under "<sys.path[0]>/dic".
model_dir = sys.path[0] + '/dic'
_sphinx3.parse_argdict({
    'samprate': '16000',
    'hmm': model_dir,
    'dict': model_dir + '/pd.dic',
    'fdict': model_dir + '/filler',
    'lm': model_dir + '/pd.dmp',
})
_sphinx3.init()

# Read the recording in binary mode (it is audio, not text) and close the
# handle as soon as the bytes are in memory.
with open("/tmp/r16k.raw", "rb") as audio:
    data = audio.read()
words = _sphinx3.decode_raw(data)

# The first element of the decode result is sent lower-cased as "/words".
osc.sendMsg("/words", words[0].lower(), host, portout)