# transcriber.py
# Forked from bbc/bbc-reith-lectures-sphinx-evaluation.
# Copyright (c) 2011 British Broadcasting Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import _sphinx3
import os
class Transcriber(object):
    """Process-wide singleton wrapper around the ``_sphinx3`` bindings.

    Every construction returns the same instance.  Call :meth:`initialise`
    once with the model paths, then :meth:`transcribe` for each raw audio
    file.
    """

    _instance = None
    initialised = False
    sphinx3_dir = '/usr/local/share/sphinx3'

    # Sampling rate handed to the decoder as 'samprate' and used to size
    # the audio segments, so both always agree.
    sample_rate = 14000
    # Bytes per sample used when sizing segments.
    # NOTE(review): presumably 16-bit PCM -- confirm against the audio source.
    bytes_per_sample = 2
    # Divisor turning the decoder's start/end values into seconds.
    # NOTE(review): presumably 100 frames per second -- confirm.
    frames_per_second = 100.0

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use.

        Constructor arguments are accepted for backward compatibility but
        are not forwarded: ``object.__new__`` rejects extra arguments.
        """
        if cls._instance is None:
            cls._instance = super(Transcriber, cls).__new__(cls)
        return cls._instance

    def initialise(self, acoustic_model, dictionary, fdict, language_model):
        """Configure and start the decoder; later calls are no-ops.

        Args:
            acoustic_model: value passed to the decoder as ``hmm``.
            dictionary: value passed to the decoder as ``dict``.
            fdict: value passed to the decoder as ``fdict``.
            language_model: value passed to the decoder as ``lm``.
        """
        if self.initialised:
            return
        _sphinx3.parse_argdict({
            'samprate': str(self.sample_rate),
            'nfft': '1024',
            'hmm': acoustic_model,
            'dict': dictionary,
            'fdict': fdict,
            'lm': language_model,
            'beam': '1e-60',
            'wbeam': '1e-40',
            'ci_pbeam': '1e-8',
            'subvqbeam': '1e-2',
            'maxhmmpf': '2000',
            'maxcdsenpf': '1000',
            'maxwpf': '8',
        })
        _sphinx3.init()
        self.initialised = True

    def transcribe(self, raw_audio_file, segment_duration=120):
        """Decode a raw audio file in fixed-length segments.

        The file is streamed in binary mode, one segment at a time, so it
        is never held in memory as a whole.  Empty reads are not sent to
        the decoder, so an empty file yields ``('', [])``.

        Args:
            raw_audio_file: path of the raw audio file to decode.
            segment_duration: whole number of seconds of audio per
                decoder call (default 120).

        Returns:
            A ``(transcript, details)`` tuple.  ``transcript`` is the
            per-segment text, each followed by a single space.
            ``details`` is a list of ``(term, start, end, s1, s2)`` tuples
            where ``start`` and ``end`` are offsets in seconds from the
            beginning of the file and ``s1``/``s2`` are passed through
            from the decoder unchanged.
        """
        segment_length = (
            segment_duration * self.sample_rate * self.bytes_per_sample)
        scale = self.frames_per_second
        pieces = []
        details = []
        with open(raw_audio_file, 'rb') as raw:
            index = 0
            while True:
                segment_data = raw.read(segment_length)
                if not segment_data:
                    # End of file: nothing left to decode.
                    break
                # Shift segment-relative times to file-relative seconds.
                offset = index * segment_duration
                transcript_s, details_s = _sphinx3.decode_raw(segment_data)
                pieces.append(transcript_s + ' ')
                details.extend(
                    (term, start / scale + offset, end / scale + offset,
                     s1, s2)
                    for (term, start, end, s1, s2) in details_s)
                index += 1
        return (''.join(pieces), details)