Beispiel #1
0
    def calc_recognition_performance(self, HResults_scp, lexicon_file=None):
        """Invoke ``HResults`` and return the loaded performance figures.

        The argument list handed to ``run_command`` is
        ``HResults -T 1 <lexicon_file> -S <HResults_scp>``; the three values
        it returns are stored on ``self.command``, ``self.output`` and
        ``self.error``.

        Args:
            HResults_scp: value passed after the ``-S`` option.
            lexicon_file: positional argument placed before ``-S``. Falls
                back to ``self.lexicon_file`` when omitted.

        Returns:
            Whatever ``self._load_recognition_performance()`` returns.
        """
        # Identity test: None is a singleton, and ``==`` can be overridden by
        # the argument's own ``__eq__``.
        if lexicon_file is None:
            lexicon_file = self.lexicon_file

        arguments = ['HResults', '-T', '1', lexicon_file, '-S', HResults_scp]
        self.command, self.output, self.error = run_command(arguments)
        return self._load_recognition_performance()
Beispiel #2
0
    def recognition(self, lattice_file, hmmdefs, HVite_scp, lexicon_file=None):
        """Invoke ``HVite`` and return the loaded recognition result.

        The three values returned by ``run_command`` are stored on
        ``self.command``, ``self.output`` and ``self.error``.

        Args:
            lattice_file: value passed after the ``-w`` option.
            hmmdefs: value passed after the ``-H`` option.
            HVite_scp: value passed after the ``-S`` option.
            lexicon_file: positional argument placed before
                ``self.phonelist_txt``. Falls back to ``self.lexicon_file``
                when omitted.

        Returns:
            Whatever ``self._load_recognition_result()`` returns.
        """
        # Identity test: None is a singleton, and ``==`` can be overridden by
        # the argument's own ``__eq__``.
        if lexicon_file is None:
            lexicon_file = self.lexicon_file

        arguments = ['HVite', '-T', '1', '-C', self.config_rec]
        arguments += ['-w', lattice_file, '-H', hmmdefs]
        arguments += [lexicon_file, self.phonelist_txt, '-S', HVite_scp]

        self.command, self.output, self.error = run_command(arguments)
        return self._load_recognition_result()
Beispiel #3
0
    def create_hmmdefs(self, proto, hmmdefs):
        """Write the output of the ``mkhmmdefs`` perl script to ``hmmdefs``.

        Runs ``perl <HTK.mkhmmdefs_pl> <proto> <self.phonelist_txt>`` through
        ``run_command`` and saves the second returned value (the captured
        output) as ASCII bytes.

        Args:
            proto: argument passed to the perl script before the phone list.
            hmmdefs: path of the file that receives the script output.
        """
        perl_args = ['perl', HTK.mkhmmdefs_pl, proto, self.phonelist_txt]
        _command, text, _error = run_command(perl_args)

        # On Windows, normalise every line ending to CRLF: strip any existing
        # carriage returns first so they are not doubled.
        if os.name == 'nt':
            text = text.replace('\r', '').replace('\n', '\r\n')

        with open(hmmdefs, 'wb') as handle:
            handle.write(bytes(text, 'ascii'))
Beispiel #4
0
    def _network2lattice(self, network_file, lattice_file):
        """Create a word-level lattice file from a word network via ``HParse``.

        The network file is a text syntax description containing a set of
        rewrite rules based on extended Backus-Naur Form (EBNF). The three
        values returned by ``run_command`` are stored on ``self.command``,
        ``self.output`` and ``self.error``.

        Args:
            network_file: word network.
            lattice_file: word level lattice file.

        Reference:
            http://www1.icsi.berkeley.edu/Speech/docs/HTKBook/node247.html
        """
        hparse_args = ['HParse', network_file, lattice_file]
        self.command, self.output, self.error = run_command(hparse_args)
Beispiel #5
0
    def flat_start(self, HCompV_scp, model_dir, proto=None):
        """Create a prototype and run ``HCompV`` on it.

        ``self._create_proto(proto)`` is called first, then ``run_command``
        receives
        ``HCompV -T 1 -C <self.config_train> -f 0.01 -m -S <HCompV_scp>
        -M <model_dir> <proto>``. Its three return values are stored on
        ``self.command``, ``self.output`` and ``self.error``.

        Args:
            HCompV_scp: a script file (passed after ``-S``).
            model_dir: the directory (passed after ``-M``).
            proto: path of the prototype file to create and pass to HCompV.
                When omitted, the historical hard-coded location is used so
                existing callers keep their behaviour.
        """
        if proto is None:
            # Legacy default: a machine-specific absolute path. Callers on any
            # other machine should pass ``proto`` explicitly.
            proto = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk\config\proto'
        self._create_proto(proto)

        self.command, self.output, self.error = run_command([
            'HCompV', '-T', '1', '-C', self.config_train, '-f', '0.01', '-m',
            '-S', HCompV_scp, '-M', model_dir, proto
        ])
        return
Beispiel #6
0
    def re_estimation(self,
                      hmmdefs,
                      output_dir,
                      HCompV_scp,
                      mlf_file=None,
                      macros=None):
        """Build and run an ``HERest`` command line.

        The fixed part of the command is
        ``HERest -T 1 -C <self.config_train> -v 0.01 -t 250.0 150.0 1000.0
        -H <hmmdefs> -M <output_dir>``; optional arguments are appended after
        it, followed by ``-S <HCompV_scp> <self.phonelist_txt>``. The three
        values returned by ``run_command`` are stored on ``self.command``,
        ``self.output`` and ``self.error``.

        Args:
            hmmdefs: value passed after the first ``-H`` option.
            output_dir: value passed after the ``-M`` option.
            HCompV_scp: value passed after the ``-S`` option.
            mlf_file: when given, ``-I <mlf_file>`` is added.
            macros: when given, a second ``-H <macros>`` is added (after
                ``-I`` if both are present).
        """
        command_list = [
            'HERest', '-T', '1', '-C', self.config_train, '-v', '0.01', '-t',
            '250.0', '150.0', '1000.0', '-H', hmmdefs, '-M', output_dir
        ]
        # Identity tests: None is a singleton, and ``==`` can be overridden by
        # the argument's own ``__eq__``.
        if mlf_file is not None:
            command_list.extend(['-I', mlf_file])
        if macros is not None:
            command_list.extend(['-H', macros])
        command_list.extend(['-S', HCompV_scp, self.phonelist_txt])

        self.command, self.output, self.error = run_command(command_list)

        return
Beispiel #7
0
    def create_dictionary(self, sentence, log_txt, dictionary_file):
        """Run ``HDMan`` for one sentence using temporary helper files.

        A temporary label file is filled by ``self.create_label_file`` and a
        second temporary file is handed to HDMan's ``-n`` option. Both are
        removed afterwards, including when a step in between raises. The three
        values returned by ``run_command`` are stored on ``self.command``,
        ``self.output`` and ``self.error``.

        NOTE(review): the original author noted that an error occurs when the
        length of the filename exceeds 32 characters. Which filename is meant
        is not visible here; confirm before relying on long paths.

        Args:
            sentence: passed to ``self.create_label_file``.
            log_txt: value passed after the ``-l`` option.
            dictionary_file: positional argument placed before
                ``self.lexicon_file``.
        """
        # Create-and-close reserves a unique file name that other code (and
        # the external tool) can reopen by path.
        label_file = NamedTemporaryFile(mode='w',
                                        delete=False,
                                        encoding='utf-8')
        label_file.close()
        try:
            self.create_label_file(sentence, label_file.name)

            phonelist_txt = NamedTemporaryFile(mode='w',
                                               delete=False,
                                               encoding='utf-8')
            phonelist_txt.close()
            try:
                self.command, self.output, self.error = run_command([
                    'HDMan', '-w', label_file.name, '-g', self.global_ded,
                    '-n', phonelist_txt.name, '-l', log_txt, dictionary_file,
                    self.lexicon_file
                ])
            finally:
                # Clean up even if the command fails, so temp files do not
                # accumulate.
                os.remove(phonelist_txt.name)
        finally:
            os.remove(label_file.name)

        return
Beispiel #8
0
def increase_mixture(hmmdefs, nmix, output_dir, phonelist_txt):
    """Write an ``MU`` edit script and run ``HHEd`` with it.

    ``fh.make_new_directory(output_dir)`` is called first. The edit script
    ``mix<nmix>.hed`` is then written inside ``output_dir`` with the single
    command ``MU <nmix> {*.state[2-4].mix}``, and ``run_command`` receives
    ``HHEd -T 1 -H <hmmdefs> -M <output_dir> <script> <phonelist_txt>``.

    Args:
        hmmdefs: value passed after the ``-H`` option.
        nmix: number inserted into the script name and the ``MU`` command.
        output_dir: directory for the script; also passed after ``-M``.
        phonelist_txt: final positional argument of the command.
    """
    fh.make_new_directory(output_dir)

    mix_count = str(nmix)
    header_file = os.path.join(output_dir, 'mix' + mix_count + '.hed')
    with open(header_file, 'wb') as hed:
        hed.write(bytes('MU ' + mix_count + ' {*.state[2-4].mix}', 'ascii'))

    hhed_args = ['HHEd', '-T', '1', '-H', hmmdefs, '-M', output_dir]
    hhed_args += [header_file, phonelist_txt]
    run_command(hhed_args)


#if __name__ == '__main__':

#def txt2label(file_txt, label_file):
#	"""
#	Convert an orthographical transcription to the HTK label.
#	:param path file_txt: path to the text file in which the orthographical transcription of the utterance is written in one line.
#	:param path label_file: path to the text file in which the contents of file_txt is written as a word per line.
#	"""
#	# read the first line where the sentence is written.
#	with open(file_txt, 'r') as f:
#		line1 = f.readline()

#	# remove space at the end and comma
#	line1 = line1.rstrip()
#	line1 = line1.replace(',', '')

#	# write each word in a capital letter
#	line1list = line1.split(' ')
#	with open(label_file, 'w') as f:
#		for word in line1list:
#			f.write("%s\n" % word.upper())

#def lab2HTKdic(label_file, fileHTKdic, lex, connect):
#	"""
#	Make a HTK dictionary file from a HTK label file.
#	Each word in the label file is first searched in database.
#	If not found, the Grapheme-to-Phone(G2P) program is used.
#	:param path label_file: path to the text file in which the contents of file_txt is written as a word per line.
#	:param path fileHTKdic: path to the output dictionary file in which pronunciation variants will be written.
#	:param instance lex: an instance of class cLexicon.
#	:param pypyodbc connect: an object of pypyodbc.
#	"""
#	cursor  = connect.cursor()
#	tableList  = ['2010_2510_lexicon_pronvars_HTK', 'lexicon_pronvars_g2p', 'lexicon_pronvars_ipa']

#	with open(label_file, 'r') as f:
#		lines = f.read()
#		words = lines.split()

#	with open(fileHTKdic, 'w') as f:
#		for WORD in words:
#			word = WORD.lower()
#			rows = np.array([])
#			for tableNum in range(0, len(tableList)):
#				SQL_string = """SELECT word, pronunciation FROM %s WHERE word =?""" %(tableList[tableNum])
#				cursor.execute(SQL_string, (word,))
#				rows_ = cursor.fetchall()
#				rows = np.append(rows, rows_)

#			# if the pronunciations are found in the database, retrieve them.
#			# otherwise obtain the pronunciation using g2p.
#			if len(rows) > 2:
#				# reshape into d x 2 array
#				htkdic_ = rows.reshape((-1, 2))
#				# remove duplicates
#				htkdic  = np.unique(htkdic_, axis=0)
#			else:
#				# get pronunciation using g2p
#				fileWordList = tempfile.NamedTemporaryFile(mode='w', delete=False)
#				fileWordList.write("%s\n" % (word))
#				fileWordList.close()
#				htkdic = lex.g2p2db(fileWordList.name, connect)
#				fileWordList.close()
#				os.remove(fileWordList.name)

#			# output htkdic
#			for line in htkdic:
#				word = line[0].upper()
#				pron = line[1]
#				f.write("{0}\t{1}\n".format(word, pron))

#def loadHTKdic(fileHTKdic):
#	"""
#	load dic file which is used for HTK.
#	:param path fileHTKdic: path to the output dictionary file in which pronunciation variants will be written.
#	"""
#	HTKdic = []

#	with open(fileHTKdic, 'r') as f:
#		lines = f.read()
#		lines = lines.split('\n')

#		for line in lines:
#			line = line.split('\t')
#			if len(line) > 1:
#				HTKdic.append(line)

#	return np.array(HTKdic)

#def HTKdic2list(fileHTKdic):
#	"""
#	load an HTK dictionary file as a list.
#	:param path fileHTKdic: the path to an HTK dictionary file in which [word] \t [pronunciation] is written on each line.
#	"""
#	with open(fileHTKdic, 'r') as fin:
#		lines_ = fin.read()
#		# split all text into lines
#		lines = lines_.split('\n')

#	htkdic = []
#	for line in lines:
#		lineSplit = line.split('\t')
#		if len(lineSplit) == 2:
#			word		  = lineSplit[0].lower()
#			pronunciation = lineSplit[1]
#			htkdic.append([word, pronunciation])
#	return np.array(htkdic)

#def doHVite(fileWav, label_file, fileHTKdic, fileFA, configHVite, filePhoneList, AcousticModel):
#	"""
#	Forced alignment using HVite of HTK.
#	:param path fileWav: path to the wav file in which the utterance was recorded.
#	:param path label_file: path to the text file in which the contents of file_txt is written as a word per line.
#	:param path fileHTKdic: path to the HTK dictionary file in which pronunciation variants are written.
#	:param path fileFA: path to the output file in which forced alignment (100ns unit) will be written.
#	:param path configHVite: path to the config file of HVite.
#	:param path filePhoneList: path to the list of phone used in the acoustic model and in the HTK dictionary file.
#	:param path AcousticModel: path to the acoustic model.
#	"""
#	with open(label_file, 'r') as f:
#		lines = f.read()

#	# Master Label File (= list of label files.)
#	fileMlf = tempfile.NamedTemporaryFile(mode='w', delete=False)
#	fileMlf.write("#!MLF!#\n")
#	fileMlf.write('"' + label_file + '"\n')
#	fileMlf.write(lines)
#	fileMlf.close()

#	# script
#	fileScp = tempfile.NamedTemporaryFile(mode='w', delete=False)
#	fileScp.write("%s" % (fileWav))
#	fileScp.close()

#	# HVite
#	subprocessStr = 'HVite -T 1 -a -C ' + configHVite + ' -H ' + AcousticModel + ' -m -I ' + fileMlf.name + ' -i ' + fileFA + ' -S ' + fileScp.name + ' ' + fileHTKdic + ' ' + filePhoneList
#	subprocess.call(subprocessStr, shell=True)

#	# termination process
#	os.remove(fileMlf.name)
#	os.remove(fileScp.name)

#def conv100ns2ms(fileFA_in_100ns, fileFA_in_ms):
#	"""
#	Convert the unit of forced alignment from 100ns to ms.
#	:param path fileFA_in_100ns: path to the forced alignment file written in the unit of 100ns.
#	:param path fileFA_in_ms: path to the output forced alignment file written in the unit of ms.
#	"""
#	with open(fileFA_in_100ns, 'r') as fin:
#		with open(fileFA_in_ms, 'w') as fout:
#			line = fin.readline()
#			fout.write(line)
#			line = fin.readline()
#			fout.write(line)

#			while line:
#				line = fin.readline()
#				dur = line.split()

#				# Each line of forced alignment file is:
#				# [durStart] [durEnd] [phoneme] [likelihood] [word]
#				if len(dur) == 4 or len(dur) == 5:
#					# convert 100ns -> ms
#					dur[0] = float(dur[0])/10000
#					dur[1] = float(dur[1])/10000
#					if len(dur) == 4:
#						fout.write('{0} {1} {2} {3}\n'.format(dur[0], dur[1], dur[2], dur[3]))
#					elif len(dur) == 5:
#						fout.write('{0} {1} {2} {3} {4}\n'.format(dur[0], dur[1], dur[2], dur[3], dur[4]))
#				else:
#					fout.write(line)

#def ForcedAlignment(fileWav, file_txt, fileOut, configFile, saveIntermediateFiles):
#	"""
#	Forced Alignment
#	:param path fileWav: path to the wav file in which the utterance was recorded.
#	:param path file_txt: path to the text file in which the orthographical transcription of the utterance is written in one line.
#	:param path fileOut: path to the output file in which forced alignment will be written in the unit of ms.
#	:param path configFile: path to the general config file (not the config file for HVite).
#	:param int saveIntermediateFiles: whether intermediate files (label file and dictionary file) should be saved. 0:no, 1:yes
#	"""
#	# label file: the list of words that appears in the wave file.
#	#	should be in the same folder where wav file is stored.
#	label_file = fileWav.replace('.wav', '.lab')
#	# dic file: the pronunciation dictionary in which pronunciation(s) of each word in the label file are described.
#	fileHTKdic = fileWav.replace('.wav', '.dic')
#	# forced alignment output
#	fileFA = tempfile.NamedTemporaryFile(delete=False)
#	fileFA.close()

#	# load the config file
#	config = configparser.ConfigParser()
#	config.sections()
#	config.read(configFile)

#	dbLexicon	  = config['cLexicon']['dbLexicon']
#	scriptBarbara = config['cLexicon']['scriptBarbara']
#	exeG2P		  = config['cLexicon']['exeG2P']

#	configHVite	  = config['pyHTK']['configHVite']
#	filePhoneList = config['pyHTK']['filePhoneList']
#	AcousticModel = config['pyHTK']['AcousticModel']

#	# make database connection
#	param = r"Driver={Microsoft Access Driver (*.mdb, *.accdb)};dbq=" + dbLexicon + ";"
#	connect = pypyodbc.connect(param)

#	# instance of class lexicon
#	if sys.platform == 'win32':
#		scriptBarbara = scriptBarbara.replace('\\\\', '\\')
#	lex = cLexicon.lexicon(scriptBarbara, exeG2P)

#	# load the orthographical transcription
#	# and output that word by word (e.g. one word per line) in capital letters...
#	txt2label(file_txt, label_file)

#	# for each word in the label file, pronunciation(s) are looked up in the lexicon database...
#	lab2HTKdic(label_file, fileHTKdic, lex, connect)

#	# forced alignment using HVite
#	doHVite(fileWav, label_file, fileHTKdic, fileFA.name, configHVite, filePhoneList, AcousticModel)
#	conv100ns2ms(fileFA.name, fileOut)

#	# termination process
#	if saveIntermediateFiles == 0:
#		os.remove(label_file)
#		os.remove(fileHTKdic)
#	os.remove(fileFA.name)
#	connect.close()
Beispiel #9
0
 def _tie_sp_to_sil(self, macros, hmmdefs, output_dir):
     """Run ``HHEd`` with the ``self.sil_hed`` edit script.

     ``run_command`` receives
     ``HHEd -H <macros> -H <hmmdefs> -M <output_dir> <self.sil_hed>
     <self.phonelist_txt>``; its three return values are stored on
     ``self.command``, ``self.output`` and ``self.error``.

     Args:
         macros: value passed after the first ``-H`` option.
         hmmdefs: value passed after the second ``-H`` option.
         output_dir: value passed after the ``-M`` option.
     """
     hhed_args = ['HHEd', '-H', macros, '-H', hmmdefs, '-M', output_dir]
     hhed_args += [self.sil_hed, self.phonelist_txt]
     self.command, self.output, self.error = run_command(hhed_args)
Beispiel #10
0
 def wav2mfc(self, hcopy_scp):
     """Run ``HCopy`` with ``self.config_hcopy`` on the given script file.

     ``run_command`` receives ``HCopy -C <self.config_hcopy> -S <hcopy_scp>``;
     its three return values are stored on ``self.command``, ``self.output``
     and ``self.error``.

     Args:
         hcopy_scp: value passed after the ``-S`` option.
     """
     hcopy_args = ['HCopy', '-C', self.config_hcopy, '-S', hcopy_scp]
     self.command, self.output, self.error = run_command(hcopy_args)
Beispiel #11
0
 def mlf_word2phone(self, mlf_phone, mlf_word):
     """Run ``HLEd`` with the ``self.mkphones_led`` edit script.

     ``run_command`` receives
     ``HLEd -l * -d <self.lexicon_file> -i <mlf_phone> <self.mkphones_led>
     <mlf_word>``; its three return values are stored on ``self.command``,
     ``self.output`` and ``self.error``.

     Args:
         mlf_phone: value passed after the ``-i`` option.
         mlf_word: final positional argument of the command.
     """
     hled_args = ['HLEd', '-l', '*', '-d', self.lexicon_file]
     hled_args += ['-i', mlf_phone, self.mkphones_led, mlf_word]
     self.command, self.output, self.error = run_command(hled_args)