def run(self, inputfilename, outputphonfile=None, outputtokensfile=None):
    """
    Run the Phonetization process from a tokenized input (work in progress).

    Steps:
        1. write the tier returned by get_tier(1) to a temporary LIA-style
           text file,
        2. pipe that file through LIA_Phon ($LIA_PHON_REP must be set in the
           environment of the shell that executes the command),
        3. post-process the LIA_Phon output with the ESPPAS awk/shell
           patches and store the result in outputphonfile.

    Parameters:
        - inputfilename is the input file name
        - outputphonfile is the output file name of the phonetization
          (default: inputfilename with ".TextGrid" replaced by
          "-phon.TextGrid")
        - outputtokensfile is the output file name of the tokenization
          (default: inputfilename with ".TextGrid" replaced by
          "-tokens.TextGrid"); this method does not write it

    Return: None

    Exceptions: BaseException if the LIA-style input cannot be written, or
    if an expected output file is missing after a command was executed.
    """
    if not outputphonfile:
        outputphonfile = inputfilename.replace(".TextGrid", "-phon.TextGrid")
    if not outputtokensfile:
        outputtokensfile = inputfilename.replace(".TextGrid", "-tokens.TextGrid")

    # Program names and data dirs
    esppasdir = os.path.join(SPPAS, "scripts", "esppas")
    formate = os.path.join(esppasdir, "formatphonFRTOE.awk")
    rustine = os.path.join(esppasdir, "rustinePhonFRTOE.sh")

    # Write the appropriate tier in a LIA-style text file
    # (original author's note: it also will convert input from UTF8 to
    # iso8859-1).
    try:
        inputok = genName().get_name() + ".liatxt"
        if hasattr(inputfilename, "get_tier"):
            # Backward compatibility: an already-loaded transcription object
            # was the only kind of argument the previous code could handle.
            source = inputfilename
        else:
            # NOTE(review): assumes annotationdata.io.read(filename) is the
            # counterpart of annotationdata.io.write() -- confirm.
            source = annotationdata.io.read(inputfilename)
        trs = Transcription()
        trs.Append(source.get_tier(1))
        annotationdata.io.write(inputok, trs)
    except Exception:
        raise BaseException("Phon::esppasphon.py. Processing of phonetization failed.\n")

    # Execute LIA_Phon. The raw result goes to an intermediate file that is
    # consumed by the post-processing step below.
    tmpoutput = genName().get_name() + ".liaphon"
    command = "cat " + inputok
    command += " | $LIA_PHON_REP/script/lia_text2phon > " + tmpoutput
    self.run_command(command)
    if not os.path.isfile(tmpoutput):
        raise BaseException("Phon::esppasphon.py. TOE-Phonetization failed.\n")

    # ESPPAS patches
    command = "cat " + tmpoutput
    command += " | gawk -f " + formate
    command += " -l -t -m 250 -s '##' | " + rustine
    command += " > " + outputphonfile
    self.run_command(command)

    # The temporary files (inputok, tmpoutput) are left on disk, as in the
    # original implementation.
    if not os.path.isfile(outputphonfile):
        raise BaseException("Phon::esppasphon.py. Post-processing of TOE-Phonetization failed.\n")
def old_run(self, inputfilename, outputphonfile=None, outputtokensfile=None):
    """
    Run the Phonetization process from a transcription.

    For details, see:
    B. Bigi, P. Péri, R. Bertrand (2012)
    Orthographic Transcription: which Enrichment is required for
    phonetization?
    Language Resources and Evaluation Conference, Istanbul (Turkey),
    pages 1756-1763, ISBN 978-2-9517408-7-7.

    INPUT: an enriched orthographic transcription
    BRIEF: Get the TO, create a tree, call LIA_Tagg, update the tree,
    call LIA_Phon, update the tree, then get the phonetization.

    Parameters:
        - inputfilename is the input file name
        - outputphonfile is the output file name of the phonetization
          (default: inputfilename with ".TextGrid" replaced by
          "-phon.TextGrid")
        - outputtokensfile is the output file name of the tokenization
          (default: inputfilename with ".TextGrid" replaced by
          "-tokens.TextGrid")

    Return: None

    Exceptions: BaseException if an expected output file is missing after
    a command was executed.
    """
    if not outputphonfile:
        outputphonfile = inputfilename.replace(".TextGrid", "-phon.TextGrid")
    if not outputtokensfile:
        outputtokensfile = inputfilename.replace(".TextGrid", "-tokens.TextGrid")

    # Program names and data dirs
    esppasdir = os.path.join(SPPAS, "scripts", "esppas")
    jar = os.path.join(esppasdir, "phonetizationFRTOE.jar")
    # Joining with " " makes the value end with a path separator followed by
    # a blank; it is kept as-is so the command line is unchanged.
    etc = os.path.join(esppasdir, "etc", " ")
    formate = os.path.join(esppasdir, "formatphonFRTOE.awk")
    rustine = os.path.join(esppasdir, "rustinePhonFRTOE.sh")

    # First command to execute: the Java phonetizer and its parameters.
    tmpoutput = genName().get_name() + ".TextGrid"
    command = "".join([
        "java -Dfile.encoding=iso8859-1 -jar ", jar,
        " -s ", etc,
        " -i ", inputfilename,
        " -t 1 ",
        " -o ", tmpoutput,
        " -w ", outputtokensfile,
        " -v 0 ",
    ])
    self.run_command(command)
    if not os.path.isfile(tmpoutput):
        raise BaseException("Phon::esppasphon.py. TOE-Phonetization failed.\n")

    # ESPPAS patches
    command = "".join([
        "cat ", tmpoutput,
        " | gawk -f ", formate,
        " -l -t -m 250 -s '##' | ", rustine,
        " > ", outputphonfile,
    ])
    self.run_command(command)

    # The temporary file (tmpoutput) is left on disk, as in the original
    # implementation.
    if not os.path.isfile(outputphonfile):
        raise BaseException("Phon::esppasphon.py. Post-processing of TOE-Phonetization failed.\n")