Beispiel #1
0
    def run(self, inputfilename, outputphonfile=None, outputtokensfile=None):
        """ Run the Phonetization process from a tokenized input (unfinished).

            Steps: write the tier to phonetize in a LIA-style text file,
            execute LIA_Phon on it, then apply the ESPPAS patches (gawk
            formatting script and "rustine" shell script) to get the
            phonetization file.

            Parameters:
                - inputfilename is the input file name
                - outputphonfile is the output file name of the phonetization
                  (default: inputfilename with ".TextGrid" replaced by
                  "-phon.TextGrid")
                - outputtokensfile is the output file name of the tokenization
                  (default: inputfilename with ".TextGrid" replaced by
                  "-tokens.TextGrid"); it is not written by this method
            Return:      None
            Exceptions:  BaseException if one of the steps fails
        """

        if not outputphonfile:
            outputphonfile = inputfilename.replace(".TextGrid", "-phon.TextGrid")
        if not outputtokensfile:
            # Kept for parity with old_run(); nothing below uses this value.
            outputtokensfile = inputfilename.replace(".TextGrid", "-tokens.TextGrid")

        # Program names and data dirs
        esppasdir = os.path.join(SPPAS, "scripts")
        esppasdir = os.path.join(esppasdir, "esppas")
        formate = os.path.join(esppasdir, "formatphonFRTOE.awk")
        rustine = os.path.join(esppasdir, "rustinePhonFRTOE.sh")

        # ALGORITHM:
        # - EXTRACT EACH INTERVAL
        # - CALL LIA_PHON ON THIS INTERVAL
        # - GET RESULT AND PUT IN AN OUTPUT TIER

        # Write the appropriate tier in a LIA-style text file
        # it also will convert input from UTF8 to iso8859-1
        try:
            inputok = genName().get_name() + ".liatxt"
            trs = Transcription()
            # NOTE(review): inputfilename is documented as a file name, yet
            # get_tier() is called on it. A plain string has no such method,
            # so this step fails unless a transcription-like object is given.
            # Confirm how the input is meant to be loaded.
            trs.Append(inputfilename.get_tier(1))
            annotationdata.io.write(inputok, trs)
        except Exception:
            raise BaseException("Phon::esppasphon.py. Processing of phonetization failed.\n")

        # Temporary file receiving the raw LIA_Phon output. It was previously
        # used without ever being assigned.
        tmpoutput = genName().get_name() + ".TextGrid"

        # Execute LIA_Phon
        command = "cat "
        command += inputok
        command += " | $LIA_PHON_REP/script/lia_text2phon > "
        command += tmpoutput

        # This command was previously overwritten before being executed, so
        # the patches below had no input to work on.
        self.run_command(command)
        if not os.path.isfile(tmpoutput):
            raise BaseException("Phon::esppasphon.py. Execution of LIA_Phon failed.\n")

        # ESPPAS patches
        command = "cat "
        command += tmpoutput
        command += " | gawk -f "
        command += formate
        command += " -l -t -m 250 -s '##' | "
        command += rustine
        command += " > "
        command += outputphonfile

        self.run_command(command)
        # NOTE: the temporary files (inputok, tmpoutput) are left on disk.
        if not os.path.isfile(outputphonfile):
            raise BaseException("Phon::esppasphon.py. Post-processing of TOE-Phonetization failed.\n")
Beispiel #2
0
    def old_run(self, inputfilename, outputphonfile=None, outputtokensfile=None):
        """ Phonetize an enriched orthographic transcription (former version).

            Reference:
            B. Bigi, P. Péri, R. Bertrand (2012)
            Orthographic Transcription: which Enrichment is required for phonetization?
            Language Resources and Evaluation Conference, Istanbul (Turkey),
            pages 1756-1763, ISBN 978-2-9517408-7-7.

            Two commands are executed one after the other:
                1. the phonetizationFRTOE.jar program, which is asked to write
                   a temporary TextGrid and the tokenization file;
                2. the ESPPAS patches (gawk formatting script piped into the
                   "rustine" shell script), which turn the temporary TextGrid
                   into the phonetization file.
            The temporary TextGrid is not removed.

            Parameters:
                - inputfilename is the input file name
                - outputphonfile is the output file name of the phonetization
                  (default: inputfilename with ".TextGrid" replaced by
                  "-phon.TextGrid")
                - outputtokensfile is the output file name of the tokenization
                  (default: inputfilename with ".TextGrid" replaced by
                  "-tokens.TextGrid")
            Return:      None
            Exceptions:  BaseException if an expected file is missing after
                         one of the two commands
        """

        # Derive the output names from the input one when they are not given
        outputphonfile = outputphonfile or inputfilename.replace(".TextGrid", "-phon.TextGrid")
        outputtokensfile = outputtokensfile or inputfilename.replace(".TextGrid", "-tokens.TextGrid")

        # Locate the ESPPAS programs and resources
        toolsdir = os.path.join(SPPAS, "scripts", "esppas")
        jarfile = os.path.join(toolsdir, "phonetizationFRTOE.jar")
        # Joining with " " makes the value end with a path separator and a space
        resourcedir = os.path.join(toolsdir, "etc", " ")
        awkscript = os.path.join(toolsdir, "formatphonFRTOE.awk")
        patchscript = os.path.join(toolsdir, "rustinePhonFRTOE.sh")

        # Step 1: the java phonetizer, writing into a temporary TextGrid
        phonetizer = "java -Dfile.encoding=iso8859-1 -jar " + jarfile
        phonetizer += " -s " + resourcedir + " -i " + inputfilename + " -t 1 "
        tmpname = genName().get_name() + ".TextGrid"
        phonetizer += " -o " + tmpname + " -w " + outputtokensfile + " -v 0 "

        self.run_command(phonetizer)
        if not os.path.isfile(tmpname):
            raise BaseException("Phon::esppasphon.py. TOE-Phonetization failed.\n")

        # Step 2: the ESPPAS patches, from the temporary file to the final one
        patcher = "".join([
            "cat ", tmpname,
            " | gawk -f ", awkscript,
            " -l -t -m 250 -s '##' | ", patchscript,
            " > ", outputphonfile,
        ])

        self.run_command(patcher)
        if not os.path.isfile(outputphonfile):
            raise BaseException("Phon::esppasphon.py. Post-processing of TOE-Phonetization failed.\n")