Example #1
0
    def _map_variant(self, phonvariant):
        """
        Map phonemes of only one variant of a phonetized entry.

        @param phonvariant (str) One phonetization variant of an entry.

        """
        phonemes = self._map_split_variant(phonvariant)

        # For each phoneme, keep either "phoneme|mapped" when the mapping
        # table knows it, or the phoneme alone otherwise.
        alternatives = []
        for phoneme in phonemes:
            mapped = self._maptable.map_entry(phoneme)
            if len(mapped) > 0:
                alternatives.append(phoneme + "|" + mapped)
            else:
                alternatives.append(phoneme)

        self._dagphon.variants = 0
        phon = ToStrip(self._dagphon.decompose(" ".join(alternatives)))

        # Remove un-pronounced phonemes!!!
        # By convention, they are represented by an underscore in the
        # mapping table.
        cleaned = []
        for variant in phon.split('|'):
            kept = [ph for ph in variant.split("-") if ph != "_"]
            cleaned.append("-".join(kept))

        return "|".join(set(cleaned))
Example #2
0
    def run_alignment(self, inputwav, outputalign, N=3):
        """
        Execute the external program `julius` to align.
        The data related to the unit to time-align need to be previously
        fixed with:
            - set_phones(str)
            - set_tokens(str)

        @param inputwav (str - IN) the audio input file name, of type PCM-WAV 16000 Hz, 16 bits
        @param outputalign (str - OUT) the output file name
        @param N (int) N value of N-grams, used only if SLM (i.e. outext=walign)
        @return (str) A message of `julius`.
        @raise Exception if `julius` reported errors in its output, or if
        its search failed.

        """
        # NOTE(review): N is documented but never used in this body —
        # confirm whether it should be forwarded to gen_slm_dependencies.
        outputalign = outputalign + "." + self._outext

        # Generate the input files julius needs, next to the audio file:
        # a grammar (palign mode) or a statistical language model (SLM).
        basename = os.path.splitext(inputwav)[0]
        if self._outext == "palign":
            self.gen_grammar_dependencies(basename)
        else:
            self.gen_slm_dependencies(basename)

        # First alignment attempt; then read back julius' log/output.
        self.run_julius(inputwav, basename, outputalign)
        with codecs.open(outputalign, 'r', encoding) as f:
            lines = f.readlines()

        errorlines = ""
        message = ""

        # Collect dictionary entries julius rejected because they are
        # missing from the acoustic model's tiedlist. The entries are
        # quoted in the error line, e.g.: Error: voca_load_htkdict: ... "a-b+c" not found
        entries = []
        for line in lines:
            if line.find("Error: voca_load_htkdict")>-1 and line.find("not found")>-1:
                line = ToStrip( line )
                # Keep only the text between the first pair of double quotes.
                line = line[line.find('"')+1:]
                line = line[:line.find('"')]
                if len(line)>0:
                    entries = line.split()

        # If entries were missing, try to add them to the tiedlist and
        # re-run the alignment once with the patched acoustic model.
        if len(entries) > 0:
            added = self.add_tiedlist(entries)
            if len(added) > 0:
                message = "The acoustic model was modified. The following entries were successfully added into the tiedlist: "
                message = message + " ".join(added) + "\n"
                self.run_julius(inputwav, basename, outputalign)
                with codecs.open(outputalign, 'r', encoding) as f:
                    lines = f.readlines()

        # Gather the remaining error lines (excluding per-line parse
        # messages containing " line "), and detect a failed search.
        for line in lines:
            if (line.startswith("Error:") or line.startswith("ERROR:")) and not " line " in line:
                errorlines = errorlines + line
            if "search failed" in line:
                message = "Julius search has failed to find the transcription in the audio file of this unit."
                errorlines = "Search error. "+ errorlines

        if len(errorlines) > 0:
            raise Exception(message + errorlines)

        return message
Example #3
0
    def read(self, dirname):
        """
        Read a list file (start-time end-time).

        @param dirname (str) The directory containing the list file to
        read; the file name itself is ListIO.DEFAULT_FILENAME.
        @return A list of (start-time, end-time) tuples of float values,
        one per track.
        @raise IOError if the list file can't be opened.

        """
        filename = os.path.join(dirname, ListIO.DEFAULT_FILENAME)
        with codecs.open(filename, 'r', encoding) as fp:
            lines = fp.readlines()

        units = []
        # Each line corresponds to a track,
        # with a couple 'start end' of float values.
        for line in lines:
            tab = ToStrip(line).split()
            if len(tab) >= 2:
                # Columns beyond the first two (if any) are ignored.
                units.append((float(tab[0]), float(tab[1])))

        return units
Example #4
0
    def get_phon_entry(self, entry):
        """
        Return the phonetization of an entry.
        Unknown entries are not automatically phonetized.
        This is a pure dictionary-based method.

        @param `entry` (str) The token to phonetize.
        @return A string with the phonetization of `entry` or
        the unknown symbol.

        """
        entry = ToStrip(entry)

        # Specific strings... for the italian transcription...
        # For the participation at the CLIPS-Evalita 2011 campaign.
        # Strip enclosing angle brackets: "<token>" -> "token".
        if entry.startswith(u"<") and entry.endswith(u">"):
            entry = entry[1:-1]

        # No entry! Nothing to do.
        if len(entry) == 0:
            return ""

        # Specific strings used in the CID transcription...
        # CID is Corpus of Interactional Data, http://sldr.org/sldr000720
        if entry.startswith((u"gpd_", u"gpf_")):
            return ""

        # Specific strings used in SPPAS IPU segmentation...
        if u"ipu_" in entry:
            return ""

        # Find entry in the dict as it is given
        strphon = self._pdict.get_pron(entry)

        # OK, the entry is properly phonetized.
        if strphon != self._pdict.unkstamp:
            return self._map_phonentry(strphon)

        return self._pdict.unkstamp
Example #5
0
    def read_walign(self, filename):
        """
        Read an alignment file in the standard format of Julius CSR engine.

        @param filename (str - IN) The input file name.
        @return Two lists of tuples:
            - None
            - (start-time end-time word score)

        """
        # Defaults so that a file missing the "wseq1:"/"cmscore1:" lines
        # still yields indexable data for a single word.
        tokens = [""]
        scores = [0]
        _wordalign = []
        # Index of the next word to emit; -1 means we are outside a
        # "forced alignment" section and segment lines are ignored.
        wordidx = -1
        with codecs.open(filename, "r", encoding) as fp:
            lines = fp.readlines()

        for line in lines:
            # Each line is either a new annotation or nothing interesting!
            line = ToStrip(line)

            if line.startswith("=== begin forced alignment ==="):
                wordidx = 0

            elif line.startswith("=== end forced alignment ==="):
                wordidx = -1

            elif line.startswith("wseq1:"):
                line = line[6:]
                # each token
                tokens = line.split()
                if len(tokens) == 0:
                    tokens = [""]

            elif line.startswith("cmscore1:"):
                line = line[9:]
                # confidence score of the pronunciation of each token
                scores = [float(s) for s in line.split()]
                if len(scores) == 0:
                    scores = [0]

            elif line.startswith("[") and wordidx > -1:
                # New phonemes
                # Segment line shaped like "[ first last score word ]".
                line = line.replace("[", "")
                line = line.replace("]", "")
                line = ToStrip(line)
                tab = line.split(" ")
                # tab 0: first frame
                # tab 1: last frame
                # tab 2: score of the segmentation (log proba)
                # tab 3: word
                # Frame indices are converted to seconds (100 frames/sec).
                loc_s = float(tab[0]) / 100.0
                loc_e = float(tab[1]) / 100.0
                _wordalign.append([loc_s, loc_e, tokens[wordidx], scores[wordidx]])
                wordidx = wordidx + 1

        # Adjust time values
        for wordidx in range(len(_wordalign)):

            # Fix the end of this annotation to the begin of the next one.
            # End times are only ever extended, never shortened; the last
            # annotation compares against 0.0 and so is left unchanged.
            loc_e = _wordalign[wordidx][1]
            if wordidx < (len(_wordalign) - 1):
                nextloc_s = _wordalign[wordidx + 1][0]
            else:
                nextloc_s = 0.0
            if loc_e < nextloc_s:
                loc_e = nextloc_s
            _wordalign[wordidx][1] = loc_e

        return (None, _wordalign)
Example #6
0
    def read_palign(self, filename):
        """
        Read an alignment file in the standard format of Julius CSR engine.

        @param filename (str - IN) The input file name.
        @return Two lists of tuples:
            - (start-time end-time phoneme score)
            - (start-time end-time word score)

        """
        _phonalign = []
        _wordalign = []

        phonidx = -1  # phoneme index
        loc_s = 0.0  # phoneme start time
        loc_e = 0.0  # phoneme end time
        phonlist = []
        wordseq = []
        scores = [0]
        tokens = [""]

        with codecs.open(filename, "r", encoding) as fp:
            lines = fp.readlines()

        for line in lines:
            # Each line is either a new annotation or nothing interesting!
            line = ToStrip(line)

            if line.startswith("=== begin forced alignment ==="):
                phonidx = 0

            elif line.startswith("=== end forced alignment ==="):
                phonidx = -1

            elif line.startswith("phseq1:"):
                # Phoneme sequence, words separated by "|", e.g.
                # "phseq1: a b | c d".
                line = line[7:]
                line = ToStrip(line)

                wordseq = line.split("|")
                # get indexes of each word
                # wordlist[i] is the index (in the flat phoneme list) of
                # the LAST phoneme of word i.
                wordlist = []
                _idx = -1
                for w in wordseq:
                    _wrdphseq = w.strip().split()
                    _idx += len(_wrdphseq)
                    wordlist.append(_idx)
                # get the list of phonemes (without word segmentation)
                line = line.replace("|", "")
                line = ToStrip(line)
                phonlist = line.split()

            elif line.startswith("cmscore1:"):
                line = line[9:]
                # confidence score of the pronunciation of each token
                scores = [float(s) for s in line.split()]
                if len(scores) == 0:
                    scores = [0]

            elif line.startswith("sentence1:"):
                line = line[10:]
                # each token
                tokens = line.split()
                if len(tokens) == 0:
                    tokens = [""]

            elif line.startswith("[") and phonidx > -1:
                # New phonemes
                # Segment line shaped like "[ first last score triphone ]".
                line = line.replace("[", "")
                line = line.replace("]", "")
                line = ToStrip(line)
                tab = line.split(" ")
                # tab 0: first frame
                # tab 1: last frame
                # tab 2: score of the segmentation (log proba)
                # tab 3: triphone used
                # Frame indices are converted to seconds (100 frames/sec).
                loc_s = float(tab[0]) / 100.0
                loc_e = float(tab[1]) / 100.0
                if len(tab) > 3:
                    # Put real phoneme instead of triphones
                    _phonalign.append([loc_s, loc_e, phonlist[phonidx], tab[2]])
                else:
                    _phonalign.append([loc_s, loc_e, "", tab[2]])
                phonidx = phonidx + 1

        # Adjust time values and create wordalign
        # NOTE(review): 'wordlist' is only bound inside the "phseq1:"
        # branch above — a file with phoneme segments but no "phseq1:"
        # line would raise a NameError below; confirm julius always
        # writes that line before any segment.
        wordidx = 0  # word index
        wordloc_s = 0.0  # word start time
        loc_e = 0.0
        nextloc_s = 0.0
        for phonidx in range(len(_phonalign)):

            # Fix the end of this annotation to the begin of the next one.
            # End times are only ever extended, never shortened.
            loc_e = _phonalign[phonidx][1]
            if phonidx < (len(_phonalign) - 1):
                nextloc_s = _phonalign[phonidx + 1][0]
            else:
                nextloc_s = 0.0
            if loc_e < nextloc_s:
                loc_e = nextloc_s
            _phonalign[phonidx][1] = loc_e

            # Override the segmentation score of the phone by
            # the score of the pronunciation of the word
            _phonalign[phonidx][3] = scores[wordidx]

            # add also the word?
            # (this phoneme is the last one of the current word)
            if phonidx == wordlist[wordidx]:
                _wordalign.append([wordloc_s, loc_e, tokens[wordidx], scores[wordidx]])
                wordidx = wordidx + 1
                wordloc_s = loc_e

        # last word, or the only entry in case of empty interval...
        if len(wordseq) - 1 == wordidx:
            _wordalign.append([wordloc_s, loc_e, tokens[wordidx - 1], scores[wordidx - 1]])

        return (_phonalign, _wordalign)