Esempio n. 1
    def fix_segmenter(self, model, modelL1):
        """
        Fix the acoustic model directory, then create the AlignIO manager.

        If modelL1 is given, both models are mixed into the "models-mix"
        directory of the resources and the mixed model is used instead of
        model. If the mix fails, a warning is printed and model is used alone.

        @param model is the acoustic model directory name of the language of the text,
        @param modelL1 is the acoustic model directory name of the mother language of the speaker,

        """
        if modelL1 is not None:
            try:
                modelmixer = ModelMixer()
                modelmixer.load( model,modelL1 )
                outputdir = os.path.join(RESOURCES_PATH, "models", "models-mix")
                modelmixer.mix( outputdir, gamma=1. )
                model = outputdir
            except Exception as e:
                # Best effort: keep going with the un-mixed model.
                self.print_message("The model is ignored: %s"%str(e), indent=3, status=WARNING_ID)

        # Map phoneme names from model-specific to SAMPA and vice-versa
        mappingfilename = os.path.join( model, "monophones.repl")
        if os.path.isfile( mappingfilename ):
            try:
                mapping = Mapping( mappingfilename )
            except Exception:
                # Unreadable mapping file: fall back on an empty mapping.
                mapping = Mapping()
        else:
            mapping = Mapping()

        # Manager of the interval tracks
        self.alignio = AlignIO( mapping, model )
Esempio n. 2
class sppasAlign( sppasBase ):
    """
    @author:       Brigitte Bigi
    @organization: Laboratoire Parole et Langage, Aix-en-Provence, France
    @contact:      [email protected]
    @license:      GPL, v3
    @copyright:    Copyright (C) 2011-2016  Brigitte Bigi
    @summary:      SPPAS integration of the Alignment automatic annotation.

    This class can produce the tiers with names:

        - PhonAlign,
        - TokensAlign (if tokens are given in the input),
        - PhnTokAlign - option (if tokens are given in the input),
        - Activity    - option (if tokens are given in the input),
        - ActivityDuration - option (if tokens are given in the input).

    How to use sppasAlign?

    >>> a = sppasAlign( modeldirname )
    >>> a.run(inputphonesname, inputtokensname, inputaudioname, outputfilename)

    """

    def __init__(self, model, modelL1=None, logfile=None):
        """
        Create a new sppasAlign instance.

        @param model (str) the acoustic model directory name of the language of the text
        @param modelL1 (str) the acoustic model directory name of the mother language of the speaker
        @param logfile (sppasLog)

        """
        sppasBase.__init__(self, logfile)

        # Members: self.alignio
        self.fix_segmenter( model,modelL1 )

        # Members: self._options
        # NOTE(review): sppasBase.__init__ is not visible here; calling reset()
        # explicitly guarantees the defaults exist (a second call is harmless).
        self.reset()

    # ------------------------------------------------------------------

    def reset(self):
        """
        Set default values.

        """
        # List of options to configure this automatic annotation
        self._options = {}
        self._options['clean']    = True  # Remove temporary files
        self._options['infersp']  = False # Add 'sp' at the end of each token
        self._options['basic']    = False # Perform a basic alignment if error
        self._options['activity'] = True
        # Read by append_extra(): it must always have a value.
        self._options['activityduration'] = False
        self._options['phntok']   = False

    # -----------------------------------------------------------------------

    def fix_segmenter(self, model, modelL1):
        """
        Fix the acoustic model directory, then create the AlignIO manager.

        If modelL1 is given, both models are mixed into the "models-mix"
        directory of the resources and the mixed model is used instead of
        model. If the mix fails, a warning is printed and model is used alone.

        @param model is the acoustic model directory name of the language of the text,
        @param modelL1 is the acoustic model directory name of the mother language of the speaker,

        """
        if modelL1 is not None:
            try:
                modelmixer = ModelMixer()
                modelmixer.load( model,modelL1 )
                outputdir = os.path.join(RESOURCES_PATH, "models", "models-mix")
                modelmixer.mix( outputdir, gamma=1. )
                model = outputdir
            except Exception as e:
                # Best effort: keep going with the un-mixed model.
                self.print_message("The model is ignored: %s"%str(e), indent=3, status=WARNING_ID)

        # Map phoneme names from model-specific to SAMPA and vice-versa
        mappingfilename = os.path.join( model, "monophones.repl")
        if os.path.isfile( mappingfilename ):
            try:
                mapping = Mapping( mappingfilename )
            except Exception:
                # Unreadable mapping file: fall back on an empty mapping.
                mapping = Mapping()
        else:
            mapping = Mapping()

        # Manager of the interval tracks
        self.alignio = AlignIO( mapping, model )

    # ------------------------------------------------------------------------
    # Methods to fix options
    # ------------------------------------------------------------------------

    def fix_options(self, options):
        """
        Fix all options.

        @param options (option) iterable of objects providing get_key() and
        get_value().
        @raise KeyError if the key of an option is unknown.

        """
        # One setter per known option key.
        setters = {
            "clean":            self.set_clean,
            "basic":            self.set_basic,
            "aligner":          self.set_aligner,
            "infersp":          self.set_infersp,
            "activity":         self.set_activity_tier,
            "activityduration": self.set_activityduration_tier,
            "phntok":           self.set_phntokalign_tier,
        }

        for opt in options:
            key = opt.get_key()
            if key not in setters:
                raise KeyError('Unknown key option: %s'%key)
            setters[key]( opt.get_value() )

    # ----------------------------------------------------------------------

    def set_clean(self, clean):
        """
        Fix the clean option.

        @param clean (bool - IN) If clean is set to True then temporary files
        will be removed.

        """
        self._options['clean'] = clean

    # -----------------------------------------------------------------------

    def set_aligner(self, alignername):
        """
        Fix the name of the aligner.
        The list of accepted aligner names is available in:
        >>> aligners.aligner_names()

        @param alignername (str - IN) Case-insensitive name of the aligner.

        """
        # NOTE(review): the name is only stored in the options here; it is
        # not forwarded to self.alignio - confirm this is intended.
        self._options['aligner'] = alignername

    # -----------------------------------------------------------------------

    def set_infersp(self, infersp):
        """
        Fix the infersp option.

        @param infersp (bool - IN) If infersp is set to True, the aligner
        will add an optional short pause at the end of each token, and
        will infer if it is relevant.

        """
        self.alignio.set_infersp( infersp )

    # -----------------------------------------------------------------------

    def set_basic(self, basic):
        """
        Fix the basic option.

        @param basic (bool - IN) If basic is set to True, a basic segmentation
        will be performed if the main aligner fails.

        """
        self._options['basic'] = basic

    # -----------------------------------------------------------------------

    def set_activity_tier(self, value):
        """
        Fix the activity option.

        @param value (bool - IN) Activity tier generation.

        """
        self._options['activity'] = bool(value)

    # -----------------------------------------------------------------------

    def set_activityduration_tier(self, value):
        """
        Fix the activity duration option.

        @param value (bool - IN) ActivityDuration tier generation.

        """
        self._options['activityduration'] = bool(value)

    # -----------------------------------------------------------------------

    def set_phntokalign_tier(self, value):
        """
        Fix the phntok option.

        @param value (bool - IN) PhnTokAlign tier generation.

        """
        self._options['phntok'] = bool(value)

    # -----------------------------------------------------------------------
    # Methods to time-align series of data
    # -----------------------------------------------------------------------

    def convert_tracks(self, diralign, trstier):
        """
        Call the Aligner to align each unit of a directory.

        Tracks are numbered from 1 to the number of "track_*.wav" files found
        in diralign. If the aligner fails on a track, either a basic alignment
        is performed (option 'basic') or an empty alignment is created.

        @param diralign is the directory to get units and put alignments.
        @param trstier (Tier) required only if basic alignment (not used here).
        @raise IOError if diralign does not exist or contains no track.

        """
        # Verify if the directory exists
        if not os.path.exists( diralign ):
            raise IOError('The directory '+diralign+' does not exist.')

        # Get all audio tracks
        dirlist = glob.glob(os.path.join(diralign, "track_*.wav"))
        ntracks = len(dirlist)
        if ntracks == 0:
            raise IOError('The directory '+diralign+' does not contain data.')

        for track in range(1, ntracks + 1):
            self.print_message('Align interval number '+str(track), indent=3)

            try:
                msg = self.alignio.segment_track(track,diralign)
                if len(msg)>0:
                    self.print_message(msg, indent=3, status=INFO_ID)

            except Exception as e:
                self.print_message(self.alignio.get_aligner()+' failed to perform segmentation.', indent=3, status=ERROR_ID)
                self.print_message(str(e), indent=4, status=INFO_ID)

                # Execute BasicAlign
                if self._options['basic'] is True:
                    if self.logfile:
                        self.logfile.print_message('Execute a Basic Alignment - same duration for each phoneme:', indent=3)
                    # NOTE(review): the lines switching the aligner were lost in
                    # the extracted source; reconstructed on the assumption that
                    # AlignIO provides set_aligner() as the counterpart of
                    # get_aligner() - confirm against upstream.
                    alignerid = self.alignio.get_aligner()
                    self.alignio.set_aligner('basic')
                    try:
                        self.alignio.segment_track(track,diralign)
                    finally:
                        # Always give the configured aligner back to the next track.
                        self.alignio.set_aligner(alignerid)
                # or Create an empty alignment, to get an empty interval in the final tier
                else:
                    self.alignio.segment_track(track,diralign,segment=False)

    # ------------------------------------------------------------------------

    def convert( self, phontier, toktier, inputaudio, workdir ):
        """
        Perform speech segmentation of data in tiers tokenization/phonetization.

        @param phontier (Tier - IN) The phonetization.
        @param toktier (Tier - IN) The tokenization, or None.
        @param inputaudio (str - IN) Audio file name.
        @param workdir (str - IN) Working directory; created if it does not exist.

        @return A transcription.

        """
        if not os.path.exists( workdir ):
            os.mkdir( workdir )

        # Split input into tracks
        # --------------------------------------------------------------

        self.print_message("Split into intervals: ", indent=2)
        sgmt = self.alignio.split( inputaudio, phontier, toktier, workdir )

        # Align each track
        # --------------------------------------------------------------

        self.print_message("Align each interval: ", indent=2)
        self.convert_tracks( workdir, phontier )

        # Merge track alignment results
        # --------------------------------------------------------------

        self.print_message("Merge interval's alignment:", indent=2)

        trsoutput = Transcription("AutomaticAlignment")
        # NOTE(review): loop bodies and the read call below were lost in the
        # extracted source; reconstructed as Transcription.Append() and
        # AlignIO.read() - confirm against upstream.
        for tier in sgmt:
            trsoutput.Append(tier)

        # Create a Transcription() object with alignments
        trs = self.alignio.read( workdir )
        if self.alignio.get_aligner() != 'basic':
            trs = self.rustine_liaisons(trs)
            trs = self.rustine_others(trs)
        for tier in trs:
            trsoutput.Append(tier)

        return trsoutput

    # ------------------------------------------------------------------------

    def append_extra(self, trs):
        """
        Append extra tiers in trs: PhnTokAlign, Activity and ActivityDuration.

        Nothing is generated if trs has no "TokensAlign" tier. A failure while
        generating a tier is reported as a warning and does not stop the
        process.

        @param trs (Transcription) the time-aligned transcription.
        @return trs

        """
        tokenalign = trs.Find("TokensAlign")
        if tokenalign is None:
            self.print_message("No time-aligned tokens found. No extra tier can be generated.", indent=2, status=WARNING_ID)
            return trs

        # NOTE(review): the trs.Append(...) calls below were lost in the
        # extracted source and are reconstructed - confirm against upstream.

        # PhnTokAlign tier
        if self._options['phntok'] is True:
            try:
                phonalign = trs.Find("PhonAlign")
                tier = self.phntokalign_tier(phonalign,tokenalign)
                trs.Append(tier)
                trs.GetHierarchy().addLink("TimeAssociation", tokenalign, tier)
            except Exception as e:
                self.print_message("PhnTokAlign generation: %s"%str(e), indent=2, status=WARNING_ID)

        # Activity tier
        if self._options['activity'] is True or self._options['activityduration']:
            try:
                activity = Activity( trs )
                tier = activity.get_tier()
                if self._options['activity'] is True:
                    trs.Append(tier)
                    trs.GetHierarchy().addLink("TimeAlignment", tokenalign, tier)

                if self._options['activityduration'] is True:
                    # Same intervals as the activity tier, labelled with
                    # their duration (3 decimals).
                    dtier = tier.Copy()
                    dtier.SetName( "ActivityDuration" )
                    trs.Append(dtier)
                    for a in dtier:
                        d = a.GetLocation().GetDuration().GetValue()
                        a.GetLabel().SetValue( '%.3f' % d )

            except Exception as e:
                self.print_message("Activities generation: %s"%str(e), indent=2, status=WARNING_ID)

        return trs

    # ------------------------------------------------------------------------

    def get_phonestier(self, trsinput):
        """
        Return the tier with phonetization or None.

        Priority is given to a tier whose name starts with "phon"
        (case-insensitive), then to a tier whose name contains "phon".

        """
        # Search for a tier starting with "phon"
        for tier in trsinput:
            if tier.GetName().lower().startswith("phon"):
                return tier

        # Search for a tier containing "phon"
        for tier in trsinput:
            if "phon" in tier.GetName().lower():
                return tier

        return None

    # ------------------------------------------------------------------------

    def get_tokenstier(self, trsinput):
        """
        Return the tier with tokens, or None.

        In case of EOT, 2 tiers with tokens are available: std and faked.
        Priority is given to std, then to faked, then to any other tier with
        "token" in its name (the last one found).

        """
        toktier   = None # tier with tokens
        fakedtier = None # tier with faked tokens

        for tier in trsinput:
            tiername = tier.GetName().lower()
            if "token" not in tiername:
                continue
            if "std" in tiername:
                # Highest priority: no need to look any further.
                return tier
            if "faked" in tiername:
                fakedtier = tier
            else:
                toktier = tier

        if fakedtier is not None:
            return fakedtier

        return toktier

    # ------------------------------------------------------------------------

    def phntokalign_tier(self, tierphon, tiertoken):
        """
        Generates the PhnTokAlignTier from PhonAlign and TokensAlign.

        Each annotation of the new tier is a copy of a token annotation whose
        label is replaced by the "-"-joined labels of the phonemes found
        between the begin and the end of the token.

        @param tierphon (Tier) time-aligned phonemes
        @param tiertoken (Tier) time-aligned tokens
        @return Tier named 'PhnTokAlign'

        """
        newtier = Tier('PhnTokAlign')
        newtier.SetMedia( tiertoken.GetMedia() )

        for anntoken in tiertoken:

            # Create the sequence of phonemes
            # Use only the phoneme with the best score.
            # Alternatives are not generated, and never will be.
            beg = anntoken.GetLocation().GetBegin()
            end = anntoken.GetLocation().GetEnd()
            annphons = tierphon.Find(beg,end)
            phonemes = "-".join( ann.GetLabel().GetValue() for ann in annphons )

            # Append in the new tier
            newann = anntoken.Copy()
            score = newann.GetLabel().GetLabel().GetScore()
            newann.GetLabel().SetValue( Text(phonemes,score) )
            newtier.Add( newann )

        return newtier

    # ------------------------------------------------------------------------

    def run(self, phonesname, tokensname, audioname, outputfilename):
        """
        Execute SPPAS Alignment.

        @param phonesname (str - IN) file containing the phonetization
        @param tokensname (str - IN) file containing the tokenization
        @param audioname (str - IN) Audio file name
        @param outputfilename (str - IN) the file name with the result

        @return Transcription
        @raise IOError if no tier with the phonetization is found. Any
        exception raised while aligning or saving is re-raised after the
        working directory was removed (option 'clean').

        """
        self.print_diagnosis(audioname, phonesname, tokensname)

        # Get the tiers to be time-aligned
        # ---------------------------------------------------------------

        # NOTE(review): the reader/writer calls of this method were truncated
        # in the extracted source; reconstructed as annotationdata.aio.read()
        # and annotationdata.aio.write() - confirm the module name against the
        # imports of this file.
        trsinput = annotationdata.aio.read( phonesname )
        phontier = self.get_phonestier( trsinput )
        if phontier is None:
            raise IOError("No tier with the phonetization was found.")

        try:
            trsinputtok = annotationdata.aio.read( tokensname )
            toktier = self.get_tokenstier( trsinputtok )
        except Exception:
            # Tokens are optional: phonemes can be aligned without them.
            toktier = None
            self.print_message("Tokens alignment disabled.", indent=2, status=WARNING_ID)

        # Prepare data
        # -------------------------------------------------------------

        inputaudio = fileutils.fix_audioinput(audioname)
        workdir    = fileutils.fix_workingdir(inputaudio)
        if self._options['clean'] is False:
            self.print_message( "The working directory is: %s"%workdir, indent=3, status=None )

        # Processing...
        # ---------------------------------------------------------------

        try:
            trsoutput = self.convert( phontier,toktier,audioname,workdir )
            if toktier is not None:
                trsoutput = self.append_extra(trsoutput)
        except Exception as e:
            self.print_message( str(e) )
            if self._options['clean'] is True:
                shutil.rmtree( workdir )
            # Nothing can be saved without a result: let the caller know.
            raise

        # Set media
        # --------------------------------------------------------------

        extm = os.path.splitext(audioname)[1].lower()[1:]
        media = Media( gen_id(), audioname, "audio/"+extm )
        trsoutput.AddMedia( media )
        for tier in trsoutput:
            tier.SetMedia( media )

        # Save results
        # --------------------------------------------------------------
        try:
            self.print_message("Save automatic alignment: ",indent=3)
            # Save in a file
            annotationdata.aio.write( outputfilename,trsoutput )
        except Exception:
            if self._options['clean'] is True:
                shutil.rmtree( workdir )
            # Re-raise: otherwise the working directory would be removed a
            # second time below and the failure would be hidden.
            raise

        # Clean!
        # --------------------------------------------------------------
        # if the audio file was converted.... remove the tmpaudio
        if inputaudio != audioname:
            os.remove(inputaudio)
        # Remove the working directory we created
        if self._options['clean'] is True:
            shutil.rmtree( workdir )

        return trsoutput

    # ------------------------------------------------------------------------
    # Private: some very bad hack...
    # ------------------------------------------------------------------------

    def _shift_non_phoneme_ends(self, tier, fillers):
        """
        Shift forward the end of "sil" and filler annotations of a tier.

        The end of a "sil" annotation is moved 0.03 later if the next
        annotation lasts more than 0.05; the end of an annotation labelled
        with one of fillers is moved 0.02 later if the next annotation lasts
        more than 0.04. The begin of the next annotation is moved accordingly.
        Nothing is done if the next annotation is labelled "#".

        @param tier (Tier) the tier to modify
        @param fillers (list) labels to be considered as fillers

        """
        imax = tier.GetSize() - 1
        for i, a in reversed(list(enumerate(tier))):
            # The last annotation has no successor.
            if i >= imax:
                continue

            nexta = tier[i+1]
            # NOTE(review): the body of this test was lost in the extracted
            # source; reconstructed as "continue" - confirm against upstream.
            if nexta.GetLabel().GetValue() == "#":
                continue
            durnexta = nexta.GetLocation().GetDuration()
            label = a.GetLabel().GetValue()

            if label == "sil" and durnexta > 0.05:
                a.GetLocation().SetEndMidpoint( a.GetLocation().GetEndMidpoint() + 0.03 )
                nexta.GetLocation().SetBeginMidpoint( a.GetLocation().GetEndMidpoint() )

            if label in fillers and durnexta > 0.04:
                a.GetLocation().SetEndMidpoint( a.GetLocation().GetEndMidpoint() + 0.02 )
                nexta.GetLocation().SetBeginMidpoint( a.GetLocation().GetEndMidpoint() )

    def rustine_others(self, trs):
        """
        Real patch to shift the end of the non-phonemes.

        Applied to "PhonAlign" then to "TokensAlign"; the process stops as
        soon as one of these tiers is not found in trs.

        @param trs (Transcription)
        @return trs

        """
        tierphon = trs.Find("PhonAlign")
        if tierphon is None:
            return trs
        self._shift_non_phoneme_ends( tierphon, [ "*", "@@", "fp", "dummy" ] )

        tiertok = trs.Find("TokensAlign")
        if tiertok is None:
            return trs
        self._shift_non_phoneme_ends( tiertok, [ "*", "@", "euh", "dummy" ] )

        return trs

    # ------------------------------------------------------------------------

    def rustine_liaisons(self, trs):
        """
        Real patch to remove some unwanted liaisons.

        Only applied if the name of the model starts with "fra". A phoneme
        /z/, /n/ or /t/ shorter than 0.045 is removed if it ends a token, is
        not the first phoneme of this token, and the token is not written
        with a final vowel; the previous phoneme is extended to fill the gap.

        @param trs (Transcription)
        @return trs

        """
        # Only for French!
        if not self.alignio.aligntrack.get_model().startswith("fra"):
            return trs

        tierphon   = trs.Find("PhonAlign")
        tiertokens = trs.Find("TokensAlign")
        if tiertokens is None or tierphon is None:
            return trs

        liaisons = ( "z", "n", "t" )
        vowels   = ( "a", "e", "i", "o", "u", u"é", u"à", u"è" )

        # remove the final /z/, /n/ and /t/ of a word if their duration is < 45ms.
        # Reverse order: popping an annotation does not shift the indexes
        # still to be visited.
        for i, a in reversed(list(enumerate(tierphon))):
            # i == 0: there is no previous phoneme to extend.
            if i == 0:
                continue
            if not (a.GetLocation().GetDuration() < 0.045 and a.GetLabel().GetValue() in liaisons):
                continue

            # get the corresponding token
            for t in tiertokens:
                # this is not the only phoneme in this token!
                # and the token is not finishing by a vowel...
                lastchar = t.GetLabel().GetValue()
                if len(lastchar)>0:
                    lastchar = lastchar[-1]
                if a.GetLocation().GetEnd() == t.GetLocation().GetEnd() and a.GetLocation().GetBegin() != t.GetLocation().GetBegin() and lastchar not in vowels:
                    # Remove a and extend previous annotation
                    prev = tierphon[i-1]
                    removed = tierphon.Pop(i)
                    prev.GetLocation().SetEndMidpoint( removed.GetLocation().GetEndMidpoint() )
                    # TODO: also remove the phoneme from the PhnTokAlign tier.
                    # The phoneme is gone: do not test the remaining tokens.
                    break

        return trs