Example #1
0
    def replace_phones(self, reverse=False ):
        """
        Replace the phones by using a mapping table.

        This is mainly useful due to restrictions in some acoustic model toolkits:
        X-SAMPA can't be fully used and a "mapping" is required.
        As for example, the /2/ or /9/ can't be represented directly in an
        HTK-ASCII acoustic model. We commonly replace respectively by /eu/ and
        /oe/.

        Notice that '+' and '-' can't be used as a phone name.

        The replacement is applied to:
            - the observed and tied entries of the tiedlist (a new TiedList
              is created and assigned to the model),
            - the name of each HMM,
            - the states and the transition of each HMM that are still
              given as a string: the second '_'-separated field is mapped.

        Nothing is done if the mapping table is empty. The reverse flag of
        the mapping table is restored when leaving, even if an error occurred.

        @param reverse (bool) reverse the replacement direction.

        """
        if self.repllist.get_size() == 0:
            return
        delimiters = ["-","+"]
        # States/transitions of one of these types are already definitions,
        # not names: there is nothing to rename in them.
        filledtypes = (collections.OrderedDict, collections.defaultdict)

        oldreverse = self.repllist.reverse
        self.repllist.set_reverse(reverse)
        try:
            # Replace in the tiedlist
            newtied = TiedList()
            for observed in self.tiedlist.observed:
                newtied.add_observed( self.repllist.map(observed, delimiters) )
            for tied,observed in self.tiedlist.tied.items():
                mappedtied     = self.repllist.map(tied, delimiters)
                mappedobserved = self.repllist.map(observed, delimiters)
                newtied.add_tied(mappedtied, mappedobserved)
            self.tiedlist = newtied

            # Replace in HMMs
            for hmm in self.hmms:
                hmm.set_name( self.repllist.map(hmm.name, delimiters) )

                for state in hmm.definition['states']:
                    if isinstance(state['state'], filledtypes) is False:
                        tab = state['state'].split('_')
                        tab[1] = self.repllist.map_entry(tab[1])
                        state['state'] = "_".join(tab)

                transition = hmm.definition['transition']
                if isinstance(transition, filledtypes) is False:
                    tab = transition.split('_')
                    tab[1] = self.repllist.map_entry(tab[1])
                    # Store the new name in the HMM: re-binding the local
                    # variable only would leave the definition unchanged.
                    hmm.definition['transition'] = "_".join(tab)
        finally:
            # Never leave the mapping table in a modified direction.
            self.repllist.set_reverse(oldreverse)
Example #2
0
    def __init__(self):
        """
        Create a model without any data.

        The macros are unset, the list of HMMs is empty, and both the
        tiedlist and the replacement table are newly created instances.

        """
        self.macros, self.hmms = None, []
        self.tiedlist = TiedList()
        self.repllist = Mapping()
Example #3
0
class AcModel:
    """
    @author:       Brigitte Bigi
    @organization: Laboratoire Parole et Langage, Aix-en-Provence, France
    @contact:      [email protected]
    @license:      GPL, v3
    @copyright:    Copyright (C) 2011-2016  Brigitte Bigi
    @summary:      Acoustic model representation.

    A model is made of:
       - 'macros' is an OrderedDict of options, transitions, states, ...
       - 'hmms' models (one per phone/biphone/triphone): list of HMM instances
       - a tiedlist (if any)
       - a mapping table to replace phone names.

    """
    def __init__(self):
        """
        Create a model without any data.

        The macros are unset, the list of HMMs is empty, and both the
        tiedlist and the replacement table are newly created instances.

        """
        self.macros, self.hmms = None, []
        self.tiedlist = TiedList()
        self.repllist = Mapping()

    # -----------------------------------------------------------------------
    # Files
    # -----------------------------------------------------------------------

    def load(self, directory):
        """
        Load all known data from a directory.
        The default file names are:
            - hmmdefs for an HTK-ASCII acoustic model
            - tiedlist
            - monophones.repl

        @param directory (str)
        @return list of loaded file names

        """
        l = []
        hmmdefsfiles = glob.glob(os.path.join(directory,'hmmdefs'))
        if len( hmmdefsfiles ) == 0:
            raise IOError('Missing hmmdefs file in %s'%directory)
        self.load_htk( hmmdefsfiles[0] )
        l.append( hmmdefsfiles[0] )

        tiedlistfiles = glob.glob(os.path.join(directory,'tiedlist'))
        if len( tiedlistfiles ) == 1:
            self.load_tiedlist( tiedlistfiles[0] )
            l.append( tiedlistfiles[0] )

        replfiles = glob.glob(os.path.join(directory,'monophones.repl'))
        if len( replfiles ) == 1:
            self.load_phonesrepl( replfiles[0] )
            l.append( replfiles[0] )

        return l

    # -----------------------------------------------------------------------

    def save(self, directory):
        """
        Save all data into a directory, which is created if it does not exist.
        The file names are:
            - hmmdefs for an HTK-ASCII acoustic model (always)
            - tiedlist (only if the tiedlist is not empty)
            - monophones.repl (only if the replacement table is not empty)

        @param directory (str)
        @return list of the file names given to the savers

        """
        if not os.path.isdir(directory):
            os.mkdir(directory)

        hmmdefs = os.path.join(directory, 'hmmdefs')
        self.save_htk(hmmdefs)
        saved = [hmmdefs]

        if self.tiedlist.is_empty() is False:
            tiedname = os.path.join(directory, 'tiedlist')
            self.save_tiedlist(tiedname)
            saved.append(tiedname)

        if self.repllist.is_empty() is False:
            replname = os.path.join(directory, 'monophones.repl')
            self.save_phonesrepl(replname)
            saved.append(replname)

        return saved

    # -----------------------------------------------------------------------

    def load_phonesrepl(self, filename):
        """
        Load a replacement table of phone names from a file.

        @param filename (str)

        """
        try:
            self.repllist.load_from_ascii( filename )
            # Some HACK...
            # because '+' and '-' are the biphones/triphones delimiters,
            # they can't be used as phone name.
            self.repllist.remove('+')
            self.repllist.remove('-')

        except Exception:
            pass

    # -----------------------------------------------------------------------

    def save_phonesrepl(self, filename):
        """
        Save a replacement table of phone names into a file.

        @param filename (str)

        """
        try:
            self.repllist.save_as_ascii( filename )
        except Exception:
            pass

    # -----------------------------------------------------------------------

    def load_tiedlist(self, filename):
        """
        Load a tiedlist from a file.

        @param filename (str)

        """
        try:
            self.tiedlist.load( filename )
        except Exception:
            pass

    # -----------------------------------------------------------------------

    def save_tiedlist(self, filename):
        """
        Save a tiedlist into a file.

        @param filename (str)

        """
        try:
            self.tiedlist.save( filename )
        except Exception:
            pass

    # -----------------------------------------------------------------------

    def load_htk(self, *args):
        """
        Read an HTK model from one or more files and keep its content.

        The macros and the hmms of the model are replaced by the ones
        exposed by the HtkIO instance created from the given files.

        @param args: Filenames of the model (e.g. macros and/or hmmdefs)

        """
        parsed = HtkIO(*args)
        self.macros = parsed.macros
        self.hmms = parsed.hmms

    # -----------------------------------------------------------------------

    def save_htk(self, filename):
        """
        Write the macros and the hmms of the model into a file, in
        HTK-ASCII standard format. Writing is delegated to HtkIO.

        @param filename: File where to save the model.

        """
        writer = HtkIO()
        writer.set(self.macros, self.hmms)
        writer.save(filename)

    # -----------------------------------------------------------------------
    # HMM
    # -----------------------------------------------------------------------

    def get_hmm(self, phone):
        """
        Return the hmm corresponding to the given phoneme.

        @param phone (str) the phoneme name to get hmm
        @raise ValueError if phoneme is not in the model

        """
        hmms = [h for h in self.hmms if h.name==phone]
        if len(hmms) == 1:
            return hmms[0]
        raise ValueError('%s not in the model'%phone)

    # -----------------------------------------------------------------------

    def append_hmm(self, hmm):
        """
        Add an HMM at the end of the list of HMMs of the model.

        The HMM is accepted only if it has a name which is not already
        used in the model, and a definition with states and a transition.

        @param hmm (HMM)
        @raise TypeError if hmm is not an HMM instance, or if its name,
        its definition, its states or its transition is missing
        @raise ValueError if an HMM with the same name is already in the model

        """
        if not isinstance(hmm, HMM):
            raise TypeError('Expected an HMM instance. Got %s'%type(hmm))

        if hmm.name is None:
            raise TypeError('Expected an hmm with a name as key.')
        if any(known.name == hmm.name for known in self.hmms):
            raise ValueError('Duplicate HMM is forbidden. %s already in the model.'%hmm.name)

        definition = hmm.definition
        if definition is None:
            raise TypeError('Expected an hmm with a definition as key.')
        for required in ('states', 'transition'):
            if definition.get(required, None) is None:
                raise TypeError('Expected an hmm with a definition including states and transitions.')

        self.hmms.append(hmm)

    # -----------------------------------------------------------------------

    def pop_hmm(self, phone):
        """
        Remove an HMM of the model.

        @param phone (str) the phoneme name to get hmm
        @raise ValueError if phoneme is not in the model

        """
        hmm = self.get_hmm(phone)
        idx = self.hmms.index(hmm)
        self.hmms.pop(idx)

    # -----------------------------------------------------------------------
    # Manage the model
    # -----------------------------------------------------------------------

    def replace_phones(self, reverse=False ):
        """
        Replace the phones by using a mapping table.

        This is mainly useful due to restrictions in some acoustic model toolkits:
        X-SAMPA can't be fully used and a "mapping" is required.
        As for example, the /2/ or /9/ can't be represented directly in an
        HTK-ASCII acoustic model. We commonly replace respectively by /eu/ and
        /oe/.

        Notice that '+' and '-' can't be used as a phone name.

        The replacement is applied to:
            - the observed and tied entries of the tiedlist (a new TiedList
              is created and assigned to the model),
            - the name of each HMM,
            - the states and the transition of each HMM that are still
              given as a string: the second '_'-separated field is mapped.

        Nothing is done if the mapping table is empty. The reverse flag of
        the mapping table is restored when leaving, even if an error occurred.

        @param reverse (bool) reverse the replacement direction.

        """
        if self.repllist.get_size() == 0:
            return
        delimiters = ["-","+"]
        # States/transitions of one of these types are already definitions,
        # not names: there is nothing to rename in them.
        filledtypes = (collections.OrderedDict, collections.defaultdict)

        oldreverse = self.repllist.reverse
        self.repllist.set_reverse(reverse)
        try:
            # Replace in the tiedlist
            newtied = TiedList()
            for observed in self.tiedlist.observed:
                newtied.add_observed( self.repllist.map(observed, delimiters) )
            for tied,observed in self.tiedlist.tied.items():
                mappedtied     = self.repllist.map(tied, delimiters)
                mappedobserved = self.repllist.map(observed, delimiters)
                newtied.add_tied(mappedtied, mappedobserved)
            self.tiedlist = newtied

            # Replace in HMMs
            for hmm in self.hmms:
                hmm.set_name( self.repllist.map(hmm.name, delimiters) )

                for state in hmm.definition['states']:
                    if isinstance(state['state'], filledtypes) is False:
                        tab = state['state'].split('_')
                        tab[1] = self.repllist.map_entry(tab[1])
                        state['state'] = "_".join(tab)

                transition = hmm.definition['transition']
                if isinstance(transition, filledtypes) is False:
                    tab = transition.split('_')
                    tab[1] = self.repllist.map_entry(tab[1])
                    # Store the new name in the HMM: re-binding the local
                    # variable only would leave the definition unchanged.
                    hmm.definition['transition'] = "_".join(tab)
        finally:
            # Never leave the mapping table in a modified direction.
            self.repllist.set_reverse(oldreverse)

    # -----------------------------------------------------------------------

    def fill_hmms(self):
        """
        Fill HMM states and transitions, i.e.:
           - replace all the "ST_..." by the corresponding macro, for states.
           - replace all the "T_..." by the corresponding macro, for transitions.

        """
        for hmm in self.hmms:

            states     = hmm.definition['states']
            transition = hmm.definition['transition']

            if all(isinstance(state['state'],(collections.OrderedDict,collections.defaultdict)) for state in states) is False:
                newstates = self._fill_states( states )
                if all(s is not None for s in newstates):
                    hmm.definition['states'] = newstates
                else:
                    raise ValueError('No corresponding macro for states: %s'%states)

            if isinstance(transition, (collections.OrderedDict,collections.defaultdict)) is False:
                newtrs = self._fill_transition( transition )
                if newtrs is not None:
                    hmm.definition['transition'] = newtrs
                else:
                    raise ValueError('No corresponding macro for transition: %s'%transition)

        # No more need of states and transitions in macros
        newmacros = []
        if self.macros is not None:
            for m in self.macros:
                if m.get('transition',None) is None and m.get('state',None) is None:
                    newmacros.append( m )
        self.macros = newmacros

    # -----------------------------------------------------------------------

    def create_model(self, macros, hmms):
        """
        Return a new AcModel holding the given macros and hmms.

        The new model refers to the given objects (they are not copied);
        the model on which this method is called is not used.

        @param macros is an OrderedDict of options, transitions, states, ...
        @param hmms models (one per phone/biphone/triphone) is a list of HMM instances
        @return AcModel

        """
        created = AcModel()
        created.macros, created.hmms = macros, hmms
        return created

    # -----------------------------------------------------------------------

    def extract_monophones(self):
        """
        Build a new Acoustic Model restricted to the monophones, i.e. to
        the HMMs whose name contains neither '+' nor '-':
            - macros and selected hmms are deep-copied, then hmms are filled,
            - repllist is deep-copied,
            - tiedlist is ignored.

        @return AcModel

        """
        mono = AcModel()

        # The macros
        if self.macros is not None:
            mono.macros = copy.deepcopy(self.macros)

        # The HMMs: skip biphones and triphones
        for hmm in self.hmms:
            if "+" in hmm.name or "-" in hmm.name:
                continue
            mono.append_hmm(copy.deepcopy(hmm))
        mono.fill_hmms()

        # The repl mapping table
        mono.repllist = copy.deepcopy(self.repllist)

        return mono

    # -----------------------------------------------------------------------

    def get_mfcc_parameter_kind(self):
        """
        Return the MFCC parameter kind, as a string, or an empty string.

        """
        if self.macros is None:
            return ""

        for m in self.macros:
            option = m.get('options',None)
            if option is not None:
                definition = option.get('definition',None)
                if definition is not None:
                    for defn in definition:
                        parameter_kind = defn.get('parameter_kind', None)
                        if parameter_kind is not None:
                            # Check if of MFCC type...
                            if parameter_kind['base'].lower() == "mfcc":
                                return "mfcc_" + "".join(parameter_kind['options'])

        return ""

    # -----------------------------------------------------------------------

    def merge_model(self, other, gamma=1.):
        """
        Merge another model with self.
        All new phones/biphones/triphones are added and the shared ones are
        combined using a static linear interpolation.

        @param other (AcModel) the AcModel to be merged with.
        @param gamma (float) coefficient to apply to the model: between 0.
        and 1. This means that a coefficient value of 1. indicates to keep
        the current version of each shared hmm.

        @raise TypeError, ValueError
        @return a tuple indicating the number of hmms that was
        appended, interpolated, keeped, changed.

        """
        # Check the given input data
        if gamma < 0. or gamma > 1.:
            raise ValueError('Gamma coefficient must be between 0. and 1. Got %f'%gamma)
        if isinstance(other, AcModel) is False:
            raise TypeError('Expected an AcModel instance.')

        # Check the MFCC parameter kind:
        # we can only interpolate identical models.
        if self.get_mfcc_parameter_kind() != other.get_mfcc_parameter_kind() :
            raise TypeError('Can only merge models of identical MFCC parameter kind.')

        # Fill HMM states and transitions, i.e.:
        #   - replace all the "ST_..." by the corresponding macro, for states.
        #   - replace all the "T_..." by the corresponding macro, for transitions.
        self.fill_hmms()
        othercopy = copy.deepcopy( other )
        othercopy.fill_hmms()

        # Merge the list of HMMs
        appended     = 0
        interpolated = 0
        keeped       = len(self.hmms)
        changed      = 0
        for hmm in othercopy.hmms:
            got = False
            for h in self.hmms:
                if h.name == hmm.name:
                    got = True
                    if gamma == 1.0:
                        pass
                    elif gamma == 0.:
                        self.pop_hmm( hmm.name )
                        self.append_hmm( hmm )
                        changed = changed + 1
                        keeped  = keeped  - 1
                    else:
                        selfhmm = self.get_hmm( hmm.name )
                        res = selfhmm.static_linear_interpolation(hmm, gamma)
                        if res is True:
                            interpolated = interpolated + 1
                            keeped       = keeped       - 1
                    break
            if got is False:
                self.append_hmm(hmm)
                appended = appended + 1

        # Merge the tiedlists
        self.tiedlist.merge( other.tiedlist )

        for k,v in other.repllist.get_dict().items():
            if self.repllist.is_key(k) is False and self.repllist.is_value(v) is False:
                self.repllist.add(k,v)

        return (appended,interpolated,keeped,changed)

    # -----------------------------------------------------------------------
    # Private
    # -----------------------------------------------------------------------

    def __str__(self):
        strmacros=json.dumps(self.macros,indent=2)
        strhmms="\n".join( [str(h) for h in self.hmms] )
        return "MACROS:"+strmacros+"\nHMMS:"+strhmms

    # ----------------------------------

    def _fill_states(self, states):
        newstates = []
        for state in states:
            if isinstance(state['state'], (collections.OrderedDict,collections.defaultdict)) is True:
                newstates.append( state )
                continue
            news = copy.deepcopy(state)
            news['state'] = self._fill_state( state['state'] )
            newstates.append( news )
        return newstates

    # ----------------------------------

    def _fill_state(self, state):
        newstate = None
        if self.macros is not None:
            for macro in self.macros:
                if macro.get('state', None):
                    if macro['state']['name'] == state:
                        newstate = copy.deepcopy( macro['state']['definition'] )
        return newstate

    # ----------------------------------

    def _fill_transition(self, transition):
        newtransition = None
        if self.macros is not None:
            for macro in self.macros:
                if macro.get('transition', None):
                    if macro['transition']['name'] == transition:
                        newtransition = copy.deepcopy( macro['transition']['definition'] )
        return newtransition

    # ----------------------------------

    def _create_default(self):
        return collections.OrderedDict()

    # ----------------------------------

    def create_parameter_kind(self, base=None, options=[]):
        result = self._create_default()
        result['base'] = base
        result['options'] = options
        return result

    # ----------------------------------

    def create_options(self, vector_size, parameter_kind=None, stream_info=None, duration_kind="nulld", covariance_kind="diagc"):
        macro = self._create_default()
        options = []

        if stream_info:
            option = self._create_default()
            option['stream_info'] = self._create_default()
            option['stream_info']['count'] = len(stream_info)
            option['stream_info']['sizes'] = stream_info
            options.append(option)

        option = self._create_default()
        option['vector_size'] = vector_size
        options.append(option)

        option = self._create_default()
        option['duration_kind'] = duration_kind
        options.append(option)

        if parameter_kind:
            option = self._create_default()
            option['parameter_kind'] = parameter_kind
            options.append(option)

        option = self._create_default()
        option['covariance_kind'] = covariance_kind
        options.append(option)

        macro['options'] = {'definition': options}

        return macro