Ejemplos de GMM.load_model en Python

Lenguaje de programación: Python

Namespace/Package Name: alex.ml.gmm

Clase / Tipo: GMM

Método / Función: load_model

Ejemplos en hotexamples.com: 4

Python GMM.load_model - 4 ejemplos encontrados. Estos son los ejemplos reales en Python mejor valorados de alex.ml.gmm.GMM.load_model, extraídos de proyectos de código abierto. Puedes valorar los ejemplos para ayudarnos a mejorar su calidad.

Métodos usados con frecuencia

Mostrar Ocultar

GMM(3)

load_model(2)

score(2)

fit(1)

save_model(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: train_vad_gmm.py Proyecto: UFAL-DSG/alex

    print "Sil GMM training finished"
    print datetime.datetime.now()
    p_speech.join()
    print "Speech GMM training finished"
    print datetime.datetime.now()

    #train_speech_gmm()
    #train_sil_gmm()


    print '-' * 120
    print 'VAD GMM test'
    print datetime.datetime.now()
    print '-' * 120
    gmm_speech = GMM(n_features=0)
    gmm_speech.load_model('model_voip/vad_speech_sds_mfcc.gmm')
    gmm_sil = GMM(n_features=0)
    gmm_sil.load_model('model_voip/vad_sil_sds_mfcc.gmm')


    vta = test

    print "Length of test data:", len(vta)
    print datetime.datetime.now()

    accuracy = 0.0
    n = 0
    for frame, label in vta:
        log_prob_speech = gmm_speech.score(frame)
        log_prob_sil = gmm_sil.score(frame)

Ejemplo n.º 2

Mostrar archivo

    p_sil.join()
    print "Sil GMM training finished"
    print datetime.datetime.now()
    p_speech.join()
    print "Speech GMM training finished"
    print datetime.datetime.now()

    #train_speech_gmm()
    #train_sil_gmm()

    print '-' * 120
    print 'VAD GMM test'
    print datetime.datetime.now()
    print '-' * 120
    gmm_speech = GMM(n_features=0)
    gmm_speech.load_model('model_voip/vad_speech_sds_mfcc.gmm')
    gmm_sil = GMM(n_features=0)
    gmm_sil.load_model('model_voip/vad_sil_sds_mfcc.gmm')

    vta = test

    print "Length of test data:", len(vta)
    print datetime.datetime.now()

    accuracy = 0.0
    n = 0
    for frame, label in vta:
        log_prob_speech = gmm_speech.score(frame)
        log_prob_sil = gmm_sil.score(frame)

        ratio = log_prob_speech - log_prob_sil

Ejemplo n.º 3

Mostrar archivo

class GMMVAD(object):
    """GMM based voice activity detector.

    Each audio frame is converted to front-end features and scored by two
    models, one for speech and one for silence.  ``decide`` returns the
    speech posterior averaged (in the log domain) over the last
    ``filter_length`` frames.
    """

    def __init__(self, cfg):
        """Load both GMMs and build the feature front end.

        Args:
            cfg: nested mapping; this class reads ``cfg['VAD']['gmm'][...]``
                and ``cfg['Audio']['sample_rate']``.

        Raises:
            ASRException: if ``cfg['VAD']['gmm']['frontend']`` is anything
                other than ``'MFCC'``.
        """
        self.cfg = cfg
        vad_cfg = cfg['VAD']['gmm']

        # Raw samples waiting to be cut into frames.
        self.audio_recorded_in = []

        self.gmm_speech = GMM()
        self.gmm_speech.load_model(vad_cfg['speech_model'])
        self.gmm_sil = GMM()
        self.gmm_sil.load_model(vad_cfg['sil_model'])

        # Bounded histories: the deques drop the oldest score on overflow.
        self.log_probs_speech = deque(maxlen=vad_cfg['filter_length'])
        self.log_probs_sil = deque(maxlen=vad_cfg['filter_length'])

        # Returned by decide() until the first full frame has been scored.
        self.last_decision = 0.0

        if vad_cfg['frontend'] == 'MFCC':
            self.front_end = MFCCFrontEnd(
                cfg['Audio']['sample_rate'],
                vad_cfg['framesize'],
                vad_cfg['usehamming'],
                vad_cfg['preemcoef'],
                vad_cfg['numchans'],
                vad_cfg['ceplifter'],
                vad_cfg['numceps'],
                vad_cfg['enormalise'],
                vad_cfg['zmeansource'],
                vad_cfg['usepower'],
                vad_cfg['usec0'],
                vad_cfg['usecmn'],
                vad_cfg['usedelta'],
                vad_cfg['useacc'],
                vad_cfg['n_last_frames'],
                vad_cfg['lofreq'],
                vad_cfg['hifreq'])
        else:
            raise ASRException('Unsupported frontend: %s' %
                               (vad_cfg['frontend'], ))

    def decide(self, data):
        """Consume raw audio and return the current speech probability.

        Args:
            data: byte string of samples, unpacked with the ``struct``
                format ``'h'`` (two bytes per sample are assumed).

        Returns:
            A value between 0.0 (non-speech) and 1.0 (speech).  When
            ``data`` does not complete a new frame, the previous decision
            is returned unchanged (0.0 before the first frame).
        """
        # Floor division keeps the sample count an integer on both
        # Python 2 and Python 3.
        data = struct.unpack('%dh' % (len(data) // 2, ), data)
        self.audio_recorded_in.extend(data)

        vad_cfg = self.cfg['VAD']['gmm']
        framesize = vad_cfg['framesize']
        frameshift = vad_cfg['frameshift']

        # NOTE(review): the strict '>' means a buffer holding exactly one
        # frame is not processed until more audio arrives; kept as is.
        while len(self.audio_recorded_in) > framesize:
            frame = self.audio_recorded_in[:framesize]
            self.audio_recorded_in = self.audio_recorded_in[frameshift:]

            mfcc = self.front_end.param(frame)

            log_prob_speech = self.gmm_speech.score(mfcc)
            log_prob_sil = self.gmm_sil.score(mfcc)

            self.log_probs_speech.append(log_prob_speech)
            self.log_probs_sil.append(log_prob_sil)

            # Per-frame log posterior of speech: log p_s - log(p_s + p_sil),
            # then averaged over the stored history.
            log_prob_speech_avg = 0.0
            for lp_speech, lp_sil in zip(self.log_probs_speech,
                                         self.log_probs_sil):
                log_prob_speech_avg += lp_speech - logsumexp(
                    [lp_speech, lp_sil])
            log_prob_speech_avg /= len(self.log_probs_speech)

            self.last_decision = np.exp(log_prob_speech_avg)

        # Most recent speech / non-speech decision.
        return self.last_decision

Ejemplo n.º 4

Mostrar archivo

Archivo: gmm.py Proyecto: AoJ/alex

class GMMVAD(object):
    """Voice activity detector built on a speech GMM and a silence GMM.

    It only decides whether the input is speech or non-speech: decide()
    returns the posterior probability of speech, smoothed over the last
    ``filter_length`` scored frames.
    """

    def __init__(self, cfg):
        """Set up models, score histories and the feature front end from ``cfg``.

        Reads ``cfg['VAD']['gmm']`` (model paths, filter length, front-end options)
        and ``cfg['Audio']['sample_rate']``.

        Raises ASRException when the configured front end is not 'MFCC'.
        """
        self.cfg = cfg
        gmm_cfg = cfg['VAD']['gmm']

        # Samples received so far that have not yet been consumed as frames.
        self.audio_recorded_in = []

        self.gmm_speech = GMM()
        self.gmm_speech.load_model(gmm_cfg['speech_model'])
        self.gmm_sil = GMM()
        self.gmm_sil.load_model(gmm_cfg['sil_model'])

        # Fixed-length histories of the per-frame model scores.
        self.log_probs_speech = deque(maxlen=gmm_cfg['filter_length'])
        self.log_probs_sil = deque(maxlen=gmm_cfg['filter_length'])

        self.last_decision = 0.0

        if gmm_cfg['frontend'] == 'MFCC':
            self.front_end = MFCCFrontEnd(
                cfg['Audio']['sample_rate'], gmm_cfg['framesize'],
                gmm_cfg['usehamming'], gmm_cfg['preemcoef'],
                gmm_cfg['numchans'], gmm_cfg['ceplifter'],
                gmm_cfg['numceps'], gmm_cfg['enormalise'],
                gmm_cfg['zmeansource'], gmm_cfg['usepower'],
                gmm_cfg['usec0'], gmm_cfg['usecmn'],
                gmm_cfg['usedelta'], gmm_cfg['useacc'],
                gmm_cfg['n_last_frames'],
                gmm_cfg['lofreq'], gmm_cfg['hifreq'])
        else:
            raise ASRException('Unsupported frontend: %s' % (gmm_cfg['frontend'], ))

    def decide(self, data):
        """Process incoming audio and decide whether it is speech or non-speech.

        ``data`` is a byte string unpacked as ``'h'`` samples (two bytes each are
        assumed).  The returned value ranges from 0.0 (100% non-speech) to 1.0
        (100% speech).  If no complete frame became available, the previous
        decision is returned (0.0 initially).
        """
        # '//' rather than '/': the sample count must stay an integer on Python 3 too.
        n_samples = len(data) // 2
        self.audio_recorded_in.extend(struct.unpack('%dh' % (n_samples, ), data))

        gmm_cfg = self.cfg['VAD']['gmm']
        framesize, frameshift = gmm_cfg['framesize'], gmm_cfg['frameshift']

        # NOTE(review): strict '>' leaves a buffer of exactly `framesize` samples
        # unprocessed until the next call; preserved from the original.
        while len(self.audio_recorded_in) > framesize:
            frame = self.audio_recorded_in[:framesize]
            self.audio_recorded_in = self.audio_recorded_in[frameshift:]

            mfcc = self.front_end.param(frame)

            log_prob_speech = self.gmm_speech.score(mfcc)
            log_prob_sil = self.gmm_sil.score(mfcc)

            self.log_probs_speech.append(log_prob_speech)
            self.log_probs_sil.append(log_prob_sil)

            # Sum of per-frame log posteriors: log p_speech - log(p_speech + p_sil).
            log_posterior_sum = sum(
                (lp_s - logsumexp([lp_s, lp_n])
                 for lp_s, lp_n in zip(self.log_probs_speech, self.log_probs_sil)),
                0.0)
            log_prob_speech_avg = log_posterior_sum / len(self.log_probs_speech)

            self.last_decision = np.exp(log_prob_speech_avg)

        # returns a speech / non-speech decision
        return self.last_decision