コード例 #1
0
def train_gmm(name, vta):
    """Fit a GMM on the frames labelled ``name`` and save it to disk.

    Args:
        name: Label to select from ``vta``; it is also interpolated into
            the output file name.
        vta: Iterable of ``(frame, label)`` pairs.

    The model is created with one component and ``mixup`` is called
    repeatedly until ``len(model.weights)`` reaches the module-level
    ``n_mixies``.  The result is written to
    ``model_voip/vad_<name>_sds_mfcc.gmm``.  Nothing is returned.
    """
    # Keep only the frames whose label matches the requested class.
    selected_frames = []
    for frame, label in vta:
        if label == name:
            selected_frames.append(frame)

    model = GMM(n_features=36, n_components=1, n_iter=n_iter)
    model.fit(selected_frames)

    # NOTE(review): presumably each mixup() call adds mixture components;
    # the loop only terminates if it increases len(model.weights) - confirm.
    while n_mixies > len(model.weights):
        mixup(model, selected_frames, name)

    output_path = 'model_voip/vad_%s_sds_mfcc.gmm' % name
    model.save_model(output_path)
コード例 #2
0
    def __init__(self, cfg):
        """Initialise the GMM-based VAD state from a configuration mapping.

        Loads the speech and silence GMMs from the paths stored under
        ``cfg['VAD']['gmm']``, creates two bounded deques for log
        probabilities and builds the feature front end.

        Args:
            cfg: Nested mapping.  This method reads
                ``cfg['Audio']['sample_rate']`` and the
                ``cfg['VAD']['gmm']`` section.

        Raises:
            ASRException: If ``cfg['VAD']['gmm']['frontend']`` is not
                ``'MFCC'``.
        """
        self.cfg = cfg

        self.audio_recorded_in = []

        self.gmm_speech = GMM()
        # All remaining VAD settings live in this one sub-section.
        vad_cfg = self.cfg['VAD']['gmm']
        self.gmm_speech.load_model(vad_cfg['speech_model'])
        self.gmm_sil = GMM()
        self.gmm_sil.load_model(vad_cfg['sil_model'])

        # Both histories are capped at the configured filter length.
        self.log_probs_speech = deque(maxlen=vad_cfg['filter_length'])
        self.log_probs_sil = deque(maxlen=vad_cfg['filter_length'])

        self.last_decision = 0.0

        frontend_name = vad_cfg['frontend']
        if frontend_name != 'MFCC':
            raise ASRException('Unsupported frontend: %s' %
                               (frontend_name, ))

        sample_rate = self.cfg['Audio']['sample_rate']
        # MFCCFrontEnd takes its options positionally, so this key order
        # is significant and must match the constructor's parameter order.
        mfcc_option_keys = (
            'framesize',
            'usehamming',
            'preemcoef',
            'numchans',
            'ceplifter',
            'numceps',
            'enormalise',
            'zmeansource',
            'usepower',
            'usec0',
            'usecmn',
            'usedelta',
            'useacc',
            'n_last_frames',
            'lofreq',
            'hifreq',
        )
        mfcc_options = [vad_cfg[key] for key in mfcc_option_keys]
        self.front_end = MFCCFrontEnd(sample_rate, *mfcc_options)
コード例 #3
0
    p_sil.join()
    print "Sil GMM training finished"
    print datetime.datetime.now()
    p_speech.join()
    print "Speech GMM training finished"
    print datetime.datetime.now()

    #train_speech_gmm()
    #train_sil_gmm()

    print '-' * 120
    print 'VAD GMM test'
    print datetime.datetime.now()
    print '-' * 120
    gmm_speech = GMM(n_features=0)
    gmm_speech.load_model('model_voip/vad_speech_sds_mfcc.gmm')
    gmm_sil = GMM(n_features=0)
    gmm_sil.load_model('model_voip/vad_sil_sds_mfcc.gmm')

    vta = test

    print "Length of test data:", len(vta)
    print datetime.datetime.now()

    accuracy = 0.0
    n = 0
    for frame, label in vta:
        log_prob_speech = gmm_speech.score(frame)
        log_prob_sil = gmm_sil.score(frame)