def train_gmm(name, vta):
    """Train a GMM on the frames labelled ``name`` and save it to disk.

    Arguments:
        name -- label selecting the training frames; it is also substituted
                into the file name of the saved model
        vta  -- iterable of ``(frame, label)`` pairs

    The number of fitting iterations (``n_iter``) and the target number of
    mixture components (``n_mixies``) are read from module-level names.

    """
    # Keep only the frames that belong to the requested class.
    selected_frames = []
    for frame, label in vta:
        if label == name:
            selected_frames.append(frame)

    # Start from a single-component model ...
    model = GMM(n_features=36, n_components=1, n_iter=n_iter)
    model.fit(selected_frames)

    # ... and keep calling mixup() until the model has enough components.
    # NOTE(review): this relies on mixup() increasing len(model.weights);
    # otherwise the loop would not terminate -- confirm against mixup().
    while len(model.weights) < n_mixies:
        mixup(model, selected_frames, name)

    model_path = 'model_voip/vad_%s_sds_mfcc.gmm' % name
    model.save_model(model_path)
def __init__(self, cfg):
    """Set up the GMM models, the log-probability buffers and the front-end.

    Arguments:
        cfg -- configuration object; the settings are read from
               ``cfg['VAD']['gmm']`` and ``cfg['Audio']['sample_rate']``

    Raises:
        ASRException -- when ``cfg['VAD']['gmm']['frontend']`` is anything
                        other than ``'MFCC'``

    """
    self.cfg = cfg
    gmm_cfg = self.cfg['VAD']['gmm']

    self.audio_recorded_in = []

    # Load the speech and the silence model from the configured files.
    self.gmm_speech = GMM()
    self.gmm_speech.load_model(gmm_cfg['speech_model'])

    self.gmm_sil = GMM()
    self.gmm_sil.load_model(gmm_cfg['sil_model'])

    # Bounded buffers: each keeps at most `filter_length` most recent items.
    self.log_probs_speech = deque(maxlen=gmm_cfg['filter_length'])
    self.log_probs_sil = deque(maxlen=gmm_cfg['filter_length'])

    self.last_decision = 0.0

    # Only the MFCC front-end is supported; the check deliberately comes
    # after the models have been loaded, as the rest of the set-up does not
    # depend on it.
    if gmm_cfg['frontend'] != 'MFCC':
        raise ASRException('Unsupported frontend: %s' % (gmm_cfg['frontend'], ))

    # The front-end takes its options positionally, so the order of these
    # keys must stay exactly as listed.
    front_end_keys = (
        'framesize',
        'usehamming',
        'preemcoef',
        'numchans',
        'ceplifter',
        'numceps',
        'enormalise',
        'zmeansource',
        'usepower',
        'usec0',
        'usecmn',
        'usedelta',
        'useacc',
        'n_last_frames',
        'lofreq',
        'hifreq',
    )
    front_end_args = [self.cfg['Audio']['sample_rate']]
    for key in front_end_keys:
        front_end_args.append(gmm_cfg[key])

    self.front_end = MFCCFrontEnd(*front_end_args)
p_sil.join() print "Sil GMM training finished" print datetime.datetime.now() p_speech.join() print "Speech GMM training finished" print datetime.datetime.now() #train_speech_gmm() #train_sil_gmm() print '-' * 120 print 'VAD GMM test' print datetime.datetime.now() print '-' * 120 gmm_speech = GMM(n_features=0) gmm_speech.load_model('model_voip/vad_speech_sds_mfcc.gmm') gmm_sil = GMM(n_features=0) gmm_sil.load_model('model_voip/vad_sil_sds_mfcc.gmm') vta = test print "Length of test data:", len(vta) print datetime.datetime.now() accuracy = 0.0 n = 0 for frame, label in vta: log_prob_speech = gmm_speech.score(frame) log_prob_sil = gmm_sil.score(frame)