def __init__(self, ubmfn=None, reject_threshold=10):
    """Create an empty recognizer and optionally load a stored model.

    Args:
        ubmfn: Path handed to ``self.load``; the loaded object is stored
            as ``self.ubm``. ``None`` skips loading.
        reject_threshold: Value stored as ``self.reject_threshold``.
    """
    self.features = []
    self.gmmset = GMMSet()
    self.classes = []
    self.reject_threshold = reject_threshold
    # Always define the attribute so that reading ``self.ubm`` later cannot
    # raise AttributeError when no file name was supplied.
    self.ubm = None
    if ubmfn is not None:
        self.ubm = self.load(ubmfn)
def train(self):
    """Fit one model per enrolled name and print the total elapsed time.

    A fresh ``GMMSet`` replaces ``self.gmmset``; every ``(name, feats)``
    pair in ``self.features`` is passed to ``fit_new``. A failure for one
    name is reported and does not stop the remaining names.
    """
    self.gmmset = GMMSet()
    start_time = time.time()
    for name, feats in self.features.items():
        try:
            self.gmmset.fit_new(feats, name)
        except Exception as e:
            # Best effort: keep going, but say why this name failed instead
            # of discarding the exception.
            print("%s failed because of %s" % (name, e))
    print(time.time() - start_time, " seconds")
def train(self):
    """Rebuild ``self.gmmset`` from the enrolled features, with timing output.

    Prints a start banner, the fitting time of every name, the reason for
    any failed fit, and finally the overall duration.
    """
    self.gmmset = GMMSet()
    overall_begin = time.time()
    print("Begin to train")
    for name, feats in self.features.items():
        try:
            fit_begin = time.time()
            self.gmmset.fit_new(feats, name)
            fit_elapsed = time.time() - fit_begin
            print(name, " trained", fit_elapsed, "seconds")
        except Exception as e:
            # One failing name must not abort the rest of the run.
            print("%s failed because of %s" % (name, e))
    overall_elapsed = time.time() - overall_begin
    print("Train ", overall_elapsed, " seconds")
def _get_gmm_set(self):
    """Return a new ``GMMSet``, seeded from the UBM file when possible.

    The UBM is only used when ``UBM_MODEL_FILE`` names an existing file and
    ``GMMSet`` is the ``GMMSetPyGMM`` class; any exception on that path
    prints a warning and falls back to a plain ``GMMSet()``.
    """
    has_ubm_file = self.UBM_MODEL_FILE and os.path.isfile(self.UBM_MODEL_FILE)
    if not has_ubm_file:
        return GMMSet()
    try:
        from gmmset import GMMSetPyGMM
        if GMMSet is GMMSetPyGMM:
            return GMMSet(ubm=GMM.load(self.UBM_MODEL_FILE))
    except Exception as e:
        # Single-argument print calls emit the same text under Python 2 and 3.
        print("Warning: failed to import gmmset. You may forget to compile gmm:")
        print(e)
        print("Try running `make -C src/gmm` to compile gmm module.")
        print("But gmm from sklearn will work as well! Using it now!")
    return GMMSet()
class ModelInterface:
    """Collects per-name features, trains a ``GMMSet`` and predicts labels."""

    def __init__(self):
        # name -> list of feature vectors gathered by ``enroll``
        self.features = defaultdict(list)
        self.gmmset = GMMSet()

    def enroll(self, name, fs, signal):
        """Extract features from ``signal`` and append them under ``name``."""
        print("enroll " + name)
        feat = get_feature(fs, signal)
        self.features[name].extend(feat)

    def train(self):
        """Fit one model per enrolled name and print the elapsed time.

        A failure for one name is reported and the remaining names are
        still trained.
        """
        self.gmmset = GMMSet()
        start_time = time.time()
        for name, feats in self.features.items():
            try:
                self.gmmset.fit_new(feats, name)
            except Exception as e:
                # Report the cause instead of silently dropping it.
                print("%s failed because of %s" % (name, e))
        print(time.time() - start_time, " seconds")

    def CheckEnroll(self):
        """Print every enrolled name, one per line."""
        for name in self.features:
            print("%s " % (name))

    def dump(self, fname):
        """Pickle this whole object to ``fname``.

        ``before_pickle`` / ``after_pickle`` of the model set are called
        around the write; ``after_pickle`` now runs even if writing fails.
        """
        self.gmmset.before_pickle()
        try:
            with open(fname, 'wb') as f:
                pickle.dump(self, f, -1)
        finally:
            self.gmmset.after_pickle()

    def predict(self, fs, signal):
        """Return the predicted label (name) for ``signal``.

        Returns ``None`` when feature extraction fails; previously the
        method went on to use the never-assigned feature variable.
        """
        try:
            feat = get_feature(fs, signal)
        except Exception as e:
            print(e)
            return None
        return self.gmmset.predict_one(feat)

    @staticmethod
    def load(fname):
        """Load an object written by ``dump`` and return it.

        Only load files you trust: unpickling can execute arbitrary code.
        """
        print(fname)
        with open(fname, 'rb') as f:
            R = pickle.load(f)
            R.gmmset.after_pickle()
            return R
def __init__(self):
    """Start with no enrolled features, a new model set and a new VAD."""
    # name -> list of feature vectors
    self.features = defaultdict(list)
    # container for the trained models
    self.gmmset = GMMSet()
    # voice activity detector instance
    self.vad = VAD()
class ModelInterface(object):
    """Enrolls speakers, trains a ``GMMSet`` and predicts labels.

    The code is written so that it runs unchanged on Python 2 and Python 3.
    """

    # Path of the UBM model file passed to ``GMM.load`` in ``_get_gmm_set``.
    UBM_MODEL_FILE = 'model/ubm.mixture-32.utt-300.model'

    def __init__(self):
        # name -> list of feature vectors gathered by ``enroll``
        self.features = defaultdict(list)
        self.gmmset = GMMSet()
        self.vad = VAD()

    def init_noise(self, fs, signal):
        """Forward ``fs`` and ``signal`` to the VAD's ``init_noise``."""
        self.vad.init_noise(fs, signal)

    def filter(self, fs, signal):
        """Return the VAD-filtered signal, or an empty array if too short.

        The filtered signal is kept only when it is longer than one third
        of the original length.
        """
        ret, intervals = self.vad.filter(fs, signal)
        orig_len = len(signal)
        if len(ret) > orig_len / 3:
            return ret
        return np.array([])

    def enroll(self, name, fs, signal):
        """Extract features from ``signal`` and append them under ``name``."""
        feat = mix_feature((fs, signal))
        self.features[name].extend(feat)

    def _get_gmm_set(self):
        """Return a new ``GMMSet``, UBM-seeded when it is ``GMMSetPyGMM``."""
        from gmmset import GMMSetPyGMM
        if GMMSet is GMMSetPyGMM:
            return GMMSet(ubm=GMM.load(self.UBM_MODEL_FILE))
        return GMMSet()

    def train(self):
        """Fit one model per enrolled name and print the elapsed time."""
        self.gmmset = self._get_gmm_set()
        start = time.time()
        print("Start training...")
        # ``items`` exists on both Python 2 and 3 (``iteritems`` does not).
        for name, feats in self.features.items():
            self.gmmset.fit_new(feats, name)
        print("%s  seconds" % (time.time() - start))

    def predict(self, fs, signal, reject=False):
        """Return the predicted label for ``signal``.

        Args:
            fs: Passed with ``signal`` to ``mix_feature``.
            signal: Audio samples to classify.
            reject: Use ``predict_one_with_rejection`` first; ignored
                unless ``GMMSet`` is ``GMMSetPyGMM``.

        Returns:
            The label, or ``None`` when feature extraction fails.
        """
        from gmmset import GMMSetPyGMM
        if GMMSet is not GMMSetPyGMM:
            reject = False
        try:
            feat = mix_feature((fs, signal))
        except Exception as e:
            print(str(e))
            return None
        if reject:
            try:
                return self.gmmset.predict_one_with_rejection(feat)
            except Exception as e:
                # Best effort: fall back to plain prediction below.
                print(str(e))
        return self.gmmset.predict_one(feat)

    def dump(self, fname):
        """Pickle this whole object to ``fname``.

        The file is opened in binary mode because a binary pickle protocol
        is used; ``after_pickle`` runs even if writing fails.
        """
        self.gmmset.before_pickle()
        try:
            with open(fname, 'wb') as f:
                pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)
        finally:
            self.gmmset.after_pickle()

    @staticmethod
    def load(fname):
        """Load an object written by ``dump`` and return it.

        Only load files you trust: unpickling can execute arbitrary code.
        """
        with open(fname, 'rb') as f:
            R = pickle.load(f)
            R.gmmset.after_pickle()
            return R
class ModelInterface(object):
    """Enrolls speakers, trains a ``GMMSet`` and predicts labels.

    The code is written so that it runs unchanged on Python 2 and Python 3.
    """

    # Path of the UBM model file passed to ``GMM.load`` in ``_get_gmm_set``.
    UBM_MODEL_FILE = 'model/ubm.mixture-32.utt-300.model'

    def __init__(self):
        # name -> list of feature vectors gathered by ``enroll``
        self.features = defaultdict(list)
        self.gmmset = GMMSet()
        self.vad = VAD()

    def init_noise(self, fs, signal):
        """Forward ``fs`` and ``signal`` to the VAD's ``init_noise``."""
        self.vad.init_noise(fs, signal)

    def filter(self, fs, signal):
        """Return the VAD-filtered signal, or an empty array if too short.

        The filtered signal is kept only when it is longer than one third
        of the original length.
        """
        ret, intervals = self.vad.filter(fs, signal)
        orig_len = len(signal)
        if len(ret) > orig_len / 3:
            return ret
        return np.array([])

    def enroll(self, name, fs, signal):
        """Extract features from ``signal`` and append them under ``name``."""
        feat = mix_feature((fs, signal))
        self.features[name].extend(feat)

    def _get_gmm_set(self):
        """Return a new ``GMMSet``, UBM-seeded when ``GMMSetPyGMM`` is used.

        Any exception on the UBM path prints a warning and falls back to a
        plain ``GMMSet()``.
        """
        try:
            from gmmset import GMMSetPyGMM
            if GMMSet is GMMSetPyGMM:
                return GMMSet(ubm=GMM.load(self.UBM_MODEL_FILE))
        except Exception as e:
            print("Warning: failed to import gmmset. You may forget to compile gmm:")
            print(e)
            print("Try running `make -C src/gmm` to compile gmm module.")
            print("But gmm from sklearn will work as well! Using it now!")
        return GMMSet()

    def train(self):
        """Fit one model per enrolled name and print the elapsed time."""
        self.gmmset = self._get_gmm_set()
        start = time.time()
        print("Start training...")
        # ``items`` exists on both Python 2 and 3 (``iteritems`` does not).
        for name, feats in self.features.items():
            self.gmmset.fit_new(feats, name)
        print("%s  seconds" % (time.time() - start))

    def predict(self, fs, signal, reject=False):
        """Return the predicted label for ``signal``.

        Args:
            fs: Passed with ``signal`` to ``mix_feature``.
            signal: Audio samples to classify.
            reject: Use ``predict_one_with_rejection`` first; ignored
                unless ``GMMSet`` is ``GMMSetPyGMM``.

        Returns:
            The label, or ``None`` when feature extraction fails.
        """
        from gmmset import GMMSetPyGMM
        if GMMSet is not GMMSetPyGMM:
            reject = False
        try:
            feat = mix_feature((fs, signal))
        except Exception as e:
            print(tb.format_exc())
            return None
        if reject:
            try:
                return self.gmmset.predict_one_with_rejection(feat)
            except Exception as e:
                # Best effort: fall back to plain prediction below.
                print(tb.format_exc())
        return self.gmmset.predict_one(feat)

    def dump(self, fname):
        """Pickle this whole object to ``fname``.

        The file is opened in binary mode because a binary pickle protocol
        is used; ``after_pickle`` runs even if writing fails.
        """
        self.gmmset.before_pickle()
        try:
            with open(fname, 'wb') as f:
                pickle.dump(self, f, -1)
        finally:
            self.gmmset.after_pickle()

    @staticmethod
    def load(fname):
        """Load an object written by ``dump`` and return it.

        Only load files you trust: unpickling can execute arbitrary code.
        """
        with open(fname, 'rb') as f:
            R = pickle.load(f)
            R.gmmset.after_pickle()
            return R
class ModelInterface:
    """Collects features, trains a ``GMMSet`` and stores one file per model."""

    def __init__(self):
        # name -> list of feature vectors gathered by ``enroll``
        self.features = defaultdict(list)
        self.gmmset = GMMSet()

    def enroll(self, name, fs, signal):
        """Extract features from ``signal`` and append them under ``name``."""
        feat = get_feature(fs, signal)
        self.features[name].extend(feat)

    def mfcc_dump(self, fname):
        """Pickle the collected feature dictionary to ``fname``."""
        with open(fname, 'wb') as f:
            pickle.dump(self.features, f, -1)

    def train(self):
        """Rebuild ``self.gmmset`` from the enrolled features, with timing.

        A failure for one name is reported with its cause and does not
        stop the remaining names.
        """
        self.gmmset = GMMSet()
        start_time1 = time.time()
        print("Begin to train")
        for name, feats in self.features.items():
            try:
                start_time2 = time.time()
                self.gmmset.fit_new(feats, name)
                print(name, " trained", time.time() - start_time2, "seconds")
            except Exception as e:
                print("%s failed because of %s" % (name, e))
        print("Train ", time.time() - start_time1, " seconds")

    def dump(self, save_dir):
        """Write every trained model to its own ``<label>.m`` file.

        Labels come from ``self.gmmset.y`` and the matching models from
        ``self.gmmset.gmms``; each model object is pickled on its own.
        """
        for label, model in zip(self.gmmset.y, self.gmmset.gmms):
            save_path = os.path.join(save_dir, label + '.m')
            with open(save_path, 'wb') as f:
                pickle.dump(model, f, -1)

    def predict(self, feat):
        """Return the label predicted by the model set for ``feat``.

        The class has no ``predict_one`` of its own, so the call is
        delegated to ``self.gmmset``.
        """
        return self.gmmset.predict_one(feat)

    @staticmethod
    def load(fname):
        """Load one model file and return ``(label, model)``.

        The label is the file's base name up to its first ``.``.
        Only load files you trust: unpickling can execute arbitrary code.
        """
        with open(fname, 'rb') as f:
            label = os.path.basename(fname.rstrip('/')).split('.')[0]
            R = pickle.load(f)
            return label, R
def task_predict(input_files, input_model):
    """Score audio files against stored models and print top-1/top-10 accuracy.

    Args:
        input_files: Glob pattern of the audio files to evaluate. The
            expected label of a file is its base name up to the first ``_``.
        input_model: Whitespace-separated glob patterns of model files,
            each loaded through ``ModelInterface.load``.

    Exits with status 1 when no model file matches. Returns ``None``
    without printing accuracy when no audio file matches, instead of
    dividing by zero.
    """
    # Split the model argument into individual (user-expanded) patterns.
    input_models = [os.path.expanduser(k) for k in input_model.strip().split()]
    # Expand every pattern and chain all matches into one sequence.
    models = itertools.chain(*(glob.glob(m) for m in input_models))
    # Load every matched model file.
    models = [ModelInterface.load(m) for m in models]
    if not models:
        print("No model file found in %s" % input_model)
        sys.exit(1)

    # Accuracy counters: ``*1`` are top-1, the others top-10.
    right = 0
    right1 = 0
    wrong = 0
    wrong1 = 0
    num = 0

    # Extract features of each audio file and score them against every model.
    for f in glob.glob(os.path.expanduser(input_files)):
        start_time = time.time()
        fs, signal = read_wav(f)
        print(f)
        feat = get_feature(fs, signal)
        f_models = [(feat, m) for m in models]

        pool = ThreadPool(2)
        try:
            predict_result = pool.map(get_score, f_models)
        finally:
            # Release the worker threads even if scoring raised.
            pool.close()
            pool.join()

        # Element 0 of each result is used as the label, element 1 as score.
        proba = GMMSet.softmax([i[1] for i in predict_result])
        predict_result = [(predict_result[i][0], proba[i])
                          for i in range(len(proba))]
        # Sort from highest to lowest score.
        predict_result = sorted(predict_result, key=operator.itemgetter(1),
                                reverse=True)

        label = os.path.basename(f).split('_')[0]
        predict = predict_result[0][0]
        predict_score = predict_result[0][1]
        print("Predict ", time.time() - start_time, " seconds")

        # Top-1 accuracy.
        # NOTE(review): ``in`` is a substring test when ``predict`` is a
        # string, while the top-10 check below compares whole labels --
        # confirm that the difference is intended.
        if label in predict:
            right1 += 1
            print('label:', label, ' predict:', predict, ' score:',
                  predict_score, ' top1 right')
        else:
            wrong1 += 1
            print('label:', label, ' predict:', predict, ' score:',
                  predict_score, ' top1 wrong')

        # Top-10 accuracy.
        top_results = predict_result[:10]
        predicts = [pre[0] for pre in top_results]
        predict_scores = [pre[1] for pre in top_results]
        if label in predicts:
            right += 1
            print('label:', label, ' predicts:', predicts, ' scores:',
                  predict_scores, ' top10 Right')
        else:
            wrong += 1
            print('label:', label, ' predicts:', predicts, ' scores:',
                  predict_scores, ' top10 Wrong')
        num += 1

    if num == 0:
        # Without this guard the ratios below raise ZeroDivisionError.
        print("No input file found in %s" % input_files)
        return

    print('top1:', num, ' right:', right1, ' wrong:', wrong1,
          ' top1 acc:', right1 / num)
    print('top10:', num, ' right:', right, ' wrong:', wrong,
          ' top10 acc:', right / num)
class GMMRec(object):
    """Speaker recognizer keeping parallel lists of names, features, models.

    ``classes``, ``features`` and ``models`` are index-aligned: entry ``i``
    of each list belongs to the same speaker. The code runs unchanged on
    Python 2 and Python 3.
    """

    def __init__(self):
        self.features = []
        self.gmmset = GMMSet()
        self.classes = []
        self.models = []

    def delete_speaker(self, name):
        """Remove ``name`` from the enrolled lists and from the model set.

        The entry is removed from all three parallel lists so they stay
        aligned, and from ``self.gmmset`` only if it was already added
        there. Unknown names just print a message.
        """
        if name not in self.classes:
            print("%s not in the list!" % name)
            return
        ind = self.classes.index(name)
        del self.classes[ind]
        del self.models[ind]
        del self.features[ind]
        # An enrolled but not yet trained speaker has no model-set entry.
        if name in self.gmmset.y:
            ind = self.gmmset.y.index(name)
            del self.gmmset.gmms[ind]
            del self.gmmset.y[ind]

    def enroll_model(self, name, model):
        """Enroll ``name`` from a stored model file instead of features.

        ``model`` is loaded with ``self.load`` and added to the model set;
        the feature slot of this speaker is ``None``.
        """
        if name not in self.classes:
            self.classes.append(name)
            self.models.append(model)
            self.features.append(None)
            gmm = self.load(model)
            self.gmmset.add_new(gmm, name)

    def enroll(self, name, mfcc_vecs, model=None):
        """Enroll ``name`` with features (stored as ``float32``).

        Args:
            name: Speaker label.
            mfcc_vecs: Array providing ``astype``.
            model: Optional file name the trained model is dumped to by
                ``train``.
        """
        if name not in self.classes:
            feature = mfcc_vecs.astype(np.float32)
            self.features.append(feature)
            self.classes.append(name)
            self.models.append(model)
        else:
            print(name + " already enrolled, please delete the old one first!")

    def get_mfcc(self, audio_path):
        """Read an audio file and return MFCCs stacked with their deltas.

        Only the first channel of a multi-channel file is used, and the
        signal goes through ``remove_silence`` before ``mfcc`` is called.
        """
        (sr, sig) = wav.read(audio_path)
        if len(sig.shape) > 1:
            sig = sig[:, 0]
        cleansig = remove_silence(sr, sig)
        mfcc_vecs = mfcc(cleansig, sr, numcep=19)
        mfcc_delta = librosa.feature.delta(mfcc_vecs.T)
        mfcc_delta2 = librosa.feature.delta(mfcc_vecs.T, order=2)
        feats = np.vstack([mfcc_vecs.T, mfcc_delta, mfcc_delta2])
        return feats.T

    def enroll_file(self, name, fn, model=None):
        """Enroll ``name`` using the features extracted from file ``fn``."""
        if name not in self.classes:
            fn_mfcc = np.array(self.get_mfcc(fn))
            self.enroll(name, fn_mfcc, model=model)
        else:
            print(name + " already enrolled, please delete the old one first!")

    def _get_gmm_set(self):
        """Return a new, empty ``GMMSet``."""
        return GMMSet()

    def train(self, gmm_order=None):
        """Fit a model for every enrolled speaker not yet in the model set.

        A newly fitted model is also dumped to the speaker's model file
        when one was given at enrollment.
        """
        for name, feats, model in zip(self.classes, self.features, self.models):
            if (name not in self.gmmset.y) and (name is not None):
                gmm = self.gmmset.fit_new(feats, name, gmm_order)
                if model is not None:
                    self.dump(model, part=gmm)
            else:
                print(name + " already trained, skip!")

    def predict(self, mfcc_vecs):
        """Return the label predicted for ``mfcc_vecs`` (cast to ``float32``)."""
        feature = mfcc_vecs.astype(np.float32)
        return self.gmmset.predict_one(feature)

    def dump(self, fname, part=None):
        """Pickle ``part`` (or this whole object when ``None``) to ``fname``.

        The file is opened in binary mode because a binary pickle protocol
        is used.
        """
        with open(fname, 'wb') as f:
            if part is None:
                pickle.dump(self, f, -1)
            else:
                pickle.dump(part, f, -1)

    @staticmethod
    def load(fname):
        """Return the object pickled in ``fname``.

        Only load files you trust: unpickling can execute arbitrary code.
        """
        with open(fname, 'rb') as f:
            R = pickle.load(f)
            return R
def _get_gmm_set(self):
    """Return a new ``GMMSet``; seed it with the UBM for ``GMMSetPyGMM``.

    When ``GMMSet`` is the ``GMMSetPyGMM`` class the model loaded from
    ``self.UBM_MODEL_FILE`` is passed as ``ubm``; otherwise the set is
    created without arguments.
    """
    from gmmset import GMMSetPyGMM
    if GMMSet is not GMMSetPyGMM:
        return GMMSet()
    ubm_model = GMM.load(self.UBM_MODEL_FILE)
    return GMMSet(ubm=ubm_model)
class GMMRec(object):
    """Speaker recognizer built on normalized MFCC features and a ``GMMSet``."""

    def __init__(self, ubmfn=None, reject_threshold=10):
        """Create an empty recognizer and optionally load a stored model.

        Args:
            ubmfn: Path handed to ``self.load``; the result is stored as
                ``self.ubm``. ``None`` skips loading.
            reject_threshold: Value stored as ``self.reject_threshold``.
        """
        self.features = []
        self.gmmset = GMMSet()
        self.classes = []
        self.reject_threshold = reject_threshold
        # Always defined so reading ``self.ubm`` cannot raise AttributeError.
        self.ubm = None
        if ubmfn is not None:
            self.ubm = self.load(ubmfn)

    def _extract_feature(self, signal, fs):
        """Return per-coefficient normalized MFCC frames of ``signal``."""
        signal_new = remove_silence(fs, signal)
        # The hop length is a sample count, so pass an integer.
        hop_length = int(min(0.016 * fs, 512))
        mfcc = librosa.feature.mfcc(y=signal_new, sr=fs, n_mfcc=15,
                                    hop_length=hop_length)
        mfcc = mfcc.T
        mu = np.mean(mfcc, axis=0)
        sigma = np.std(mfcc, axis=0)
        return (mfcc - mu) / sigma

    def enroll(self, name, signal, fs=44100):
        """Store the features of ``signal`` under the label ``name``."""
        feature = self._extract_feature(signal, fs)
        self.features.append(feature)
        self.classes.append(name)

    def _get_gmm_set(self):
        """Return a new, empty ``GMMSet``."""
        return GMMSet()

    def train(self):
        """Rebuild the model set with one model per enrolled entry."""
        self.gmmset = self._get_gmm_set()
        for name, feats in zip(self.classes, self.features):
            self.gmmset.fit_new(feats, name)

    def predict(self, signal, fs=44100):
        """Return the label predicted for ``signal``."""
        feature = self._extract_feature(signal, fs)
        return self.gmmset.predict_one(feature)

    @staticmethod
    def totime(secs):
        """Split ``secs`` into an ``(hours, minutes, seconds)`` tuple."""
        m, s = divmod(secs, 60)
        h, m = divmod(m, 60)
        return h, m, s

    def showresult(self, signal, fs, head, disp):
        """Print the time stamp of ``head`` with the prediction for ``signal``.

        When ``disp`` is true an audio widget is shown as well; failures of
        that optional display step are ignored.
        """
        print("%d:%02d:%02d" % (self.totime(head)), self.predict(signal, fs))
        try:
            if disp:
                display(Audio(data=signal, rate=fs))
        except Exception:
            # Display is optional; do not let it break recognition.
            pass

    def recognize(self, signal, step=1, duration=1.5, fs=44100, disp=True):
        """Predict the speaker for successive windows of ``signal``.

        Args:
            signal: Array of samples; ``signal.shape[0]`` is its length.
            step: Seconds between the starts of two windows.
            duration: Window length in seconds.
            fs: Sample rate used to convert seconds to sample indices.
            disp: Passed to ``showresult``.
        """
        head = 0
        totallen = int(np.round(signal.shape[0] / fs))
        print('Recognition results:')
        while head < totallen:
            tail = min(head + duration, totallen)
            # Slice bounds must be integers; a fractional duration would
            # otherwise produce float indices.
            signali = signal[int(fs * head):int(fs * tail)]
            self.showresult(signali, fs, head, disp)
            head += step

    def dump(self, fname, part=None):
        """Pickle ``part`` (or this whole object when ``None``) to ``fname``."""
        with open(fname, 'wb') as f:
            if part is None:
                pickle.dump(self, f, -1)
            else:
                pickle.dump(part, f, -1)

    @staticmethod
    def load(fname):
        """Return the object pickled in ``fname``.

        Only load files you trust: unpickling can execute arbitrary code.
        """
        with open(fname, 'rb') as f:
            R = pickle.load(f)
            return R
class ModelInterface(object):
    """Enrolls speakers, trains a ``GMMSet`` and predicts labels.

    The code is written so that it runs unchanged on Python 2 and Python 3.
    """

    # Optional path of a UBM model file; ``None`` disables the UBM path.
    UBM_MODEL_FILE = None

    def __init__(self):
        # name -> list of feature vectors gathered by ``enroll``
        self.features = defaultdict(list)
        self.gmmset = GMMSet()
        self.vad = VAD()

    def init_noise(self, fs, signal):
        """Initialize the VAD from environment noise."""
        self.vad.init_noise(fs, signal)

    def filter(self, fs, signal):
        """Use VAD (voice activity detection) to drop silent parts.

        Returns the filtered signal when it is longer than one third of
        the original, otherwise an empty array.
        """
        ret, intervals = self.vad.filter(fs, signal)
        orig_len = len(signal)
        if len(ret) > orig_len / 3:
            # signal is filtered by VAD
            return ret
        return np.array([])

    def enroll(self, name, fs, signal):
        """Add the signal to this person's training data.

        Args:
            name: Person's name, used as the label.
            fs: Passed with ``signal`` to ``mix_feature``.
            signal: Audio samples of that person.
        """
        feat = mix_feature((fs, signal))
        self.features[name].extend(feat)

    def _get_gmm_set(self):
        """Return a new ``GMMSet``, seeded from the UBM file when possible.

        The UBM is only used when ``UBM_MODEL_FILE`` names an existing file
        and ``GMMSet`` is ``GMMSetPyGMM``; any exception on that path prints
        a warning and falls back to a plain ``GMMSet()``.
        """
        if self.UBM_MODEL_FILE and os.path.isfile(self.UBM_MODEL_FILE):
            try:
                from gmmset import GMMSetPyGMM
                if GMMSet is GMMSetPyGMM:
                    return GMMSet(ubm=GMM.load(self.UBM_MODEL_FILE))
            except Exception as e:
                print("Warning: failed to import gmmset. You may forget to compile gmm:")
                print(e)
                print("Try running `make -C src/gmm` to compile gmm module.")
                print("But gmm from sklearn will work as well! Using it now!")
                return GMMSet()
        return GMMSet()

    def train(self):
        """Fit one model per enrolled name and print the elapsed time."""
        self.gmmset = self._get_gmm_set()
        start = time.time()
        print("Start training...")
        # ``items`` exists on both Python 2 and 3 (``iteritems`` does not).
        for name, feats in self.features.items():
            self.gmmset.fit_new(feats, name)
        print("%s  seconds" % (time.time() - start))

    def predict(self, fs, signal):
        """Return a label (name), or ``None`` if feature extraction fails."""
        try:
            feat = mix_feature((fs, signal))
        except Exception as e:
            print(tb.format_exc())
            return None
        return self.gmmset.predict_one(feat)

    def dump(self, fname):
        """Dump all models to a file.

        The file is opened in binary mode, matching ``load`` and the binary
        pickle protocol; ``after_pickle`` runs even if writing fails.
        """
        self.gmmset.before_pickle()
        try:
            with open(fname, 'wb') as f:
                pickle.dump(self, f, -1)
        finally:
            self.gmmset.after_pickle()

    @staticmethod
    def load(fname):
        """Load from a dumped model file.

        Only load files you trust: unpickling can execute arbitrary code.
        """
        with open(fname, 'rb') as f:
            R = pickle.load(f)
            R.gmmset.after_pickle()
            return R
def __init__(self):
    """Start with no enrolled data and a new, empty model set."""
    # Feature entries, in enrollment order.
    self.features = []
    # Container for the trained models.
    self.gmmset = GMMSet()
    # Labels, in enrollment order.
    self.classes = []
    # Model entries, in enrollment order.
    self.models = []
def _get_gmm_set(self):
    """Create and return a new ``GMMSet`` built without arguments."""
    fresh_set = GMMSet()
    return fresh_set
class ModelInterface:
    """Collects per-name features, trains a ``GMMSet`` and predicts labels."""

    def __init__(self):
        # name -> list of feature vectors gathered by ``enroll``
        self.features = defaultdict(list)
        self.gmmset = GMMSet()

    def enroll(self, name, fs, signal):
        """Extract features from ``signal`` and append them under ``name``."""
        feat = get_feature(fs, signal)
        self.features[name].extend(feat)

    def train(self):
        """Fit one model per enrolled name and print the elapsed time.

        A failure for one name is reported with its cause and does not
        stop the remaining names.
        """
        self.gmmset = GMMSet()
        start_time = time.time()
        for name, feats in self.features.items():
            try:
                self.gmmset.fit_new(feats, name)
            except Exception as e:
                # Report the cause instead of silently dropping it.
                print("%s failed because of %s" % (name, e))
        print("Train ", time.time() - start_time, " seconds")

    def dump(self, fname):
        """Dump all models to a file.

        ``before_pickle`` / ``after_pickle`` of the model set are called
        around the write; ``after_pickle`` now runs even if writing fails.
        """
        self.gmmset.before_pickle()
        try:
            with open(fname, 'wb') as f:
                pickle.dump(self, f, -1)
        finally:
            self.gmmset.after_pickle()

    def predict(self, feat):
        """Return a label (name) for already extracted features."""
        return self.gmmset.predict_one(feat)

    @staticmethod
    def load(fname):
        """Load from a dumped model file.

        Only load files you trust: unpickling can execute arbitrary code.
        """
        with open(fname, 'rb') as f:
            R = pickle.load(f)
            R.gmmset.after_pickle()
            return R