def train_and_dump(dirs, start, end, output_model, features_save):
    """Enroll the wav files of one slice of directories, then train and dump.

    Every directory in ``dirs[start:end]`` is treated as one label: the
    label is the directory's base name and each ``*.wav`` file directly
    inside it is read with ``read_wav`` and enrolled under that label.

    Args:
        dirs: Sequence of directory paths.
        start: Index of the first directory of the slice (inclusive).
        end: Index one past the last directory of the slice.
        output_model: Destination passed to the model's ``dump``.
        features_save: Destination passed to the model's ``mfcc_dump``;
            when falsy the feature dump is skipped.
    """
    model = ModelInterface()

    for speaker_dir in dirs[start:end]:
        # Strip a trailing slash first so basename() does not return ''.
        speaker = os.path.basename(speaker_dir.rstrip('/'))
        wav_paths = glob.glob(speaker_dir + '/*.wav')
        if not wav_paths:
            print("No wav file found in %s" % speaker_dir)
            continue

        for wav_path in wav_paths:
            # Best effort: a file that cannot be read or enrolled is
            # reported and skipped so the rest of the group still trains.
            try:
                sample_rate, samples = read_wav(wav_path)
                model.enroll(speaker, sample_rate, samples)
            except Exception as err:
                print(wav_path + " error %s" % err)

    print("The group wav files has been enrolled")

    # Save the MFCC feature file only when a save path was specified.
    if features_save:
        model.mfcc_dump(features_save)
        print(
            "The features of this group wav files has been pickle.dumped to %s"
            % features_save)

    model.train()
    model.dump(output_model)
    print("%s has been pickle.dumped\t" % output_model)
def task_enroll(input_dirs, output_model, features_save=None, group_person_num=None):
    """Enroll every wav file under the given directories, then train and dump.

    ``input_dirs`` is a whitespace-separated string of path patterns. Each
    pattern has ``~`` expanded and is globbed; every match that is a
    directory is treated as one label (its base name), and each ``*.wav``
    file directly inside it is read with ``read_wav`` and enrolled.

    Args:
        input_dirs: Whitespace-separated directory patterns.
        output_model: Destination passed to the model's ``dump``.
        features_save: Optional destination passed to the model's
            ``mfcc_dump``; when falsy the feature dump is skipped.
        group_person_num: Accepted for callers but not used in this
            function.
    """
    model = ModelInterface()

    # Split the input string into the individual directory patterns.
    patterns = [os.path.expanduser(raw) for raw in input_dirs.strip().split()]
    # Expand every pattern and keep only the matches that are directories.
    speaker_dirs = [
        match
        for pattern in patterns
        for match in glob.glob(pattern)
        if os.path.isdir(match)
    ]

    for speaker_dir in speaker_dirs:
        # Strip a trailing slash first so basename() does not return ''.
        speaker = os.path.basename(speaker_dir.rstrip('/'))
        wav_paths = glob.glob(speaker_dir + '/*.wav')
        if not wav_paths:
            print("No wav file found in %s" % speaker_dir)
            continue

        for wav_path in wav_paths:
            # Best effort: a file that cannot be read or enrolled is
            # reported and skipped so the remaining files still enroll.
            try:
                sample_rate, samples = read_wav(wav_path)
                model.enroll(speaker, sample_rate, samples)
            except Exception as err:
                print(wav_path + " error %s" % err)

    print("The wav files has been enrolled")

    # Save the MFCC feature file only when a save path was specified.
    if features_save:
        model.mfcc_dump(features_save)
        print(
            "The features of this group wav files has been pickle.dumped to %s"
            % features_save)

    model.train()
    model.dump(output_model)
    print("%s has been pickle.dumped\t" % output_model)