def train_and_dump(dirs, start, end, output_model, features_save):
    """Enroll one slice of speaker directories, then train and dump a model.

    Every directory in ``dirs[start:end]`` is treated as one speaker: the
    directory's base name is the label and each ``*.wav`` file directly
    inside it is enrolled under that label.

    Args:
        dirs: Sequence of directory paths.
        start: Start index of the slice of ``dirs`` to process.
        end: End index (exclusive) of the slice of ``dirs`` to process.
        output_model: Path handed to ``ModelInterface.dump``.
        features_save: If truthy, path handed to ``ModelInterface.mfcc_dump``
            before training.
    """
    model = ModelInterface()
    for speaker_dir in dirs[start:end]:
        wav_paths = glob.glob(speaker_dir + '/*.wav')
        if wav_paths:
            label = os.path.basename(speaker_dir.rstrip('/'))
            for wav_path in wav_paths:
                # A file that cannot be read or enrolled is reported and
                # skipped; the remaining files are still processed.
                try:
                    fs, signal = read_wav(wav_path)
                    model.enroll(label, fs, signal)
                except Exception as e:
                    print(wav_path + " error %s" % (e))
        else:
            print("No wav file found in %s" % (speaker_dir))
    print("The group wav files has been enrolled")

    # Save the MFCC features when a destination path was given.
    if features_save:
        model.mfcc_dump(features_save)
        print(
            "The features of this group wav files has been pickle.dumped to %s"
            % features_save)

    model.train()
    model.dump(output_model)
    print("%s has been pickle.dumped\t" % output_model)
def build_model(model_info: TModelInfo, no_initialize: bool = False) -> ModelInterface:
    """Create a ``ModelInterface`` from a ``(model_type, device, kwargs)`` triple.

    Args:
        model_info: Three-element sequence unpacked as model type, device and
            a mapping of extra keyword arguments for ``ModelInterface``.
        no_initialize: Forwarded to ``ModelInterface`` as ``no_initialize``.

    Returns:
        The newly constructed ``ModelInterface``.
    """
    model_type, device, extra_options = model_info
    interface = ModelInterface(
        model_type,
        device,
        no_initialize=no_initialize,
        **extra_options,
    )
    return interface
def task_enroll(input_dirs, output_model):
    """Enroll every speaker directory matched by ``input_dirs`` and dump a model.

    Args:
        input_dirs: Whitespace-separated directory patterns. ``~`` is
            expanded and each pattern is passed through ``glob.glob``; only
            matches that are directories are kept. The base name of each
            directory is used as the speaker label.
        output_model: Path handed to ``ModelInterface.dump``.

    Exits the process with status 1 (``sys.exit(1)``) when no pattern matches
    a directory.
    """
    patterns = [os.path.expanduser(k) for k in input_dirs.strip().split()]
    dirs = itertools.chain(*(glob.glob(d) for d in patterns))
    dirs = [d for d in dirs if os.path.isdir(d)]

    if len(dirs) == 0:
        print("No valid directory found!")
        sys.exit(1)

    # Build the model only once we know there is something to enroll.
    m = ModelInterface()
    for d in dirs:
        label = os.path.basename(d.rstrip('/'))
        wavs = glob.glob(d + '/*.wav')

        if len(wavs) == 0:
            print("No wav file found in %s" % (d))
            continue

        for wav in wavs:
            # Best effort: a broken file is reported and skipped.
            try:
                fs, signal = read_wav(wav)
                m.enroll(label, fs, signal)
                print("wav %s has been enrolled" % (wav))
            except Exception as e:
                print(wav + " error %s" % (e))

    m.train()
    m.dump(output_model)
def _cache(data: TextIO, model_name: Text, output: BinaryIO, **kwargs):
    """Pre-compute encoded model inputs for every key of a CSV and pickle them.

    Args:
        data: Source passed to ``util.load_csv``; the keys of the loaded
            mapping are processed one by one.
        model_name: Name resolved through ``models.select``.
        output: Binary stream that receives the pickled cache.
        **kwargs: Forwarded unchanged to ``model.encode_data``.
    """
    device = require_device(prefer_cuda=False)
    selected_type = models.select(model_name)
    model = ModelInterface(selected_type, device, False)

    table = util.load_csv(data)
    # Keys are one-element tuples: per the original note, memcached is
    # indexed on the argument list.
    encoded = {
        (smiles, ): model.encode_data(model.process(smiles), **kwargs)
        for smiles in table.keys()
    }
    pickle.dump(encoded, output)
def task_mfcc_train(input_files, output_model):
    """Train a model from pickled MFCC feature dictionaries and dump it.

    Args:
        input_files: Glob pattern (``~`` is expanded) selecting the pickled
            feature files to merge.
        output_model: Path handed to ``ModelInterface.dump``.
    """
    # Gather the feature dictionaries of all files into a single dictionary.
    merged_features = {}
    pattern = os.path.expanduser(input_files)
    for feature_path in glob.glob(pattern):
        with open(feature_path, 'rb') as handle:
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only feed this function files from a trusted source.
            loaded = pickle.load(handle)
        # On duplicate keys the entry merged earlier is kept.
        merged_features = {**loaded, **merged_features}

    # Train on the merged features and save the model file.
    model = ModelInterface()
    model.features = merged_features
    model.train()
    model.dump(output_model)
    print("%s has been pickle.dumped\t" % output_model)
def train(train_data_dir, model_path):
    """Enroll the wav files of a flat directory, train a model and dump it.

    File names are expected to look like ``<label>_<rest>`` with exactly one
    underscore; the part before the underscore is the speaker label.

    Args:
        train_data_dir: Directory whose entries are scanned; entries whose
            name contains ``.wav`` are used.
        model_path: Path handed to ``ModelInterface.dump``.
    """
    m = ModelInterface()
    files = [f for f in os.listdir(train_data_dir) if re.search(r"\.wav", f)]
    for f in files:
        file = os.path.join(train_data_dir, f)
        try:
            # Parsed inside the try block so that a file whose name does not
            # split into exactly two parts is logged and skipped like any
            # other bad file, instead of aborting the whole run.
            label, _ = f.split("_")
            fs, signal = read_wav(file)
            m.enroll(label, fs, signal)
            logger.info("wav %s has been enrolled" % (file))
        except Exception as e:
            logger.info(file + " error %s" % (e))
    m.train()
    m.dump(model_path)
def task_train_single(wav_url, person_id):
    """Download one training recording for a new person and train on it.

    Reads the module-level names ``model`` (model file path) and
    ``train_voice_dir`` (prefix for per-person directories).

    Args:
        wav_url: Location passed to ``utils.download_file``.
        person_id: Label of the person; also used as directory and file
            name component.

    Returns:
        ``('success', '')`` when the model was trained and dumped, otherwise
        ``('fail', <reason>)``.
    """
    if os.path.exists(model):
        m = ModelInterface.load(model)
    else:
        m = ModelInterface()

    if person_id in m.features:
        return 'fail', 'aleady exist'

    # Download the training recording.
    dest_dir = train_voice_dir + person_id
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    # %M (minutes), not %I (12-hour hour): with %I the stamp repeated the
    # hour and dropped the minutes, so two downloads at the same second of
    # different minutes within one hour got the same file name.
    current_time = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    dest_wav = dest_dir + '/' + current_time + '_' + person_id + '.wav'
    print(wav_url)
    print(dest_wav)
    utils.download_file(wav_url, dest_wav)

    # Collect every recording now present for this person.
    wavs = glob.glob(dest_dir + '/*.wav')
    if len(wavs) == 0:
        return 'fail', 'no wav files under this dir'

    # Enroll the recordings; a broken file is reported and skipped.
    for wav in wavs:
        try:
            fs, signal = utils.read_wav(wav)
            m.enroll(person_id, fs, signal)
            print("wav %s has been enrolled" % (wav))
        except Exception as e:
            print(wav + " error %s" % (e))

    m.train_single(person_id)
    m.dump(model)
    return 'success', ''
def task_enroll():
    """Enroll every wav file in ``./trainSounds`` and dump the model.

    Each file whose extension is ``.wav`` (case-insensitive) is enrolled
    under its file name without the extension. The trained model is dumped
    to ``data.bin``.
    """
    sounds_dir = os.path.join(os.getcwd(), "trainSounds")
    entries = os.listdir(sounds_dir)
    if len(entries) == 0:
        print('Directory empty')

    wav_names = [
        name for name in entries
        if os.path.splitext(name)[-1].lower() == '.wav'
    ]

    model = ModelInterface()
    for wav_name in wav_names:
        sample_rate, signal = read_wav(os.path.join(sounds_dir, wav_name))
        label = os.path.splitext(wav_name)[0]
        model.enroll(label, sample_rate, signal)
        print(label + ' enrolled')
    model.train()
    model.dump('data.bin')
def task_train_full(input_dirs, output_model):
    """Enroll every sub-directory below ``input_dirs`` and run a full training.

    Each directory found by ``os.walk`` (the root itself excluded) is one
    speaker; its base name is the label and the ``*.wav`` files directly
    inside it are enrolled.

    Args:
        input_dirs: Root directory to walk.
        output_model: Path handed to ``ModelInterface.dump``.

    Returns:
        ``('success', '')`` after training and dumping, or
        ``('fail', 'No valid directory found!')`` when there is no
        sub-directory (including when ``input_dirs`` does not exist).
    """
    # os.walk yields the root first; drop it and keep the sub-directories.
    # Slicing instead of remove(train_dir[0]) also copes with an empty walk
    # (nonexistent root), which used to raise IndexError.
    train_dir = [root for root, _subdirs, _files in os.walk(input_dirs)][1:]

    if not train_dir:
        print("No valid directory found!")
        return 'fail', 'No valid directory found!'

    # Build the model only once we know there is something to enroll.
    m = ModelInterface()
    for d in train_dir:
        label = os.path.basename(d.rstrip('/'))
        wavs = glob.glob(d + '/*.wav')

        if len(wavs) == 0:
            print("No wav file found in %s" % (d))
            continue

        for wav in wavs:
            # Best effort: a broken file is reported and skipped.
            try:
                fs, signal = utils.read_wav(wav)
                m.enroll(label, fs, signal)
                print("wav %s has been enrolled" % (wav))
            except Exception as e:
                print(wav + " error %s" % (e))

    m.train_full()
    m.dump(output_model)
    return 'success', ''
def task_enroll(input_dirs, output_model, features_save=None, group_person_num=None):
    """Enroll all speaker directories, optionally dump features, then train.

    Args:
        input_dirs: Whitespace-separated directory patterns. ``~`` is
            expanded and each pattern is globbed; only matches that are
            directories are used. A directory's base name is its label.
        output_model: Path handed to ``ModelInterface.dump``.
        features_save: If truthy, path handed to ``ModelInterface.mfcc_dump``
            before training.
        group_person_num: Accepted but not used by this function.
    """
    model = ModelInterface()

    # Split the input string into individual directory patterns.
    patterns = [os.path.expanduser(token) for token in input_dirs.strip().split()]
    # Expand every pattern and keep only the matches that are directories.
    matches = itertools.chain.from_iterable(glob.glob(p) for p in patterns)
    speaker_dirs = [path for path in matches if os.path.isdir(path)]

    for speaker_dir in speaker_dirs:
        wav_paths = glob.glob(speaker_dir + '/*.wav')
        if wav_paths:
            label = os.path.basename(speaker_dir.rstrip('/'))
            for wav_path in wav_paths:
                # A broken file is reported and skipped.
                try:
                    fs, signal = read_wav(wav_path)
                    model.enroll(label, fs, signal)
                except Exception as e:
                    print(wav_path + " error %s" % (e))
        else:
            print("No wav file found in %s" % (speaker_dir))
    print("The wav files has been enrolled")

    # Save the MFCC features when a destination path was given.
    if features_save:
        model.mfcc_dump(features_save)
        print(
            "The features of this group wav files has been pickle.dumped to %s"
            % features_save)

    model.train()
    model.dump(output_model)
    print("%s has been pickle.dumped\t" % output_model)
def __init__(self, parent=None):
    """Build the widget: load the UI file, wire signals, set up the backend.

    Args:
        parent: Optional parent passed to ``QWidget.__init__``.
    """
    QWidget.__init__(self, parent)
    # Load the widget layout from "edytor.ui" onto this instance.
    uic.loadUi("edytor.ui", self)
    self.statusBar()
    self.timer = QTimer(self)
    self.timer.timeout.connect(self.timer_callback)

    # Background-noise controls.
    self.noiseButton.clicked.connect(self.noise_clicked)
    self.recording_noise = False
    self.loadNoise.clicked.connect(self.load_noise)

    # Enrollment controls.
    self.enrollRecord.clicked.connect(self.start_enroll_record)
    self.stopEnrollRecord.clicked.connect(self.stop_enroll_record)
    self.enrollFile.clicked.connect(self.enroll_file)
    self.enroll.clicked.connect(self.do_enroll)
    self.startTrain.clicked.connect(self.start_train)

    # Model dump / load buttons.
    self.dumpBtn.clicked.connect(self.dump)
    self.loadBtn.clicked.connect(self.load)

    # Recognition controls.
    self.recoRecord.clicked.connect(self.start_reco_record)
    self.stopRecoRecord.clicked.connect(self.stop_reco_record)
    # Disabled: self.newReco.clicked.connect(self.new_reco)
    self.recoFile.clicked.connect(self.reco_file)
    self.recoInputFiles.clicked.connect(self.reco_files)

    # User-information panel.
    self.userdata = []
    self.loadUsers()
    self.Userchooser.currentIndexChanged.connect(self.showUserInfo)
    self.ClearInfo.clicked.connect(self.clearUserInfo)
    self.UpdateInfo.clicked.connect(self.updateUserInfo)
    self.UploadImage.clicked.connect(self.upload_avatar)

    # Recording animation shared by the three Animation labels.
    self.movie = QMovie(u"image/recording.gif")
    # NOTE(review): started and stopped immediately — presumably to leave
    # the movie on its first frame; confirm.
    self.movie.start()
    self.movie.stop()
    self.Animation.setMovie(self.movie)
    self.Animation_2.setMovie(self.movie)
    self.Animation_3.setMovie(self.movie)

    self.aladingpic = QPixmap(u"image/a_hello.png")
    self.Alading.setPixmap(self.aladingpic)
    self.Alading_conv.setPixmap(self.aladingpic)

    # Default user image shown on all three user-image labels.
    self.avatarname = "image/nouser.jpg"
    self.defaultimage = QPixmap(self.avatarname)
    self.Userimage.setPixmap(self.defaultimage)
    self.recoUserImage.setPixmap(self.defaultimage)
    self.convUserImage.setPixmap(self.defaultimage)
    self.load_avatar('avatar/')

    # Graph window; shown when Graph_button is clicked.
    self.graphwindow = GraphWindow()
    self.newname = ""
    self.lastname = ""
    self.Graph_button.clicked.connect(self.graphwindow.show)
    self.convRecord.clicked.connect(self.start_conv_record)
    self.convStop.clicked.connect(self.stop_conv)

    self.backend = ModelInterface()

    # Debug shortcut: Ctrl+P calls printDebug.
    QShortcut(QKeySequence("Ctrl+P"), self, self.printDebug)

    # Initialise the backend noise data from "bg.wav" when possible.
    # NOTE(review): the bare except silently ignores every failure here
    # (a missing or unreadable bg.wav included); consider narrowing it.
    try:
        fs, signal = wavfile.read("bg.wav")
        self.backend.init_noise(fs, signal)
    except:
        pass
def train(directory: Text,
          model_name: Text,
          batch_size: int,
          learning_rate: float,
          epsilon: float,
          cuda: bool,
          train_with_test: bool,
          min_iteration: int,
          max_iteration: int,
          ndrop: Optional[float] = None,
          **kwargs) -> None:
    """Train and evaluate one fresh model per fold found in ``directory``.

    Every entry of ``directory`` is treated as a fold containing
    ``train.csv``, ``test.csv`` and ``dev.csv``. For each fold a new model
    and Adam optimizer are created, mini-batches are trained for at most
    ``max_iteration`` steps, and the model is evaluated on ``dev.csv``.

    Args:
        directory: Folder whose entries are the folds.
        model_name: Name resolved through ``models.select``.
        batch_size: Number of items requested per mini-batch.
        learning_rate: Learning rate of the Adam optimizer.
        epsilon: Convergence tolerance on the change of the mean loss.
        cuda: Passed to ``require_device``.
        train_with_test: When true, the test items are added to the
            training items.
        min_iteration: Number of completed data cycles before the
            convergence test may stop training.
        max_iteration: Upper bound on the number of mini-batches.
        ndrop: When not ``None``, drop probability returned for items whose
            ``activity`` is 0 (see ``util.RandomIterator``).
        **kwargs: Extra model options; entries whose value is ``None`` are
            removed before being passed to ``ModelInterface``.

    Raises:
        AssertionError: If ``directory`` is not a directory.
    """
    # Discard options that were not set on the command line.
    kwargs = util.dict_filter(kwargs, lambda k, v: v is not None)

    data_folder = Path(directory)
    assert data_folder.is_dir(), 'Invalid data folder'

    dev = require_device(cuda)

    for fold in sorted(data_folder.iterdir()):
        log.info(f'Processing "{fold}"...')

        # Model and optimizer (model types: see models/__init__.py).
        model_type = models.select(model_name)
        model = ModelInterface(model_type, dev, **kwargs)
        optimizer = torch.optim.Adam(params=model.inst.parameters(),
                                     lr=learning_rate)

        # Load the three tables of this fold: train, test, dev.
        tables = [
            util.load_csv(fold / name)
            for name in ['train.csv', 'test.csv', 'dev.csv']
        ]

        # Let the model parse the molecules of every table.
        data = [
            [Item(model.process(smiles), activity)
             for smiles, activity in table.items()]
            for table in tables
        ]
        log.debug(f'atom_map: {model.atom_map}')

        test_batch, _test_label = util.separate_items(data[1])
        test_label = torch.tensor(_test_label)

        # Training phase.
        if train_with_test:
            train_data = data[0] + data[1]
        else:
            train_data = data[0]

        # Optionally drop negative samples at random
        # (see util.RandomIterator for details).
        if ndrop is None:
            drop_fn = None
        else:
            drop_fn = lambda x: ndrop if x.activity == 0 else 0
        data_ptr = util.RandomIterator(train_data, drop_fn=drop_fn)

        countdown = min_iteration
        min_loss = 1e99  # smallest mean loss seen so far
        sum_loss, batch_cnt = 0.0, 0
        for _ in range(max_iteration):
            # Draw and train one mini-batch.
            batch, _label = util.separate_items(data_ptr.iterate(batch_size))
            label = torch.tensor(_label)
            batch_loss = train_step(model, optimizer, batch, label)
            sum_loss += batch_loss
            batch_cnt += 1

            if not data_ptr.is_cycled():
                continue

            # Convergence test, run each time the iterator reports a cycle.
            loss = sum_loss / batch_cnt
            pred = model.predict(test_batch)
            log.debug(
                f'{util.stat_string(_test_label, pred)}. loss={loss},min={min_loss}'
            )

            if countdown <= 0 and abs(min_loss - loss) < epsilon:
                log.debug('Converged.')
                break

            countdown -= 1
            min_loss = min(min_loss, loss)
            sum_loss, batch_cnt = 0.0, 0

        # Evaluate the model on `dev.csv`.
        roc_auc, prc_auc = evaluate_model(model, data[2])
        log.info(f'ROC-AUC: {roc_auc}')
        log.info(f'PRC-AUC: {prc_auc}')
parser.add_argument("--task", required=True, choices=["enroll", "predict"], help='Task to do. Either "enroll" or "predict"') args = parser.parse_args() if args.task == "predict" and not os.path.isfile(args.model_path): raise ValueError("Please provide valid model path") r = sr.Recognizer() r.pause_threshold = 1.0 if args.task == "enroll": if os.path.isfile(args.model_path) and not args.overwrite_model: model = ModelInterface.load(args.model_path) else: model = ModelInterface() print("***** Enroll sound data for one speaker *****") name = input("Enter your name: ") name = name.strip() print( f"Hello {name}. Please input your voice {args.num_samples} times") with tempfile.TemporaryDirectory() as tempdir: i = 1 while i <= args.num_samples: with sr.Microphone() as source: audio = r.listen(source) # Generate random filename filename = os.path.join( tempdir, name + "_" + str(uuid.uuid1()) + ".wav") with open(filename, "wb") as file: file.write(audio.get_wav_data(convert_rate=16000))
def task_enroll(input_dirs, output_model, isDynamic):
    # Possible improvement: store the VAD signal of each .wav file instead
    # of computing it twice for the dynamic threshold (useless for static).
    """Enroll the speakers inside the GMM model.

    Args:
        input_dirs (string): whitespace-separated directory patterns of the
            database; ``~`` is expanded and each pattern is globbed. The
            base name of every matched directory is the speaker label.
        output_model (string): path of the output model
        isDynamic (bool): when true, a dynamic threshold is computed from
            every .wav file after training (slow).

    Exits the process with status 1 when no pattern matches a directory.
    """
    # Creates the model object
    m = ModelInterface()

    # Expands the user directory and the glob patterns of 'input_dirs',
    # keeping only the matches that are directories
    print(input_dirs)
    input_dirs = [os.path.expanduser(k) for k in input_dirs.strip().split()]
    dirs = itertools.chain(*(glob.glob(d) for d in input_dirs))
    dirs = [d for d in dirs if os.path.isdir(d)]

    if len(dirs) == 0:
        print('No valid directory found!')
        sys.exit(1)

    # Starts the enrollment of the valid directories
    start_time = time.time()
    print('Starting enrollment')
    for d in dirs:
        print(d)
        # The directory name is the label; its .wav files are the samples
        label = os.path.basename(d.rstrip('/'))
        print(label)
        wavs = glob.glob(d + '/*.wav')

        if len(wavs) == 0:
            print('No wav file found in %s' % (d))
            continue

        for wav in wavs:
            # Reads the sampling rate (fs) and signal, scales the signal by
            # its peak absolute value, passes it through VAD_process and
            # enrolls the result under the label. A broken file is reported
            # and skipped.
            try:
                fs, signal = read_wav(wav)
                signal = signal / max(abs(signal))
                m.enroll(label, fs, VAD_process(signal))
            except Exception as e:
                print(wav + ' error %s' % (e))

    # Starts the training of the model using the enrolled signals
    print('Enrollment finished\nTraining started')
    m.train()
    print('Training finished')

    # Starts a dynamic threshold computation
    # /!\ Warning : computation takes a lot of time /!\
    if isDynamic:
        print('Dynamic Threshold started')
        i = 0
        for d in dirs:
            # Loads the .wav files of THIS directory before testing for
            # emptiness; the check used to run on the stale list left over
            # from the previous iteration / the enrollment loop.
            wavs = glob.glob(d + '/*.wav')
            if len(wavs) == 0:
                print('No wav file found in %s' % (d))
                continue

            for wav in wavs:
                try:
                    fs, signal = read_wav(wav)
                    signal = signal / max(abs(signal))
                    m.dynamic_threshold(fs, VAD_process(signal))
                except Exception as e:
                    print(wav + ' error %s' % (e))

            # Progress report in steps of ten percent of the directories
            i += 1
            for j in range(0, 100, 10):
                if i == int(len(dirs) * j * 0.01):
                    print('%i percent done.' % (j))

        # Lets the model reduce the collected scores (dynamic_mean)
        try:
            m.dynamic_mean()
        except Exception as e:
            # %s, not %e: %e needs a float and raised TypeError here,
            # hiding the real error.
            print('Error for dyanmic threshold : error %s' % (e))
        print('Dynamic threshold finished')

    print(time.time() - start_time, " seconds")

    # Saves the model at the specified path 'output_model'
    m.dump(output_model)
print(" + Predicting") input_model = "model.out" input_files = "./tmp.wav" # m = ModelInterface.load(input_model) m = model for f in glob.glob(os.path.expanduser(input_files)): fs, signal = read_wav(f) label = m.predict(fs, signal) # print max(label, key=operator.itemgetter(1)) if abs(label[0][1] - label[1][1]) > .0000002: speakers_detected = [x for x in label if x[1] >= -.1] if len(speakers_detected) > 0: print max(speakers_detected, key=operator.itemgetter(1)) else: print "___ not similar enough to know speaker"," Best guess:", max(label, key=operator.itemgetter(1)) else: print "... predictions too similar"," Best guess:", max(label, key=operator.itemgetter(1)) mods = ModelInterface() mods = mods.load("./model.out") see_model(mods) import operator # find_user() print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" for i in range(1,60): find_user(mods)