def __init__(self, spk_id, model, pre_model_dir="pre-models", threshold=0.0):
    """Prepare a scorer whose working files live under the ``spk_id`` directory.

    The ``spk_id`` directory is created if it does not exist yet. A
    one-line ``ivector.scp`` file pairing the utterance id with its
    identity location is written into it, and a Kaldi helper bound to
    the working sub-directories is constructed.

    Args:
        spk_id: Path of the working directory; stored as an absolute
            path in ``self.spk_id``.
        model: Indexable object. Entries 1 to 4 are read as utterance
            id, identity location, z-norm mean and z-norm std. Entry 0
            is not read by this constructor.
        pre_model_dir: Directory handed to the Kaldi helper as
            ``pre_model_dir`` (converted to an absolute path).
        threshold: Stored unchanged as ``self.threshold``.
    """

    def _workdir(name):
        # Sub-directory paths are derived from the absolute root only;
        # nothing is created on disk here.
        return os.path.abspath(self.spk_id + "/" + name)

    self.pre_model_dir = os.path.abspath(pre_model_dir)

    self.spk_id = os.path.abspath(spk_id)
    if not os.path.exists(self.spk_id):
        os.makedirs(self.spk_id)

    self.audio_dir = _workdir("audio")
    self.mfcc_dir = _workdir("mfcc")
    self.log_dir = _workdir("log")
    self.ivector_dir = _workdir("ivector")

    self.threshold = threshold

    self.utt_id = model[1]
    self.identity_location = model[2]
    self.z_norm_mean = model[3]
    self.z_norm_std = model[4]

    # Write a single "<utt_id> <identity_location>" row as the scp file.
    self.train_ivector_scp = self.spk_id + "/ivector.scp"
    utt_column = np.array([self.utt_id])[:, np.newaxis]
    location_column = np.array([self.identity_location])[:, np.newaxis]
    scp_table = np.concatenate((utt_column, location_column), axis=1)
    np.savetxt(self.train_ivector_scp, scp_table, fmt="%s")

    self.kaldi_helper = ivector_PLDA_kaldiHelper(
        pre_model_dir=self.pre_model_dir,
        audio_dir=self.audio_dir,
        mfcc_dir=self.mfcc_dir,
        log_dir=self.log_dir,
        ivector_dir=self.ivector_dir,
    )
def __init__(self, group_id, model_list, pre_model_dir="pre-models", threshold=0.0):
    """Prepare a multi-speaker scorer rooted at the ``group_id`` directory.

    The ``group_id`` directory is created if it does not exist yet. The
    per-speaker fields of every model are collected, passed through
    ``self.order``, and an ``ivector.scp`` file with one
    "<utt_id> <identity_location>" row per speaker is written. Finally a
    Kaldi helper bound to the working sub-directories is constructed.

    Args:
        group_id: Path of the working directory; stored as an absolute
            path in ``self.group_id``.
        model_list: Sized iterable of indexable models. Entries 0 to 4
            of each model are read as speaker id, utterance id, identity
            location, z-norm mean and z-norm std.
        pre_model_dir: Directory handed to the Kaldi helper as
            ``pre_model_dir`` (converted to an absolute path).
        threshold: Stored unchanged as ``self.threshold``.
    """

    def _workdir(name):
        # Sub-directory paths are derived from the absolute root only;
        # nothing is created on disk here.
        return os.path.abspath(self.group_id + "/" + name)

    self.pre_model_dir = os.path.abspath(pre_model_dir)

    self.group_id = os.path.abspath(group_id)
    if not os.path.exists(self.group_id):
        os.makedirs(self.group_id)

    self.audio_dir = _workdir("audio")
    self.mfcc_dir = _workdir("mfcc")
    self.log_dir = _workdir("log")
    self.ivector_dir = _workdir("ivector")

    self.threshold = threshold

    self.n_speakers = len(model_list)
    self.spk_ids = [entry[0] for entry in model_list]
    self.utt_ids = [entry[1] for entry in model_list]
    self.identity_locations = [entry[2] for entry in model_list]

    self.z_norm_means = np.zeros(self.n_speakers, dtype=np.float64)
    self.z_norm_stds = np.zeros(self.n_speakers, dtype=np.float64)
    for idx, entry in enumerate(model_list):
        self.z_norm_means[idx] = entry[3]
        self.z_norm_stds[idx] = entry[4]

    # Make sure the ids are in order; otherwise Kaldi may reorder them,
    # which may lead to wrong results.
    ordered = self.order(
        self.spk_ids,
        self.utt_ids,
        self.identity_locations,
        self.z_norm_means,
        self.z_norm_stds,
    )
    (
        self.spk_ids,
        self.utt_ids,
        self.identity_locations,
        self.z_norm_means,
        self.z_norm_stds,
    ) = ordered

    # One "<utt_id> <identity_location>" row per speaker.
    self.train_ivector_scp = self.group_id + "/ivector.scp"
    utt_column = np.array(self.utt_ids)[:, np.newaxis]
    location_column = np.array(self.identity_locations)[:, np.newaxis]
    scp_table = np.concatenate((utt_column, location_column), axis=1)
    np.savetxt(self.train_ivector_scp, scp_table, fmt="%s")

    self.kaldi_helper = ivector_PLDA_kaldiHelper(
        pre_model_dir=self.pre_model_dir,
        audio_dir=self.audio_dir,
        mfcc_dir=self.mfcc_dir,
        log_dir=self.log_dir,
        ivector_dir=self.ivector_dir,
    )
def score(self, audio_list, fs=16000, bits_per_sample=16, n_jobs=10, debug=False):
    """Score audio with the Kaldi helper and z-normalize the raw scores.

    The audio, mfcc, log and ivector working directories are removed and
    recreated on every call, so results of earlier calls do not survive.

    Args:
        audio_list: A NumPy array or a list of NumPy arrays. A 1-D
            array, or a 2-D array with a singleton axis, is treated as a
            single audio. Any other array is split into one audio per
            index along axis 1 (``audio_list[:, i]``). A list is
            deep-copied first, so the caller's list is left untouched.
        fs: Not used by this method.
        bits_per_sample: Audio whose dtype is not int16 is multiplied by
            ``2 ** (bits_per_sample - 1)`` before conversion to int16.
        n_jobs: Forwarded to the helper's ``score``.
        debug: Forwarded to the helper's ``score``.

    Returns:
        The helper's scores with ``self.z_norm_means`` subtracted and
        divided by ``self.z_norm_stds``. Per the original author's note
        the shape is (n_audios, n_spks) or (n_spks,).
    """
    work_dirs = (self.audio_dir, self.mfcc_dir, self.log_dir, self.ivector_dir)

    # Two passes on purpose: remove every stale directory first, then
    # recreate them, matching the original order of filesystem effects.
    for path in work_dirs:
        if os.path.exists(path):
            shutil.rmtree(path)
    for path in work_dirs:
        if not os.path.exists(path):
            os.makedirs(path)

    if isinstance(audio_list, np.ndarray):
        shape = audio_list.shape
        is_single_audio = len(shape) == 1 or (
            len(shape) == 2 and (shape[0] == 1 or shape[1] == 1)
        )
        if is_single_audio:
            audio_list = [audio_list]
        else:
            audio_list = [audio_list[:, i] for i in range(shape[1])]
    else:
        # Avoid modifying the caller's list when entries are replaced below.
        audio_list = copy.deepcopy(audio_list)

    scale = 2 ** (bits_per_sample - 1)
    int16_range = np.iinfo(np.int16)
    for i, audio in enumerate(audio_list):
        if audio.dtype != np.int16:
            # Saturate before casting: an unclipped full-scale sample
            # (e.g. +1.0 -> 32768) is outside the int16 range and would
            # otherwise wrap around to a large negative value.
            scaled = np.clip(audio * scale, int16_range.min, int16_range.max)
            audio_list[i] = scaled.astype(np.int16)

    helper = ivector_PLDA_kaldiHelper(
        pre_model_dir=self.pre_model_dir,
        audio_dir=self.audio_dir,
        mfcc_dir=self.mfcc_dir,
        log_dir=self.log_dir,
        ivector_dir=self.ivector_dir,
    )
    score_array = helper.score(
        audio_list,
        self.utt_ids,
        n_jobs=n_jobs,
        flag=1,
        train_ivector_scp=self.train_ivector_scp,
        debug=debug,
    )

    # z-normalization with the per-speaker statistics held on the instance.
    score_array = (score_array - self.z_norm_means) / self.z_norm_stds
    return score_array  # (n_audios, n_spks) or (n_spks, )
path = os.path.join(z_norm_dir, audio_name) z_norm_utt_path.append(path) z_norm_utt_id.append(utt_id) z_norm_spk_id.append(spk_id) audio_path_list = (enroll_utt_path + z_norm_utt_path) spk_id_list = (enroll_spk_id + z_norm_spk_id) utt_id_list = (enroll_utt_id + z_norm_utt_id) ''' step 1: generate ivector identity (stored in ivector_dir) and corresponding speaker model (stored as model/XX.iv) ''' print( "----- step 1: generate ivector identity and corresponding speaker model -----" ) iv_helper = ivector_PLDA_kaldiHelper(audio_dir=audio_dir, mfcc_dir=mfcc_dir, log_dir=log_dir, ivector_dir=ivector_dir) print("--- extracting and scoring ---") iv_helper.score_existing(audio_path_list, enroll_utt_id, spk_id_list=spk_id_list, utt_id_list=utt_id_list, test_utt_id=z_norm_utt_id, n_jobs=n_jobs, flag=1, debug=debug) print("--- extracting and scoring done---") print("--- resolve score and obtain z norm mean and std value ---")