Example #1
0
    def __init__(self, spk_id, model, pre_model_dir="pre-models", threshold=0.0):
        """Prepare a single-speaker scorer rooted at the ``spk_id`` directory.

        ``model`` is read positionally: ``model[1]`` is the utterance id,
        ``model[2]`` the identity location, and ``model[3]`` / ``model[4]``
        the z-norm mean and std. ``model[0]`` is not read here.

        Side effects: creates the ``spk_id`` directory when it is missing and
        writes ``<spk_id>/ivector.scp``.
        """
        self.pre_model_dir = os.path.abspath(pre_model_dir)

        # The speaker directory is also the root of every working directory.
        root = os.path.abspath(spk_id)
        self.spk_id = root
        if not os.path.exists(root):
            os.makedirs(root)

        self.audio_dir, self.mfcc_dir, self.log_dir, self.ivector_dir = [
            os.path.abspath(root + "/" + leaf)
            for leaf in ("audio", "mfcc", "log", "ivector")
        ]

        self.threshold = threshold

        self.utt_id, self.identity_location, self.z_norm_mean, self.z_norm_std = (
            model[1],
            model[2],
            model[3],
            model[4],
        )

        # Single-row table: utterance id in the first column, identity
        # location in the second.
        self.train_ivector_scp = root + "/ivector.scp"
        utt_column = np.array([self.utt_id])[:, np.newaxis]
        location_column = np.array([self.identity_location])[:, np.newaxis]
        scp_rows = np.concatenate((utt_column, location_column), axis=1)
        np.savetxt(self.train_ivector_scp, scp_rows, fmt="%s")

        self.kaldi_helper = ivector_PLDA_kaldiHelper(
            pre_model_dir=self.pre_model_dir,
            audio_dir=self.audio_dir,
            mfcc_dir=self.mfcc_dir,
            log_dir=self.log_dir,
            ivector_dir=self.ivector_dir,
        )
Example #2
0
    def __init__(self,
                 group_id,
                 model_list,
                 pre_model_dir="pre-models",
                 threshold=0.0):
        """Prepare a multi-speaker scorer rooted at the ``group_id`` directory.

        Each entry of ``model_list`` is read positionally as
        ``(spk_id, utt_id, identity_location, z_norm_mean, z_norm_std)``.

        Side effects: creates the ``group_id`` directory when it is missing
        and writes ``<group_id>/ivector.scp`` with one row per speaker.
        """
        self.pre_model_dir = os.path.abspath(pre_model_dir)

        # The group directory is also the root of every working directory.
        root = os.path.abspath(group_id)
        self.group_id = root
        if not os.path.exists(root):
            os.makedirs(root)

        self.audio_dir, self.mfcc_dir, self.log_dir, self.ivector_dir = [
            os.path.abspath(root + "/" + leaf)
            for leaf in ("audio", "mfcc", "log", "ivector")
        ]

        self.threshold = threshold

        self.n_speakers = len(model_list)
        self.spk_ids = []
        self.utt_ids = []
        self.identity_locations = []
        self.z_norm_means = np.zeros(self.n_speakers, dtype=np.float64)
        self.z_norm_stds = np.zeros(self.n_speakers, dtype=np.float64)

        for idx, entry in enumerate(model_list):
            speaker, utterance, location, mu, sigma = (
                entry[0], entry[1], entry[2], entry[3], entry[4])

            self.spk_ids.append(speaker)
            self.utt_ids.append(utterance)
            self.identity_locations.append(location)
            self.z_norm_means[idx] = mu
            self.z_norm_stds[idx] = sigma

        # Make sure the ids are in order; otherwise Kaldi may reorder them,
        # which can lead to wrong results.
        ordered = self.order(self.spk_ids, self.utt_ids,
                             self.identity_locations, self.z_norm_means,
                             self.z_norm_stds)
        (self.spk_ids, self.utt_ids, self.identity_locations,
         self.z_norm_means, self.z_norm_stds) = ordered

        # One row per speaker: utterance id in the first column, identity
        # location in the second.
        self.train_ivector_scp = root + "/ivector.scp"
        utt_column = np.array(self.utt_ids)[:, np.newaxis]
        location_column = np.array(self.identity_locations)[:, np.newaxis]
        scp_rows = np.concatenate((utt_column, location_column), axis=1)
        np.savetxt(self.train_ivector_scp, scp_rows, fmt="%s")

        self.kaldi_helper = ivector_PLDA_kaldiHelper(
            pre_model_dir=self.pre_model_dir,
            audio_dir=self.audio_dir,
            mfcc_dir=self.mfcc_dir,
            log_dir=self.log_dir,
            ivector_dir=self.ivector_dir,
        )
Example #3
0
    def score(self, audio_list, fs=16000, bits_per_sample=16, n_jobs=10, debug=False):
        """Score audio against the enrolled speakers and z-normalize the result.

        Args:
            audio_list: either a numpy array or a list of numpy arrays.
                A 1-D array, or a 2-D array with a singleton dimension, is
                treated as one audio. Any other array is split column-wise
                into one audio per column. A list is deep-copied so the
                caller's list is never modified.
            fs: sampling rate. Accepted for interface compatibility; it is
                not used in this method.
            bits_per_sample: bit depth used to scale non-int16 audio, which
                is multiplied by ``2 ** (bits_per_sample - 1)``.
            n_jobs: forwarded to the Kaldi helper.
            debug: forwarded to the Kaldi helper.

        Returns:
            The helper's scores after subtracting ``self.z_norm_means`` and
            dividing by ``self.z_norm_stds``: (n_audios, n_spks) or (n_spks, ).
        """
        work_dirs = (self.audio_dir, self.mfcc_dir, self.log_dir, self.ivector_dir)

        # Wipe all working directories first, then recreate them, so every
        # call starts from empty directories.
        for path in work_dirs:
            if os.path.exists(path):
                shutil.rmtree(path)
        for path in work_dirs:
            if not os.path.exists(path):
                os.makedirs(path)

        if isinstance(audio_list, np.ndarray):
            is_single_audio = audio_list.ndim == 1 or (
                audio_list.ndim == 2 and 1 in audio_list.shape)
            if is_single_audio:
                audio_list = [audio_list]
            else:
                audio_list = [audio_list[:, i] for i in range(audio_list.shape[1])]
        else:
            audio_list = copy.deepcopy(audio_list)  # avoid mutating the caller's list

        # Convert non-int16 audio to int16. Clip before the cast: a full-scale
        # sample (e.g. +1.0 at 16 bits scales to 32768) lies outside the int16
        # range and would otherwise overflow in the cast instead of saturating.
        int16_range = np.iinfo(np.int16)
        scale = 2 ** (bits_per_sample - 1)
        for i, audio in enumerate(audio_list):
            if audio.dtype != np.int16:
                scaled = np.clip(audio * scale, int16_range.min, int16_range.max)
                audio_list[i] = scaled.astype(np.int16)

        kaldi_helper = ivector_PLDA_kaldiHelper(
            pre_model_dir=self.pre_model_dir,
            audio_dir=self.audio_dir,
            mfcc_dir=self.mfcc_dir,
            log_dir=self.log_dir,
            ivector_dir=self.ivector_dir,
        )

        # flag=1 is passed through unchanged; its meaning is defined by the helper.
        score_array = kaldi_helper.score(
            audio_list,
            self.utt_ids,
            n_jobs=n_jobs,
            flag=1,
            train_ivector_scp=self.train_ivector_scp,
            debug=debug,
        )

        # Z-normalization with the per-speaker statistics.
        score_array = (score_array - self.z_norm_means) / self.z_norm_stds

        return score_array  # (n_audios, n_spks) or (n_spks, )
Example #4
0
    path = os.path.join(z_norm_dir, audio_name)
    z_norm_utt_path.append(path)
    z_norm_utt_id.append(utt_id)
    z_norm_spk_id.append(spk_id)

# Enrollment entries come first, followed by the z-norm entries.
utt_id_list = enroll_utt_id + z_norm_utt_id
spk_id_list = enroll_spk_id + z_norm_spk_id
audio_path_list = enroll_utt_path + z_norm_utt_path

# step 1: generate ivector identity (stored in ivector_dir) and the
# corresponding speaker model (stored as model/XX.iv)
print("----- step 1: generate ivector identity and corresponding speaker model -----")

iv_helper = ivector_PLDA_kaldiHelper(
    audio_dir=audio_dir, mfcc_dir=mfcc_dir, log_dir=log_dir, ivector_dir=ivector_dir
)

print("--- extracting and scoring ---")
# Enrollment utterances are passed as the second positional argument and the
# z-norm utterances as test_utt_id.
iv_helper.score_existing(
    audio_path_list,
    enroll_utt_id,
    spk_id_list=spk_id_list,
    utt_id_list=utt_id_list,
    test_utt_id=z_norm_utt_id,
    n_jobs=n_jobs,
    flag=1,
    debug=debug,
)
print("--- extracting and scoring done---")

print("--- resolve score and obtain z norm mean and std value ---")