def load_norm_ivecs(self):
    """
    Load normalization i-vectors, scale and shift files and also pretrained model.

    Yields one flattened i-vector per name listed in ``self.norm_list``,
    loaded from ``<ivecs_dir>/<name>.npy``. As a side effect, sets
    ``self.scale`` and ``self.shift`` (from ``scale.npy`` / ``shift.npy``
    next to the last listed i-vector) and, when present, ``self.model``
    from ``model.pkl``.

    :returns: i-vectors
    :rtype: numpy.array
    """
    line = None
    with open(self.norm_list, 'r') as f:
        for line in f:
            line = line.rstrip()
            loginfo(
                '[Diarization.load_norm_ivecs] Loading npy file {} ...'.format(line))
            try:
                yield np.load('{}.npy'.format(
                    os.path.join(self.ivecs_dir, line))).flatten()
            except IOError:
                # fixed message: the failed load here is a .npy file, not a pickle
                logwarning(
                    '[Diarization.load_norm_ivecs] No npy file found for {}.'.format(line))
    if line is None:
        # empty normalization list - avoid os.path.dirname(None) crash below
        logwarning(
            '[Diarization.load_norm_ivecs] Empty normalization list {}.'.format(self.norm_list))
        return
    base_dir = os.path.join(self.ivecs_dir, os.path.dirname(line))
    self.scale = np.load(os.path.join(base_dir, 'scale.npy'))
    self.shift = np.load(os.path.join(base_dir, 'shift.npy'))
    try:
        # 'rb' is required: pickle data is binary (text mode breaks on Python 3)
        with open(os.path.join(base_dir, 'model.pkl'), 'rb') as f:
            self.model = pickle.load(f)
    except IOError:
        logwarning(
            '[Diarization.load_norm_ivecs] No pretrained model found.')
def dump_rttm(self, scores):
    """
    Write one RTTM file per i-vector set with the winning speaker per window.

    :param scores: PLDA scores keyed by i-vector set name
    """
    for ivecset in self.ivecs:
        if ivecset.size() == 0:
            logwarning(
                '[Diarization.dump_rttm] No i-vectors to dump in {}.'.format(ivecset.name))
            continue
        name = ivecset.name
        # dirty trick, will be removed, watch out
        if 'beamformed' in ivecset.name:
            ivecset.name = re.sub('beamformed/', '', ivecset.name)
        # # # # # # # # # # # # # # # # # # # # #
        reg_name = re.sub('/.*', '', ivecset.name)
        Tools.mkdir_p(os.path.join(self.out_dir, os.path.dirname(name)))
        out_path = os.path.join(self.out_dir, name + '.rttm')
        with open(out_path, 'w') as f:
            for i, ivec in enumerate(ivecset.ivecs):
                start = ivec.window_start
                end = ivec.window_end
                # highest-scoring speaker index for this window
                idx = np.argmax(scores[name].T[i])
                f.write(
                    'SPEAKER {} 1 {} {} <NA> <NA> {}_spkr_{} <NA>\n'.format(
                        reg_name,
                        float(start / 1000.0),
                        float((end - start) / 1000.0),
                        reg_name,
                        idx))
def score(self):
    """
    Score all loaded i-vector sets against their speaker cluster centroids.

    :returns: PLDA (or s-norm) scores keyed by normalized set name
    :rtype: dict
    """
    scores_dict = {}
    for ivecset in self.ivecs:
        name = os.path.normpath(ivecset.name)
        ivecs = ivecset.get_all()
        loginfo('[Diarization.score] Scoring {} ...'.format(name))
        size = ivecset.size()
        if size == 0:
            logwarning(
                '[Diarization.score] No i-vectors to score in {}.'.format(ivecset.name))
            continue
        if ivecset.num_speakers is not None:
            # oracle speaker count, clamped to the number of segments
            n_spk = min(ivecset.num_speakers, size)
            init_kmeans = sklearnKMeans(n_clusters=n_spk).fit(ivecs)
            centroids = KMeans(
                init_kmeans.cluster_centers_, n_spk, self.plda).fit(ivecs)
        else:
            n_spk, centroids = self.get_num_speakers(ivecs)
        if self.norm_list is None:
            scores_dict[name] = self.plda.score(
                ivecs, centroids, self.scale, self.shift)
        else:
            scores_dict[name] = self.s_norm(ivecs, centroids)
    return scores_dict
def load_ivecs(self):
    """
    Load pickled i-vector sets listed in ``self.input_list``.

    Each non-empty line of the input list holds either a single file name,
    or a file name followed by the oracle number of speakers.

    :returns: i-vector sets, one per list entry
    :rtype: generator
    :raises DiarizationException: if a line has more than two columns
    """
    with open(self.input_list, 'r') as f:
        for line in f:
            fields = line.rstrip().split()
            if not fields:
                # skip blank lines instead of crashing on fields[0]
                continue
            file_name = fields[0]
            loginfo(
                '[Diarization.load_ivecs] Loading pickle file {} ...'.format(file_name))
            if len(fields) > 2:
                raise DiarizationException(
                    '[Diarization.load_ivecs] Unexpected number of columns in input list {}.'
                    .format(self.input_list))
            try:
                # 'rb' is required: pickle data is binary (text mode breaks on Python 3)
                with open(os.path.join(self.ivecs_dir, file_name + '.pkl'), 'rb') as i:
                    ivec_set = pickle.load(i)
            except IOError:
                logwarning(
                    '[Diarization.load_ivecs] No pickle file found for {}.'.format(file_name))
                continue
            if len(fields) == 2:
                # second column is the oracle speaker count
                ivec_set.num_speakers = int(fields[1])
            yield ivec_set
def get_der(self, ref_file, scores):
    """
    Compute Diarization Error Rate from reference and scores.

    :param ref_file: path to file with diarization reference
    :type ref_file: str
    :param scores: input scores from PLDA model
    :type scores: numpy.array
    """
    ref, hyp = self.init_annotations()
    # build reference annotations from the RTTM-style reference file
    with open(ref_file, 'r') as f:
        for line in f:
            _, name, _, start, duration, _, _, speaker, _ = line.split()
            seg_start = float(start)
            ref[name][Segment(seg_start, seg_start + float(duration))] = speaker
    # build hypothesis annotations from the scored i-vector windows
    for ivecset in self.ivecs:
        if ivecset.size() == 0:
            logwarning(
                '[Diarization.get_der] No i-vectors to dump in {}.'.format(ivecset.name))
            continue
        name = ivecset.name
        reg_name = ivecset.name
        # dirty trick, will be removed, watch out
        if 'beamformed' in name:
            reg_name = re.sub('beamformed/', '', name)
        # # # # # # # # # # # # # # # # # # # # #
        reg_name = re.sub('/.*', '', reg_name)
        for i, ivec in enumerate(ivecset.ivecs):
            seg = Segment(ivec.window_start / 1000.0, ivec.window_end / 1000.0)
            hyp[reg_name][seg] = np.argmax(scores[name].T[i])
    der = DiarizationErrorRate()
    der.collar = 0.25
    names = []
    values = []
    total = 0.0
    for name in ref.keys():
        der_num = der(ref[name], hyp[name]) * 100
        names.append(name)
        values.append(der_num)
        total += der_num
        loginfo('[Diarization.get_der] {} DER = {}'.format(
            name, '{0:.3f}'.format(der_num)))
    loginfo('[Diarization.get_der] Average DER = {}'.format(
        '{0:.3f}'.format(total / float(len(ref.keys())))))
    Diarization.plot_der(names, values)
def score(self):
    """
    Score i-vectors against speaker clusters.

    :returns: PLDA scores keyed by normalized i-vector set name
    :rtype: dict
    :raises DiarizationException: if the number of speakers must be
        estimated but no normalization (training) i-vectors are available
    """
    scores_dict = {}
    for ivecset in self.ivecs:
        name = os.path.normpath(ivecset.name)
        ivecs = ivecset.get_all()
        loginfo('[Diarization.score] Scoring {} ...'.format(name))
        size = ivecset.size()
        if size > 0:
            if ivecset.num_speakers is not None:
                # clamp the oracle count to the number of segments -
                # KMeans cannot form more clusters than samples
                num_speakers = min(ivecset.num_speakers, size)
                sklearnkmeans = sklearnKMeans(
                    n_clusters=num_speakers).fit(ivecs)
                centroids = KMeans(sklearnkmeans.cluster_centers_,
                                   num_speakers, self.plda).fit(ivecs)
            else:
                if self.norm_ivecs is not None:
                    num_speakers, centroids = self.get_num_speakers(ivecs)
                else:
                    raise DiarizationException(
                        '[Diarization.score] Can not estimate number of speakers without training set.'
                    )
            if self.norm_list is None:
                scores_dict[name] = self.plda.score(
                    ivecs, centroids, self.scale, self.shift)
            else:
                scores_dict[name] = self.s_norm(ivecs, centroids)
        else:
            logwarning(
                '[Diarization.score] No i-vectors to score in {}.'.format(
                    ivecset.name))
    return scores_dict