def create_seg_viterbi(self, cep, segment_dir):
    """Run Viterbi resegmentation on every segmentation file in a directory.

    Each file in ``segment_dir`` is read as a segmentation, re-decoded with
    ``viterbi.viterbi_decoding`` using ``self.vit_penalty``, and written to
    ``self.results_vit_dir``. The output name is the input name followed by
    ``.viterbi.<penalty>``, with the penalty formatted to two decimals.

    Args:
        cep: Features passed unchanged to ``viterbi.viterbi_decoding``.
        segment_dir: Directory whose entries are all treated as
            segmentation files.
    """
    for file_name in os.listdir(segment_dir):
        diar = Diar.read_seg(os.path.join(segment_dir, file_name))
        vit_diar = viterbi.viterbi_decoding(cep, diar, self.vit_penalty)
        # Tag the output with the penalty that was actually used for decoding
        # (previously a hard-coded -250 regardless of self.vit_penalty).
        out_name = file_name + '.viterbi.{:.2f}'.format(self.vit_penalty)
        Diar.write_seg(os.path.join(self.results_vit_dir, out_name), vit_diar)
def create_seg_bic_hac(self, cep, segment_dir):
    """Apply BIC-based hierarchical clustering to each segmentation file.

    For every entry of ``segment_dir`` the segmentation is read and
    ``hac_bic.HAC_BIC`` is run once per threshold taken from
    ``np.linspace(self.bic_hac_start, self.bic_hac_end, self.bic_hac_num)``.
    Each result is written to ``self.bic_hac_dir`` under the input name
    followed by ``.bic_value.<threshold>`` (two decimals).

    A failure while handling one file prints the traceback and processing
    moves on to the next file.

    Args:
        cep: Features passed unchanged to ``hac_bic.HAC_BIC``.
        segment_dir: Directory whose entries are all treated as
            segmentation files.
    """
    for seg_name in os.listdir(segment_dir):
        try:
            source_diar = Diar.read_seg(os.path.join(segment_dir, seg_name))
            thresholds = np.linspace(
                self.bic_hac_start, self.bic_hac_end, self.bic_hac_num)
            for threshold in thresholds:
                clusterer = hac_bic.HAC_BIC(
                    cep, source_diar, threshold, sr=False)
                merged_diar = clusterer.perform(to_the_end=True)
                target = os.path.join(
                    self.bic_hac_dir,
                    seg_name + '.bic_value.{:.2f}'.format(threshold))
                Diar.write_seg(target, merged_diar)
        except Exception:
            # Best effort: report the problem and carry on with the next file.
            traceback.print_exc()
def train(self):
    """Run the whole segmentation and clustering experiment.

    Initialisation reads the segmentation at ``self.input_seg``, packs it
    with ``pack(50)``, writes it to ``self.init_seg``, runs
    ``segmentation.segmentation`` with ``self.win_size`` and writes that
    result to ``self.gd_seg``. The experiment stages are then run in order:
    linear BIC, BIC HAC, i-vector AHC and Viterbi resegmentation, each
    reading the directory written by the previous stage.

    If initialisation fails, the traceback and a short message are printed
    and the method returns without running the experiment stages, because
    they all depend on the initial segmentation.
    """
    try:
        init_diar = Diar.read_seg(self.input_seg)
        init_diar.pack(50)
        Diar.write_seg(self.init_seg, init_diar)
        gd_diar = segmentation.segmentation(self.cep, init_diar, self.win_size)
        Diar.write_seg(self.gd_seg, gd_diar)
    except Exception:
        # print_exc (not the non-existent print_exec) reports the failure.
        traceback.print_exc()
        print("initialziation fault")
        # gd_diar is unbound here, so the stages below cannot run.
        return

    # Performing the experiment: each stage consumes the previous output.
    self.create_seg_bic_linear(self.cep, gd_diar)
    self.create_seg_bic_hac(self.cep, self.linear_bic_dir)
    self.create_seg_iv_AHC(self.bic_hac_dir, self.input_show)
    self.create_seg_viterbi(self.cep, self.hac_iv_dir)
def create_seg_iv_AHC(self, segment_dir, input_show):
    """Cluster each segmentation file with i-vector scores at several thresholds.

    A ``ModelIV`` is built once from ``self.model_fn``. For every entry of
    ``segment_dir`` the segmentation is read, i-vectors are trained through
    ``self.train_ivectors``, scored through ``self.score_plda``, and
    ``hac_iv`` is run once per threshold taken from
    ``np.linspace(self.t_min, self.t_max, self.t_num)``. Each result is
    written to ``self.hac_iv_dir`` under the input name followed by
    ``.hac_value.<threshold>`` (two decimals).

    A failure while handling one file prints the traceback and a short
    message, then processing moves on to the next file.

    Args:
        segment_dir: Directory whose entries are all treated as
            segmentation files.
        input_show: Not read by this method; ``self.input_show`` is what is
            passed to ``self.train_ivectors``.
    """
    iv_model = ModelIV(self.model_fn)
    for seg_name in os.listdir(segment_dir):
        try:
            seg_path = os.path.join(segment_dir, seg_name)
            source_diar = Diar.read_seg(seg_path)
            trained = self.train_ivectors(
                iv_model, self.mfcc_dir, seg_name, source_diar,
                self.input_show)
            plda_scores = self.score_plda(trained)
            thresholds = np.linspace(self.t_min, self.t_max, self.t_num)
            for threshold in thresholds:
                # hac_iv returns three values; only the first is kept.
                clustered_diar, _, _ = hac_iv(
                    source_diar, plda_scores, threshold=threshold)
                target = os.path.join(
                    self.hac_iv_dir,
                    seg_name + '.hac_value.{:.2f}'.format(threshold))
                Diar.write_seg(target, clustered_diar)
        except Exception:
            # Best effort: report the problem and carry on with the next file.
            traceback.print_exc()
            print("There is an error over here")
# Settings for the PLDA stage. Within this excerpt only rank_plda is read
# (it is embedded in the model file names below); the rest are presumably
# consumed further down the script -- confirm there.
plda_seg_fn = './data/seg/train.plda.seg'
rank_plda = 150
it_max_plda = 10
mfcc_plda_fn = './data/mfcc/norm_plda.h5'
plda_idmap_fn = './data/mfcc/plda_idmap.h5'
# rank_tv and nb_gauss are defined outside this excerpt.
plda_fn = './data/model/plda_'+str(rank_tv)+'_'+str(rank_plda)+'.h5'
norm_stat_fn = './data/model/norm.stat.h5'
norm_fn = './data/model/norm.h5'
norm_iv_fn = './data/model/norm.iv.h5'
matrices_fn = './data/model/matrices.h5'
model_fn = './data/model/ester_model_{}_{}_{}.h5'.format(nb_gauss, rank_tv, rank_plda)

# UBM stage: read the UBM segmentation, extract features for its id map into
# mfcc_ubm_fn, and save the id map returned by the extractor as text.
logging.info('Computing MFCC for UBM')
diar_ubm = Diar.read_seg(ubm_seg_fn, normalize_cluster=True)
fe = get_feature_extractor(audio_dir, 'sid')
ubm_idmap = fe.save_multispeakers(diar_ubm.id_map(), output_feature_filename=mfcc_ubm_fn, keep_all=False)
ubm_idmap.write_txt(ubm_idmap_fn)

# Train a Mixture with EM_split on the features just written, using the
# right-hand ids of the id map, then store it under the 'ubm/' prefix.
fs = get_feature_server(mfcc_ubm_fn, 'sid')
spk_lst = ubm_idmap.rightids
ubm = Mixture()
# NOTE(review): the iterations tuple presumably gives the EM iteration count
# for each successive split stage -- confirm against Mixture.EM_split.
ubm.EM_split(fs, spk_lst, nb_gauss,
             iterations=(1, 2, 2, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8),
             num_thread=num_thread, llk_gain=0.01)
ubm.write(ubm_fn, prefix='ubm/')

# TV stage begins: read its segmentation (the rest is outside this excerpt).
logging.info('Computing MFCC for TV')
diar_tv = Diar.read_seg(tv_seg_fn, normalize_cluster=True)
def pyAudioDiar():
    """Diarize the selected audio file two ways and plot both against a reference.

    The audio path, the speaker count and the reference label file are read
    from the ``labelFileNameSound``, ``labelNumberOfSpeakers`` and
    ``labelFileNameSegment`` widgets. Three speaker-label sequences are
    produced and drawn in one figure with three stacked subplots:

    * top ("Реальность:"): labels read from the tab-separated reference file
      with columns ``start``, ``end``, ``speaker``;
    * middle ("s4d:"): labels from the segmentation -> linear BIC -> BIC HAC
      -> Viterbi chain run in this function;
    * bottom ("pyPlotAudio:"): labels returned by ``aS.speaker_diarization``.

    Every sequence is spread evenly over ``[0, duration)``, where
    ``duration`` is the value returned by ``aS.speaker_diarization``.
    """
    duration, result = aS.speaker_diarization(
        labelFileNameSound.get(), int(labelNumberOfSpeakers.get()),
        lda_dim=0, plot_res=False)

    show = 'diarizationExample'
    input_show = labelFileNameSound.get()
    input_sad = None  # set to a segmentation file path to skip init_seg
    win_size = 250
    thr_l = 2
    thr_h = 3
    thr_vit = -250

    wdir = os.path.join('out', show)
    os.makedirs(wdir, exist_ok=True)

    fs = get_feature_server(input_show, feature_server_type='basic')
    cep, _ = fs.load(show)

    if input_sad is not None:
        init_diar = Diar.read_seg(input_sad)
        init_diar.pack(50)
    else:
        init_diar = segmentation.init_seg(cep, show)

    seg_diar = segmentation.segmentation(cep, init_diar, win_size)
    bicl_diar = segmentation.bic_linear(cep, seg_diar, thr_l, sr=False)
    bic = hac_bic.HAC_BIC(cep, bicl_diar, thr_h, sr=False)
    bich_diar = bic.perform(to_the_end=True)
    vit_diar = viterbi.viterbi_decoding(cep, bich_diar, thr_vit)

    # Expand the decoded rows into one label per 20 position units. The
    # speaker number is the row's second field with its first character
    # stripped; each row is filled up to row[3] + row[4].
    resList = []
    currentPosition = 0
    for row in vit_diar:
        speakerValue = int(row[1][1:])
        while currentPosition < (row[3] + row[4]):
            resList.append(speakerValue)
            currentPosition += 20

    # Expand the reference rows the same way, one label per 0.2 units of the
    # file's 'end' column.
    currentPosition = 0
    realityList = []
    realityFile = pd.read_csv(labelFileNameSegment.get(), delimiter='\t',
                              encoding='utf-8',
                              names=['start', 'end', 'speaker'])
    for _, row in realityFile.iterrows():
        speakerValue = int(row['speaker'][1:])
        while currentPosition < row['end']:
            realityList.append(speakerValue)
            currentPosition += 0.2

    def time_axis(labels):
        # linspace yields exactly len(labels) points; arange with a float
        # step can yield one extra point and divides by zero when empty.
        return np.linspace(0, duration, len(labels), endpoint=False)

    plot.subplot(3, 1, 2)
    plot.title("s4d:")
    plot.plot(time_axis(resList), resList, 'ro')
    plot.subplot(3, 1, 1)
    plot.title("Реальность:")
    plot.plot(time_axis(realityList), realityList, 'bo')
    plot.subplot(3, 1, 3)
    plot.title("pyPlotAudio:")
    plot.plot(time_axis(result), result, 'go')
    plot.show()