コード例 #1
0
 def create_seg_viterbi(self, cep, segment_dir):
     """Run Viterbi resegmentation on every segmentation file in a directory.

     Each entry of ``segment_dir`` is read with ``Diar.read_seg``, decoded
     with ``viterbi.viterbi_decoding`` using ``self.vit_penalty``, and the
     result is written into ``self.results_vit_dir`` under the name
     ``<file_name>.viterbi.<penalty>`` (penalty formatted with two decimals).

     Args:
         cep: Features handed unchanged to ``viterbi.viterbi_decoding``.
         segment_dir: Directory whose entries are read as segmentation files.
     """
     for file_name in os.listdir(segment_dir):
         diar = Diar.read_seg(os.path.join(segment_dir, file_name))
         vit_diar = viterbi.viterbi_decoding(cep, diar, self.vit_penalty)
         # Tag the output with the penalty actually used for decoding rather
         # than a hard-coded -250, so the file name stays truthful when
         # ``self.vit_penalty`` is configured differently.
         out_name = file_name + '.viterbi.{:.2f}'.format(self.vit_penalty)
         Diar.write_seg(os.path.join(self.results_vit_dir, out_name),
                        vit_diar)
コード例 #2
0
 def create_seg_bic_hac(self, cep, segment_dir):
     """Run HAC_BIC clustering on each segmentation file for a sweep of thresholds.

     For every entry of ``segment_dir`` the segmentation is read once, then
     ``hac_bic.HAC_BIC`` is run for each value of
     ``np.linspace(self.bic_hac_start, self.bic_hac_end, self.bic_hac_num)``.
     Each result is written into ``self.bic_hac_dir`` as
     ``<file_name>.bic_value.<threshold>``.

     A failure while handling one file prints the traceback and moves on to
     the next file.

     Args:
         cep: Features handed unchanged to ``hac_bic.HAC_BIC``.
         segment_dir: Directory whose entries are read as segmentation files.
     """
     for seg_name in os.listdir(segment_dir):
         try:
             source_diar = Diar.read_seg(os.path.join(segment_dir, seg_name))
             thresholds = np.linspace(self.bic_hac_start,
                                      self.bic_hac_end,
                                      self.bic_hac_num)
             for threshold in thresholds:
                 clusterer = hac_bic.HAC_BIC(cep, source_diar, threshold,
                                             sr=False)
                 merged_diar = clusterer.perform(to_the_end=True)
                 out_name = seg_name + '.bic_value.{:.2f}'.format(threshold)
                 Diar.write_seg(os.path.join(self.bic_hac_dir, out_name),
                                merged_diar)
         except Exception:
             # Best effort: report the problem and keep going with the
             # remaining files.
             traceback.print_exc()
コード例 #3
0
    def train(self):
        """Build the initial segmentation, then run every experiment stage.

        Initialization reads ``self.input_seg``, packs it with ``pack(50)``,
        writes it to ``self.init_seg``, runs ``segmentation.segmentation``
        with ``self.win_size`` and writes that result to ``self.gd_seg``.

        If initialization fails, the traceback and a short message are
        printed and the method returns without running the experiment
        stages, because they all depend on the segmentation produced here.

        Returns:
            None.
        """
        try:
            init_diar = Diar.read_seg(self.input_seg)
            init_diar.pack(50)
            Diar.write_seg(self.init_seg, init_diar)
            gd_diar = segmentation.segmentation(self.cep, init_diar,
                                                self.win_size)
            Diar.write_seg(self.gd_seg, gd_diar)
        except Exception:
            # ``traceback.print_exec`` does not exist; ``print_exc`` is the
            # function that prints the active exception.
            traceback.print_exc()
            print("initialziation fault")
            # Without ``gd_diar`` the stages below cannot run; stop here
            # instead of failing later with a NameError.
            return

        # Experiment stages; each later stage reads the directory the
        # previous one is given to work with.
        self.create_seg_bic_linear(self.cep, gd_diar)
        self.create_seg_bic_hac(self.cep, self.linear_bic_dir)
        self.create_seg_iv_AHC(self.bic_hac_dir, self.input_show)
        self.create_seg_viterbi(self.cep, self.hac_iv_dir)
コード例 #4
0
 def create_seg_iv_AHC(self, segment_dir, input_show):
     """Run i-vector based HAC on each segmentation file for a sweep of thresholds.

     A ``ModelIV`` is built once from ``self.model_fn``. For every entry of
     ``segment_dir`` the segmentation is read, i-vectors are trained with
     ``self.train_ivectors`` and scored with ``self.score_plda``. Then
     ``hac_iv`` is run for each value of
     ``np.linspace(self.t_min, self.t_max, self.t_num)``, and each result is
     written into ``self.hac_iv_dir`` as ``<file_name>.hac_value.<threshold>``.

     A failure while handling one file prints the traceback and a short
     message, then processing continues with the next file.

     Args:
         segment_dir: Directory whose entries are read as segmentation files.
         input_show: Value forwarded to ``self.train_ivectors`` as its last
             argument.
     """
     model_iv = ModelIV(self.model_fn)
     for file_name in os.listdir(segment_dir):
         try:
             segment_diar = Diar.read_seg(
                 os.path.join(segment_dir, file_name))
             # Honour the ``input_show`` argument; previously it was ignored
             # and ``self.input_show`` was always used instead.
             model = self.train_ivectors(model_iv, self.mfcc_dir, file_name,
                                         segment_diar, input_show)
             scores = self.score_plda(model)
             for hac_value in np.linspace(self.t_min, self.t_max,
                                          self.t_num):
                 diar_iv, _, _ = hac_iv(segment_diar,
                                        scores,
                                        threshold=hac_value)
                 out_name = file_name + '.hac_value.{:.2f}'.format(hac_value)
                 Diar.write_seg(os.path.join(self.hac_iv_dir, out_name),
                                diar_iv)
         except Exception:
             # Best effort: report the problem and keep going with the
             # remaining files.
             traceback.print_exc()
             print("There is an error over here")
コード例 #5
0
ファイル: train.py プロジェクト: CornSnak3/SummerPractice
# File locations and settings whose names refer to PLDA
# (presumably consumed by a later PLDA training step -- not visible here).
plda_seg_fn = './data/seg/train.plda.seg'
rank_plda = 150
it_max_plda = 10
mfcc_plda_fn = './data/mfcc/norm_plda.h5'
plda_idmap_fn = './data/mfcc/plda_idmap.h5'
# The PLDA file name embeds rank_tv (defined outside this excerpt) and rank_plda.
plda_fn = './data/model/plda_'+str(rank_tv)+'_'+str(rank_plda)+'.h5'
# File locations whose names refer to normalization data.
norm_stat_fn = './data/model/norm.stat.h5'
norm_fn = './data/model/norm.h5'
norm_iv_fn = './data/model/norm.iv.h5'


matrices_fn = './data/model/matrices.h5'
# The model file name embeds nb_gauss, rank_tv (both defined outside this
# excerpt) and rank_plda.
model_fn = './data/model/ester_model_{}_{}_{}.h5'.format(nb_gauss, rank_tv, rank_plda)

# Read the UBM segmentation, save features for its id map into mfcc_ubm_fn,
# and write the id map returned by save_multispeakers as text to ubm_idmap_fn.
logging.info('Computing MFCC for UBM')
diar_ubm = Diar.read_seg(ubm_seg_fn, normalize_cluster=True)
fe = get_feature_extractor(audio_dir, 'sid')
ubm_idmap = fe.save_multispeakers(diar_ubm.id_map(), output_feature_filename=mfcc_ubm_fn, keep_all=False)
ubm_idmap.write_txt(ubm_idmap_fn)

# Feature server opened on the feature file written above.
fs = get_feature_server(mfcc_ubm_fn, 'sid')

# Train a Mixture with EM_split on the id map's rightids and store it in
# ubm_fn under the 'ubm/' prefix.
# NOTE(review): the iterations tuple presumably gives EM iterations per
# split step -- confirm against the Mixture.EM_split documentation.
spk_lst = ubm_idmap.rightids
ubm = Mixture()
ubm.EM_split(fs, spk_lst, nb_gauss,
             iterations=(1, 2, 2, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8), num_thread=num_thread,
             llk_gain=0.01)
ubm.write(ubm_fn, prefix='ubm/')

# Next stage: read the TV segmentation (the rest of this stage lies beyond
# this excerpt).
logging.info('Computing MFCC for TV')
diar_tv = Diar.read_seg(tv_seg_fn, normalize_cluster=True)
コード例 #6
0
ファイル: main.py プロジェクト: CornSnak3/SummerPractice
def pyAudioDiar():
    """Diarize the selected audio two ways and plot both against a reference.

    The audio file named by ``labelFileNameSound`` is processed with
    ``aS.speaker_diarization`` and with an s4d pipeline (segmentation,
    linear BIC, HAC BIC, Viterbi decoding). The reference labelling is read
    from the tab-separated file named by ``labelFileNameSegment`` with
    columns ``start``, ``end`` and ``speaker``. The three label sequences
    are drawn in a three-row figure, each spread evenly over
    ``[0, duration)``.

    Speaker labels are converted to integers by dropping their first
    character (e.g. ``"S3"`` becomes ``3``).
    """
    duration, result = aS.speaker_diarization(labelFileNameSound.get(),
                                              int(labelNumberOfSpeakers.get()),
                                              lda_dim=0,
                                              plot_res=False)

    def time_axis(count):
        # np.arange with a fractional step may return count + 1 points
        # (floating-point rounding), which breaks plotting, and dividing by
        # the list length fails on an empty list. np.linspace yields exactly
        # ``count`` evenly spaced points.
        return np.linspace(0, duration, count, endpoint=False)

    show = 'diarizationExample'
    input_show = labelFileNameSound.get()
    input_sad = None
    win_size = 250
    thr_l = 2
    thr_h = 3
    thr_vit = -250
    # Make sure the output directory exists.
    os.makedirs(os.path.join('out', show), exist_ok=True)
    fs = get_feature_server(input_show, feature_server_type='basic')
    cep, _ = fs.load(show)

    # With no SAD file configured the initial segmentation is derived from
    # the features themselves.
    if input_sad is not None:
        init_diar = Diar.read_seg(input_sad)
        init_diar.pack(50)
    else:
        init_diar = segmentation.init_seg(cep, show)

    seg_diar = segmentation.segmentation(cep, init_diar, win_size)
    bicl_diar = segmentation.bic_linear(cep, seg_diar, thr_l, sr=False)
    bic = hac_bic.HAC_BIC(cep, bicl_diar, thr_h, sr=False)
    bich_diar = bic.perform(to_the_end=True)
    vit_diar = viterbi.viterbi_decoding(cep, bich_diar, thr_vit)

    # Expand the s4d segments into one label per step of 20.
    # NOTE(review): row[3] + row[4] is used as the segment end; if row[4] is
    # already an end position rather than a length this overshoots --
    # confirm against the Diar row layout.
    resList = []
    currentPosition = 0
    for row in vit_diar:
        speakerValue = int(row[1][1:])
        while currentPosition < (row[3] + row[4]):
            resList.append(speakerValue)
            currentPosition += 20

    # Expand the reference segments into one label per step of 0.2.
    currentPosition = 0
    realityList = []
    realityFile = pd.read_csv(labelFileNameSegment.get(),
                              delimiter='\t',
                              encoding='utf-8',
                              names=['start', 'end', 'speaker'])
    for _, row in realityFile.iterrows():
        speakerValue = int(row['speaker'][1:])
        while currentPosition < row['end']:
            realityList.append(speakerValue)
            currentPosition += 0.2

    plot.subplot(3, 1, 2)
    plot.title("s4d:")
    plot.plot(time_axis(len(resList)), resList, 'ro')
    plot.subplot(3, 1, 1)
    plot.title("Реальность:")
    plot.plot(time_axis(len(realityList)), realityList, 'bo')
    plot.subplot(3, 1, 3)
    plot.title("pyPlotAudio:")
    plot.plot(time_axis(len(result)), result, 'go')
    plot.show()