def eval_plda(iv_file, ndx_file, enroll_file, test_file, preproc_file,
              model_file, score_file, pool_method, plda_type=None, **kwargs):
    """Computes PLDA N-vs-1 log-likelihood-ratio scores for a trial list.

    Args:
      iv_file: path to the i-vector/embedding storage.
      ndx_file: path to the trial ndx file.
      enroll_file: path to the enrollment segment-to-model mapping.
      test_file: path to the test segment list.
      preproc_file: optional path to a TransformList preprocessor; None skips
        preprocessing.
      model_file: path to the trained PLDA model.
      score_file: output path for the trial scores.
      pool_method: enrollment-side pooling method forwarded to llr_Nvs1.
      plda_type: PLDA variant identifier forwarded to F.load_plda.
        Fix: this name was read but never defined in the original signature,
        raising NameError at the model-loading call; it is now an explicit,
        backward-compatible keyword argument.
      **kwargs: extra options filtered into the TDR reader.
    """
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr_args = TDR.filter_args(**kwargs)
    tdr = TDR(iv_file, ndx_file, enroll_file, test_file, preproc, **tdr_args)
    x_e, x_t, enroll, ndx = tdr.read()
    # Collapse repeated enrollment labels so multi-session enrollments can be
    # pooled per model; ids_e maps each row of x_e to its unique model.
    enroll, ids_e = np.unique(enroll, return_inverse=True)

    model = F.load_plda(plda_type, model_file)

    t1 = time.time()
    scores = model.llr_Nvs1(x_e, x_t, method=pool_method, ids1=ids_e)
    dt = time.time() - t1
    num_trials = len(enroll) * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (dt, dt / num_trials * 1000))

    s = TrialScores(enroll, ndx.seg_set, scores)
    s.save(score_file)
def eval_plda(iv_file, ndx_file, enroll_file, test_subseg2orig_file,
              preproc_file, model_file, score_file, plda_type, **kwargs):
    """Scores PLDA 1-vs-1 trials over diarization sub-segments and pools the
    sub-segment scores back onto the original test segments before saving.
    """
    logging.info('loading data')
    preproc = None if preproc_file is None else TransformList.load(preproc_file)

    reader = TDR(iv_file, ndx_file, enroll_file, None,
                 test_subseg2orig_file, preproc)
    x_e, x_t, enroll, ndx, orig_seg = reader.read()

    logging.info('loading plda model: %s' % (model_file))
    plda = F.load_plda(plda_type, model_file)

    start = time.time()
    logging.info('computing llr')
    scores = plda.llr_1vs1(x_e, x_t)
    elapsed = time.time() - start
    n_trials = len(enroll) * x_t.shape[0]
    logging.info('scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.'
                 % (elapsed, elapsed / n_trials * 1000))

    # Sub-segment scores are reduced to one score per original test segment.
    logging.info('combine cluster scores')
    scores = combine_diar_scores(ndx, orig_seg, scores)

    logging.info('saving scores to %s' % (score_file))
    trial_scores = TrialScores(enroll, ndx.seg_set, scores)
    trial_scores = trial_scores.align_with_ndx(ndx)
    trial_scores.save_txt(score_file)
def eval_plda(iv_file, ndx_file, enroll_file, test_file, preproc_file,
              model_file, score_file, plda_type, **kwargs):
    """Computes PLDA 1-vs-1 llr scores for a trial ndx and saves them."""
    preproc = None if preproc_file is None else TransformList.load(preproc_file)

    reader = TDR(iv_file, ndx_file, enroll_file, test_file, preproc,
                 **TDR.filter_args(**kwargs))
    x_e, x_t, enroll, ndx = reader.read()

    plda = F.load_plda(plda_type, model_file)

    start = time.time()
    scores = plda.llr_1vs1(x_e, x_t)
    elapsed = time.time() - start
    n_trials = x_e.shape[0] * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (elapsed, elapsed / n_trials * 1000))

    TrialScores(enroll, ndx.seg_set, scores).save(score_file)
def eval_svm(iv_file, class2int_file, test_file, preproc_file, model_file,
             score_file, vector_score_file, eval_type, **kwargs):
    """Scores test vectors with a trained SVM and writes trial scores;
    optionally dumps the raw per-segment score vectors to HDF5.
    """
    preproc = None if preproc_file is None else TransformList.load(preproc_file)

    reader = TDR(iv_file, class2int_file, test_file, preproc,
                 **TDR.filter_args(**kwargs))
    x, ndx = reader.read()

    svm = SVM.load(model_file)
    start = time.time()
    scores = svm.predict(x, eval_type)
    elapsed = time.time() - start
    n_trials = scores.shape[0] * scores.shape[1]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (elapsed, elapsed / n_trials * 1000))

    # Transpose so models index the rows, as TrialScores expects.
    TrialScores(ndx.model_set, ndx.seg_set, scores.T).save(score_file)

    if vector_score_file is not None:
        writer = HDW(vector_score_file)
        writer.write(ndx.seg_set, '', scores)
def eval_cos(iv_file, ndx_file, enroll_file, test_file, preproc_file,
             score_file, **kwargs):
    """Computes cosine-similarity trial scores and saves them."""
    preproc = None if preproc_file is None else TransformList.load(preproc_file)

    reader = TDR(iv_file, ndx_file, enroll_file, test_file, preproc,
                 **TDR.filter_args(**kwargs))
    x_e, x_t, enroll, ndx = reader.read()

    # Length-normalize both sides so the dot product equals cosine similarity.
    lnorm = LNorm()
    x_e = lnorm.predict(x_e)
    x_t = lnorm.predict(x_t)

    start = time.time()
    scores = np.dot(x_e, x_t.T)
    elapsed = time.time() - start
    n_trials = x_e.shape[0] * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (elapsed, elapsed / n_trials * 1000))

    TrialScores(enroll, ndx.seg_set, scores).save(score_file)
def eval_plda(iv_file, ndx_file, enroll_file, test_subseg2orig_file,
              preproc_file, coh_iv_file, coh_list, coh_nbest,
              coh_nbest_discard, model_file, score_file, plda_type, **kwargs):
    """Scores PLDA 1-vs-1 trials over diarization sub-segments, applies
    adaptive S-norm with a cohort, and pools the sub-segment scores back
    onto the original test segments before saving.
    """
    logging.info('loading data')
    preproc = None if preproc_file is None else TransformList.load(preproc_file)

    reader = TDR(iv_file, ndx_file, enroll_file, None,
                 test_subseg2orig_file, preproc)
    x_e, x_t, enroll, ndx, orig_seg = reader.read()

    logging.info('loading plda model: %s' % (model_file))
    plda = F.load_plda(plda_type, model_file)

    t_start = time.time()
    logging.info('computing llr')
    scores = plda.llr_1vs1(x_e, x_t)
    elapsed = time.time() - t_start
    n_trials = len(enroll) * x_t.shape[0]
    logging.info(
        'scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.'
        % (elapsed, elapsed / n_trials * 1000))

    logging.info('loading cohort data')
    coh_reader = VR(coh_iv_file, coh_list, preproc)
    x_coh = coh_reader.read()

    t_coh = time.time()
    logging.info('score cohort vs test')
    scores_coh_test = plda.llr_1vs1(x_coh, x_t)
    logging.info('score enroll vs cohort')
    scores_enr_coh = plda.llr_1vs1(x_e, x_coh)
    logging.info('cohort-scoring elapsed time: %.2f s.' % (time.time() - t_coh))

    t_snorm = time.time()
    logging.info('apply s-norm')
    # S-norm normalizes each trial score with the top cohort scores on both
    # the enrollment and the test sides.
    snorm = SNorm(nbest=coh_nbest, nbest_discard=coh_nbest_discard)
    scores = snorm.predict(scores, scores_coh_test, scores_enr_coh)
    logging.info('s-norm elapsed time: %.2f s.' % (time.time() - t_snorm))

    total = time.time() - t_start
    logging.info(
        'total-scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.'
        % (total, total / n_trials * 1000))

    logging.info('combine cluster scores')
    scores = combine_diar_scores(ndx, orig_seg, scores)

    logging.info('saving scores to %s' % (score_file))
    trial_scores = TrialScores(enroll, ndx.seg_set, scores)
    trial_scores = trial_scores.align_with_ndx(ndx)
    trial_scores.save_txt(score_file)
def convert(input_file, output_file, class_file):
    """Reads a score matrix and a class list and re-saves them as a
    TrialScores object.
    """
    reader = DRF.create(input_file)
    seg_set, score_mat = reader.read(0, squeeze=True)

    # The first token of each line in the class file is the model name.
    with open(class_file, 'r') as f:
        model_set = [line.rstrip().split()[0] for line in f]

    # Transpose so models index the rows, as TrialScores expects.
    TrialScores(model_set, seg_set, score_mat.T).save(output_file)
def create_scores(key_file='./tests/data_in/core-core_det5_key.h5'):
    """Builds a random TrialScores aligned with the given trial key.

    Scores are standard-normal draws masked to the union of target and
    non-target trials. Returns a (scores, key) tuple.
    """
    key = TrialKey.load(key_file)
    valid = np.logical_or(key.tar, key.non)
    rand_scores = np.random.normal(size=key.tar.shape) * valid
    return TrialScores(key.model_set, key.seg_set, rand_scores, valid), key
def test_filter():
    """filter() with keep=True must return exactly the selected sub-matrix."""
    scores = create_scores()[0]
    scores.sort()

    expected = TrialScores(scores.model_set[:5], scores.seg_set[:10],
                           scores.scores[:5, :10], scores.score_mask[:5, :10])
    filtered = scores.filter(expected.model_set, expected.seg_set, keep=True)
    assert (expected == filtered)
def test_load_save():
    """Round-trips TrialScores through the HDF5 and text formats."""
    scores = create_scores()[0]
    scores.sort()

    # HDF5 round-trip.
    file_h5 = output_dir + '/test.h5'
    scores.save(file_h5)
    assert (scores == TrialScores.load(file_h5))

    # For the text round-trip, restrict the mask and zero the unmasked
    # entries so the reloaded object can compare equal.
    file_txt = output_dir + '/test.txt'
    scores.score_mask[:, :] = False
    scores.score_mask[0, :] = True
    scores.score_mask[:, 0] = True
    scores.scores[scores.score_mask == False] = 0
    scores.save(file_txt)
    assert (scores == TrialScores.load(file_txt))
def test_split():
    """Splitting into a grid of parts and merging must recover the original."""
    scores = create_scores()[0]
    scores.sort()

    n_parts = 3
    parts = []
    # split() uses 1-based part indices.
    for row in xrange(1, n_parts + 1):
        for col in xrange(1, n_parts + 1):
            parts.append(scores.split(row, n_parts, col, n_parts))

    assert (scores == TrialScores.merge(parts))
def eval_pdda(iv_file, ndx_file, enroll_file, test_file, preproc_file,
              model_file, score_file, pool_method, eval_method,
              num_samples_y, num_samples_z, num_samples_elbo, qy_only,
              **kwargs):
    """Computes N-vs-1 llr scores with a tied-VAE (PDDA) back-end.

    Args:
      iv_file, ndx_file, enroll_file, test_file: trial-data inputs for TDR.
      preproc_file: optional path to a TransformList preprocessor.
      model_file: path to the trained TVAEY/TVAEYZ model.
      score_file: output path for the trial scores.
      pool_method: enrollment-side pooling method.
      eval_method: llr evaluation method forwarded to the model.
      num_samples_y / num_samples_z: sampling counts used to build the model.
      num_samples_elbo: samples used for the ELBO-based llr evaluation.
      qy_only: if True, loads the y-only variational model (TVAEY).
      **kwargs: extra options filtered into the TDR reader.
    """
    set_float_cpu('float32')

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr_args = TDR.filter_args(**kwargs)
    # Fix: original passed '**tdt_args' (undefined name), raising NameError.
    tdr = TDR(iv_file, ndx_file, enroll_file, test_file, preproc, **tdr_args)
    x_e, x_t, enroll, ndx = tdr.read()
    # Collapse repeated enrollment labels; ids_e maps rows of x_e to models.
    enroll, ids_e = np.unique(enroll, return_inverse=True)

    if qy_only:
        model = TVAEY.load(model_file)
        model.build(max_seq_length=2, num_samples=num_samples_y)
    else:
        model = TVAEYZ.load(model_file)
        model.build(max_seq_length=2,
                    num_samples_y=num_samples_y, num_samples_z=num_samples_z)

    t1 = time.time()
    scores = model.eval_llr_Nvs1(x_e, ids_e, x_t,
                                 pool_method=pool_method,
                                 eval_method=eval_method,
                                 num_samples=num_samples_elbo)
    dt = time.time() - t1
    num_trials = len(enroll) * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (dt, dt / num_trials * 1000))

    s = TrialScores(enroll, ndx.seg_set, scores)
    s.save(score_file)
def eval_2class_performance(score_file, key_file, output_path):
    """Computes the EER of scores against a key and writes a small report.

    Args:
      score_file: path to the TrialScores file.
      key_file: path to the TrialKey file.
      output_path: prefix for the output report; '<output_path>.res' is
        written with EER and the target/non-target counts.
    """
    scr = TrialScores.load(score_file)
    key = TrialKey.load(key_file)

    # Fix: guard against output_path having no directory component —
    # os.makedirs('') raises FileNotFoundError. exist_ok=True also makes the
    # previous isdir pre-check redundant.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    tar, non = scr.get_tar_non(key)
    eer = compute_eer(tar, non)

    output_file = output_path + '.res'
    with open(output_file, 'w') as f:
        f.write('EER %.4f\nNTAR %d\nNNON %d\n' % (eer, len(tar), len(non)))
def eval_calibration(in_score_file, ndx_file, model_file, out_score_file):
    """Applies a trained linear calibration model to raw trial scores.

    Args:
      in_score_file: path to the raw scores (text format).
      ndx_file: path to a trial ndx (or key, used as fallback) file.
      model_file: path to the trained LR calibration model.
      out_score_file: output path for the calibrated scores (text format).
    """
    logging.info('load ndx: %s' % ndx_file)
    try:
        ndx = TrialNdx.load_txt(ndx_file)
    except Exception:
        # Fix: was a bare 'except:', which also swallowed KeyboardInterrupt
        # and SystemExit. The file may actually be a key, so fall back.
        ndx = TrialKey.load_txt(ndx_file)

    logging.info('load scores: %s' % in_score_file)
    scr = TrialScores.load_txt(in_score_file)
    scr = scr.align_with_ndx(ndx)

    logging.info('load model: %s' % model_file)
    lr = LR.load(model_file)

    logging.info('apply calibration')
    # Calibrate the flattened score matrix, then restore its shape.
    s_cal = lr.predict(scr.scores.ravel())
    scr.scores = np.reshape(s_cal, scr.scores.shape)

    logging.info('save scores: %s' % out_score_file)
    scr.save_txt(out_score_file)
def convert(input_file, output_file, test_list, class_file, add_ext):
    """Exports TrialScores to a tab-separated table (segments as rows,
    models as columns, with a 'segmentid' header column).

    Args:
      input_file: path to the TrialScores input.
      output_file: path of the tab-separated output table.
      test_list: optional file whose first column lists the test segments to
        keep (a literal 'segmentid' header row is skipped); None keeps the
        segments already in the scores.
      class_file: optional file whose first column lists the model names;
        None keeps the models already in the scores.
      add_ext: if True, file extensions are stripped from segment names
        before matching and re-appended in the output.
    """
    scores = TrialScores.load(input_file)
    if test_list is None:
        seg_set = scores.seg_set
    else:
        # First token per line is the segment name; drop the header row.
        with open(test_list, 'r') as f:
            seg_set = [
                seg for seg in [line.rstrip().split(' ')[0] for line in f]
                if seg != 'segmentid'
            ]
    if add_ext:
        # Remember each segment's extension so it can be restored below.
        exts = [os.path.splitext(seg)[1] for seg in seg_set]
        seg_set = [os.path.splitext(seg)[0] for seg in seg_set]
    if class_file is None:
        model_set = scores.model_set
    else:
        with open(class_file, 'r') as f:
            model_set = [line.rstrip().split()[0] for line in f]
    # Expand/align scores to the requested model x segment grid; trials not
    # present in the input get the filler value -100.
    ndx = TrialNdx(model_set, seg_set)
    scores = scores.set_missing_to_value(ndx, -100)
    if add_ext:
        # NOTE(review): assumes set_missing_to_value leaves scores.seg_set in
        # the same order as seg_set/exts — TODO confirm, otherwise extensions
        # could be re-attached to the wrong segments.
        scores.seg_set = [seg + ext for seg, ext in zip(scores.seg_set, exts)]
    with open(output_file, 'w') as f:
        # Header row: 'segmentid' followed by one column per model.
        f.write('segmentid\t')
        for model in scores.model_set[:-1]:
            f.write('%s\t' % model)
        f.write('%s\n' % scores.model_set[-1])
        # One row per segment; scores matrix is indexed [model, segment].
        for i in xrange(scores.scores.shape[1]):
            f.write('%s\t' % scores.seg_set[i])
            for j in xrange(scores.scores.shape[0] - 1):
                f.write('%f\t' % scores.scores[j, i])
            f.write('%f\n' % scores.scores[-1, i])
def train_calibration(score_file, key_file, model_file, prior, verbose):
    """Trains a linear logistic-regression calibration on tar/non scores.

    Logs min DCF before training and actual DCF after calibration.

    Args:
      score_file: path to the raw scores (text format).
      key_file: path to the trial key (text format).
      model_file: output path for the trained LR model.
      prior: target prior used for DCF computation and LR training.
      verbose: verbosity level forwarded to the LR trainer.
    """
    logging.info('load key: %s' % key_file)
    key = TrialKey.load_txt(key_file)
    logging.info('load scores: %s' % score_file)
    scr = TrialScores.load_txt(score_file)

    tar, non = scr.get_tar_non(key)
    ntar = len(tar)
    nnon = len(non)

    min_dcf, p_miss, p_fa = compute_min_dcf(tar, non, prior)
    n_miss = p_miss * ntar
    n_fa = p_fa * nnon
    logging.info(
        'min_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f'
        % (min_dcf, p_miss * 100, p_fa * 100, n_miss, n_fa))

    logging.info('train calibration')
    # Targets get label 1, non-targets label 0.
    x = np.concatenate((tar, non))
    y = np.concatenate((np.ones((ntar, ), dtype='int32'),
                        np.zeros((nnon, ), dtype='int32')))
    lr = LR(prior=prior, verbose=verbose)
    lr.fit(x, y)
    # Fix: replaced leftover debug print() calls (lr.A, lr.b, tar_cal) with
    # logging so diagnostics go to the log instead of polluting stdout.
    logging.info('calibration params: A=%s b=%s' % (lr.A, lr.b))

    logging.info('save calibration at %s' % model_file)
    lr.save(model_file)

    logging.info('calibrate scores')
    tar_cal = lr.predict(tar)
    non_cal = lr.predict(non)
    logging.debug('calibrated target scores: %s' % (tar_cal,))
    act_dcf, p_miss, p_fa = compute_act_dcf(tar_cal, non_cal, prior)
    n_miss = p_miss * ntar
    n_fa = p_fa * nnon
    logging.info(
        'act_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f'
        % (act_dcf, p_miss * 100, p_fa * 100, n_miss, n_fa))
def test_merge():
    """Merging row-wise or column-wise halves must recover the original."""
    scores = create_scores()[0]
    scores.sort()

    # Split along the model axis and merge back.
    top = TrialScores(scores.model_set[:10], scores.seg_set,
                      scores.scores[:10, :], scores.score_mask[:10, :])
    bottom = TrialScores(scores.model_set[10:], scores.seg_set,
                         scores.scores[10:, :], scores.score_mask[10:, :])
    assert (scores == TrialScores.merge([top, bottom]))

    # Split along the segment axis and merge back.
    left = TrialScores(scores.model_set, scores.seg_set[:10],
                       scores.scores[:, :10], scores.score_mask[:, :10])
    right = TrialScores(scores.model_set, scores.seg_set[10:],
                        scores.scores[:, 10:], scores.score_mask[:, 10:])
    assert (scores == TrialScores.merge([left, right]))
def score_dcf(key_file, score_file, output_path):
    """Evaluates EER and min/actual DCF at several target priors and writes
    a one-line results file ('<output_path>_results').

    Args:
      key_file: path to the trial key (text format).
      score_file: path to the scores (text format).
      output_path: prefix for the results file.
    """
    logging.info('Load key: %s' % key_file)
    key = TrialKey.load_txt(key_file)
    logging.info('Load scores: %s' % score_file)
    scr = TrialScores.load_txt(score_file)

    tar, non = scr.get_tar_non(key)
    # Priors in ascending order; indices below map prior -> DCF column.
    priors = np.array([0.001, 0.005, 0.01, 0.05])
    min_dcf, act_dcf, eer, _ = fast_eval(tar, non, priors)

    # Fix: guard against output_path having no directory component
    # (os.makedirs('') raises), and use exist_ok=True so there is no race
    # between the existence check and the creation.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    output_file = output_path + '_results'
    with open(output_file, 'w') as f:
        s = 'EER: {0:.2f} DCF5e-2: {1:.3f} / {2:.3f} DCF1e-2: {3:.3f} / {4:.3f} DCF5e-3: {5:.3f} / {6:.3f} DCF1e-3: {7:.3f} / {8:.3f}'.format(
            eer * 100, min_dcf[3], act_dcf[3], min_dcf[2], act_dcf[2],
            min_dcf[1], act_dcf[1], min_dcf[0], act_dcf[0])
        f.write(s)
        logging.info(s)