Example n. 1
0
def eval_plda(iv_file, ndx_file, enroll_file, test_file, preproc_file,
              model_file, score_file, pool_method, plda_type=None, **kwargs):
    """Score N-vs-1 PLDA trials and save them to score_file.

    Fix: the original body referenced ``plda_type`` (in ``F.load_plda``)
    without it being defined anywhere in scope, so every call raised
    NameError. It is now an explicit keyword argument; callers that
    previously passed it as a keyword (it fell into **kwargs) keep working.

    Args:
      iv_file: i-vector file passed to the trial-data reader (TDR).
      ndx_file: trial index file.
      enroll_file: enrollment list; repeated enrollment ids are pooled.
      test_file: test segment list.
      preproc_file: optional TransformList file applied by the reader.
      model_file: trained PLDA model file.
      score_file: output path for the TrialScores object.
      pool_method: pooling method forwarded to ``llr_Nvs1``.
      plda_type: PLDA flavor understood by ``F.load_plda`` — TODO confirm
        valid values against the factory's documentation.
      **kwargs: extra reader options, filtered by ``TDR.filter_args``.
    """
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr_args = TDR.filter_args(**kwargs)
    tdr = TDR(iv_file, ndx_file, enroll_file, test_file, preproc, **tdr_args)
    x_e, x_t, enroll, ndx = tdr.read()
    # Collapse duplicate enrollment ids; ids_e maps each enrollment vector
    # to its unique model for N-vs-1 pooling.
    enroll, ids_e = np.unique(enroll, return_inverse=True)

    model = F.load_plda(plda_type, model_file)

    t1 = time.time()

    scores = model.llr_Nvs1(x_e, x_t, method=pool_method, ids1=ids_e)

    dt = time.time() - t1
    num_trials = len(enroll) * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.' %
                 (dt, dt / num_trials * 1000))

    s = TrialScores(enroll, ndx.seg_set, scores)
    s.save(score_file)
def eval_plda(iv_file, ndx_file, enroll_file, test_subseg2orig_file,
              preproc_file,
              model_file, score_file, plda_type,
              **kwargs):
    """Score 1-vs-1 PLDA trials on diarization sub-segments, combine the
    per-cluster scores back to original segments, and save as text."""
    logging.info('loading data')
    preproc = TransformList.load(preproc_file) if preproc_file is not None else None

    reader = TDR(iv_file, ndx_file, enroll_file, None, test_subseg2orig_file,
                 preproc)
    x_e, x_t, enroll, ndx, orig_seg = reader.read()

    logging.info('loading plda model: %s' % (model_file))
    plda = F.load_plda(plda_type, model_file)

    start = time.time()
    logging.info('computing llr')
    llr = plda.llr_1vs1(x_e, x_t)
    elapsed = time.time() - start

    num_trials = len(enroll) * x_t.shape[0]
    logging.info('scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.'
          % (elapsed, elapsed/num_trials*1000))

    # Sub-segment scores are merged back to their original segments.
    logging.info('combine cluster scores')
    llr = combine_diar_scores(ndx, orig_seg, llr)

    logging.info('saving scores to %s' % (score_file))
    trial_scores = TrialScores(enroll, ndx.seg_set, llr)
    trial_scores = trial_scores.align_with_ndx(ndx)
    trial_scores.save_txt(score_file)
Example n. 3
0
def eval_plda(iv_file, ndx_file, enroll_file, test_file,
              preproc_file,
              model_file, score_file, plda_type, **kwargs):
    """Score 1-vs-1 PLDA trials and save the TrialScores to score_file."""
    preproc = TransformList.load(preproc_file) if preproc_file is not None else None

    reader_args = TDR.filter_args(**kwargs)
    reader = TDR(iv_file, ndx_file, enroll_file, test_file, preproc,
                 **reader_args)
    x_e, x_t, enroll, ndx = reader.read()

    plda = F.load_plda(plda_type, model_file)

    start = time.time()
    llr = plda.llr_1vs1(x_e, x_t)
    elapsed = time.time() - start

    num_trials = x_e.shape[0] * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
          % (elapsed, elapsed/num_trials*1000))

    TrialScores(enroll, ndx.seg_set, llr).save(score_file)
Example n. 4
0
def eval_svm(iv_file, class2int_file, test_file,
            preproc_file,
            model_file, score_file, vector_score_file,
            eval_type, **kwargs):
    """Score test vectors with a trained SVM; save trial scores and,
    optionally, the raw per-vector score matrix."""
    preproc = TransformList.load(preproc_file) if preproc_file is not None else None

    reader_args = TDR.filter_args(**kwargs)
    reader = TDR(iv_file, class2int_file, test_file, preproc, **reader_args)
    x, ndx = reader.read()

    svm = SVM.load(model_file)

    start = time.time()
    scores = svm.predict(x, eval_type)
    elapsed = time.time() - start

    num_trials = scores.shape[0] * scores.shape[1]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (elapsed, elapsed/num_trials*1000))

    # Transpose so rows line up with ndx.model_set in the TrialScores.
    TrialScores(ndx.model_set, ndx.seg_set, scores.T).save(score_file)

    if vector_score_file is not None:
        writer = HDW(vector_score_file)
        writer.write(ndx.seg_set, '', scores)
Example n. 5
0
def eval_cos(iv_file, ndx_file, enroll_file, test_file,
             preproc_file, score_file, **kwargs):
    """Score trials by cosine similarity of length-normalized vectors."""
    preproc = TransformList.load(preproc_file) if preproc_file is not None else None

    reader_args = TDR.filter_args(**kwargs)
    reader = TDR(iv_file, ndx_file, enroll_file, test_file, preproc,
                 **reader_args)
    x_e, x_t, enroll, ndx = reader.read()

    # Length-normalize both sides so the dot product below is a cosine score.
    lnorm = LNorm()
    x_e = lnorm.predict(x_e)
    x_t = lnorm.predict(x_t)

    start = time.time()
    cos_scores = np.dot(x_e, x_t.T)
    elapsed = time.time() - start

    num_trials = x_e.shape[0] * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (elapsed, elapsed/num_trials*1000))

    TrialScores(enroll, ndx.seg_set, cos_scores).save(score_file)
Example n. 6
0
def eval_plda(iv_file, ndx_file, enroll_file, test_subseg2orig_file,
              preproc_file, coh_iv_file, coh_list, coh_nbest,
              coh_nbest_discard, model_file, score_file, plda_type, **kwargs):
    """Score 1-vs-1 PLDA trials on diarization sub-segments, apply adaptive
    S-norm with a cohort, combine cluster scores, and save as text."""
    logging.info('loading data')
    preproc = TransformList.load(preproc_file) if preproc_file is not None else None

    reader = TDR(iv_file, ndx_file, enroll_file, None, test_subseg2orig_file,
                 preproc)
    x_e, x_t, enroll, ndx, orig_seg = reader.read()

    logging.info('loading plda model: %s' % (model_file))
    plda = F.load_plda(plda_type, model_file)

    start = time.time()
    logging.info('computing llr')
    llr = plda.llr_1vs1(x_e, x_t)
    elapsed = time.time() - start

    num_trials = len(enroll) * x_t.shape[0]
    logging.info(
        'scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.' %
        (elapsed, elapsed / num_trials * 1000))

    logging.info('loading cohort data')
    coh_reader = VR(coh_iv_file, coh_list, preproc)
    x_coh = coh_reader.read()

    # Cohort scores needed for the S-norm statistics on both sides.
    coh_start = time.time()
    logging.info('score cohort vs test')
    scores_coh_test = plda.llr_1vs1(x_coh, x_t)
    logging.info('score enroll vs cohort')
    scores_enr_coh = plda.llr_1vs1(x_e, x_coh)
    logging.info('cohort-scoring elapsed time: %.2f s.' %
                 (time.time() - coh_start))

    snorm_start = time.time()
    logging.info('apply s-norm')
    snorm = SNorm(nbest=coh_nbest, nbest_discard=coh_nbest_discard)
    llr = snorm.predict(llr, scores_coh_test, scores_enr_coh)
    logging.info('s-norm elapsed time: %.2f s.' % (time.time() - snorm_start))

    total = time.time() - start
    logging.info(
        ('total-scoring elapsed time: %.2f s. '
         'elapsed time per trial: %.2f ms.') % (total, total / num_trials * 1000))

    # Sub-segment scores are merged back to their original segments.
    logging.info('combine cluster scores')
    llr = combine_diar_scores(ndx, orig_seg, llr)

    logging.info('saving scores to %s' % (score_file))
    trial_scores = TrialScores(enroll, ndx.seg_set, llr)
    trial_scores = trial_scores.align_with_ndx(ndx)
    trial_scores.save_txt(score_file)
Example n. 7
0
def convert(input_file, output_file, class_file):
    """Read a score matrix and re-save it as TrialScores, taking model ids
    from the first column of class_file."""
    reader = DRF.create(input_file)
    seg_set, score_mat = reader.read(0, squeeze=True)

    with open(class_file, 'r') as f:
        model_set = [line.rstrip().split()[0] for line in f]

    TrialScores(model_set, seg_set, score_mat.T).save(output_file)
Example n. 8
0
def create_scores(key_file='./tests/data_in/core-core_det5_key.h5'):
    """Build random TrialScores masked to the key's scored (tar|non) trials.

    Returns (scores, key).
    """
    key = TrialKey.load(key_file)

    # Only trials marked target or non-target in the key are scored.
    mask = np.logical_or(key.tar, key.non)
    random_scores = np.random.normal(size=key.tar.shape) * mask
    scr1 = TrialScores(key.model_set, key.seg_set, random_scores, mask)
    return scr1, key
Example n. 9
0
def test_filter():
    """filter() on a model/segment subset must equal direct slicing."""
    scr1 = create_scores()[0]
    scr1.sort()

    expected = TrialScores(scr1.model_set[:5], scr1.seg_set[:10],
                           scr1.scores[:5, :10], scr1.score_mask[:5, :10])
    filtered = scr1.filter(expected.model_set, expected.seg_set, keep=True)
    assert expected == filtered
Example n. 10
0
def test_load_save():
    """Round-trip TrialScores through HDF5 and text formats.

    Fix: ``scr1.score_mask == False`` replaced with the idiomatic boolean
    negation ``~scr1.score_mask`` (same result for a boolean mask, and what
    linters flag ``== False`` comparisons for).
    """
    scr1 = create_scores()[0]
    scr1.sort()

    # HDF5 round-trip preserves the full object.
    file_h5 = output_dir + '/test.h5'
    scr1.save(file_h5)
    scr2 = TrialScores.load(file_h5)
    assert (scr1 == scr2)

    # Text round-trip: restrict the mask to a cross pattern and zero the
    # unscored cells, since the text format only stores masked-in scores.
    file_txt = output_dir + '/test.txt'
    scr1.score_mask[:, :] = False
    scr1.score_mask[0, :] = True
    scr1.score_mask[:, 0] = True
    scr1.scores[~scr1.score_mask] = 0
    scr1.save(file_txt)
    scr2 = TrialScores.load(file_txt)

    assert (scr1 == scr2)
Example n. 11
0
def test_split():
    """Splitting into a grid of parts and merging must reproduce the original.

    Fix: ``xrange`` is Python 2 only and raises NameError on Python 3 (which
    the rest of this file targets, e.g. ``os.makedirs(..., exist_ok=True)``);
    replaced with ``range``.
    """
    scr1 = create_scores()[0]
    scr1.sort()

    num_parts = 3
    scr_list = []
    # split() appears to use 1-based part indices, hence i + 1 / j + 1.
    for i in range(num_parts):
        for j in range(num_parts):
            scr_ij = scr1.split(i + 1, num_parts, j + 1, num_parts)
            scr_list.append(scr_ij)
    scr2 = TrialScores.merge(scr_list)
    assert (scr1 == scr2)
Example n. 12
0
def eval_pdda(iv_file, ndx_file, enroll_file, test_file,
              preproc_file,
              model_file, score_file,
              pool_method, eval_method,
              num_samples_y, num_samples_z, num_samples_elbo, qy_only,
              **kwargs):
    """Score N-vs-1 trials with a variational PDDA model and save them.

    Fix: ``**tdt_args`` was a typo for the ``tdr_args`` computed on the
    previous line and raised NameError on every call.

    Args:
      qy_only: if true, load the y-only model (TVAEY) instead of TVAEYZ.
      num_samples_y/num_samples_z/num_samples_elbo: sampling sizes forwarded
        to the model build/eval — see the model classes for semantics.
    """
    set_float_cpu('float32')

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr_args = TDR.filter_args(**kwargs)
    tdr = TDR(iv_file, ndx_file, enroll_file, test_file, preproc, **tdr_args)
    x_e, x_t, enroll, ndx = tdr.read()
    # Collapse duplicate enrollment ids for N-vs-1 pooling.
    enroll, ids_e = np.unique(enroll, return_inverse=True)

    if qy_only:
        model = TVAEY.load(model_file)
        model.build(max_seq_length=2, num_samples=num_samples_y)
    else:
        model = TVAEYZ.load(model_file)
        model.build(max_seq_length=2,
                    num_samples_y=num_samples_y, num_samples_z=num_samples_z)

    t1 = time.time()
    scores = model.eval_llr_Nvs1(x_e, ids_e, x_t,
                                 pool_method=pool_method,
                                 eval_method=eval_method,
                                 num_samples=num_samples_elbo)
    dt = time.time() - t1
    num_trials = len(enroll) * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.' %
                 (dt, dt/num_trials*1000))

    s = TrialScores(enroll, ndx.seg_set, scores)
    s.save(score_file)
Example n. 13
0
def eval_2class_performance(score_file, key_file, output_path):
    """Compute EER from scores vs. key and write it to ``output_path + '.res'``.

    Fix: when ``output_path`` has no directory component,
    ``os.path.dirname`` returns ``''`` and ``os.makedirs('')`` raises
    FileNotFoundError; the directory is now only created when non-empty.
    The redundant ``isdir`` pre-check is also dropped — ``exist_ok=True``
    already makes the call idempotent.
    """
    scr = TrialScores.load(score_file)
    key = TrialKey.load(key_file)

    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    tar, non = scr.get_tar_non(key)
    eer = compute_eer(tar, non)

    output_file = output_path + '.res'
    with open(output_file, 'w') as f:
        f.write('EER %.4f\nNTAR %d\nNNON %d\n' % (eer, len(tar), len(non)))
Example n. 14
0
def eval_calibration(in_score_file, ndx_file, model_file, out_score_file):
    """Apply a trained linear calibration (LR) to scores and save them.

    Fix: the bare ``except:`` on the ndx fallback also swallowed
    KeyboardInterrupt/SystemExit; narrowed to ``except Exception``.
    """
    logging.info('load ndx: %s' % ndx_file)
    # The file may be an ndx or a key; try ndx first and fall back to key.
    try:
        ndx = TrialNdx.load_txt(ndx_file)
    except Exception:
        ndx = TrialKey.load_txt(ndx_file)

    logging.info('load scores: %s' % in_score_file)
    scr = TrialScores.load_txt(in_score_file)
    scr = scr.align_with_ndx(ndx)

    logging.info('load model: %s' % model_file)
    lr = LR.load(model_file)
    logging.info('apply calibration')
    # Calibrate the flattened score matrix, then restore its shape.
    s_cal = lr.predict(scr.scores.ravel())
    scr.scores = np.reshape(s_cal, scr.scores.shape)

    logging.info('save scores: %s' % out_score_file)
    scr.save_txt(out_score_file)
Example n. 15
0
def convert(input_file, output_file, test_list, class_file, add_ext):
    """Dump TrialScores as a tab-separated table (one row per segment).

    Fixes:
      * ``xrange`` is Python 2 only; replaced with ``range``.
      * When ``add_ext`` was true but ``test_list`` was None, ``exts`` was
        never defined and the function crashed with NameError; the
        re-append step now runs only when extensions were stripped.
    Missing trials are filled with the sentinel value -100.
    """
    scores = TrialScores.load(input_file)

    exts = None
    if test_list is None:
        seg_set = scores.seg_set
    else:
        with open(test_list, 'r') as f:
            # First column of each line is the segment id; drop the header.
            seg_set = [
                seg for seg in [line.rstrip().split(' ')[0] for line in f]
                if seg != 'segmentid'
            ]
        if add_ext:
            # Strip file extensions for matching; remember them so they can
            # be re-appended to the output segment ids below.
            exts = [os.path.splitext(seg)[1] for seg in seg_set]
            seg_set = [os.path.splitext(seg)[0] for seg in seg_set]

    if class_file is None:
        model_set = scores.model_set
    else:
        with open(class_file, 'r') as f:
            model_set = [line.rstrip().split()[0] for line in f]

    ndx = TrialNdx(model_set, seg_set)
    scores = scores.set_missing_to_value(ndx, -100)

    if add_ext and exts is not None:
        scores.seg_set = [seg + ext for seg, ext in zip(scores.seg_set, exts)]

    with open(output_file, 'w') as f:
        f.write('segmentid\t')
        for model in scores.model_set[:-1]:
            f.write('%s\t' % model)
        f.write('%s\n' % scores.model_set[-1])
        for i in range(scores.scores.shape[1]):
            f.write('%s\t' % scores.seg_set[i])
            for j in range(scores.scores.shape[0] - 1):
                f.write('%f\t' % scores.scores[j, i])
            f.write('%f\n' % scores.scores[-1, i])
def train_calibration(score_file, key_file, model_file, prior, verbose):
    """Train a linear logistic-regression calibration on tar/non scores,
    save the model, and report min/actual DCF before and after calibration.

    Fix: stray debug ``print()`` calls replaced with ``logging.info`` for
    consistency with the rest of the file's logging.
    """
    logging.info('load key: %s' % key_file)
    key = TrialKey.load_txt(key_file)
    logging.info('load scores: %s' % score_file)
    scr = TrialScores.load_txt(score_file)
    tar, non = scr.get_tar_non(key)
    ntar = len(tar)
    nnon = len(non)

    min_dcf, p_miss, p_fa = compute_min_dcf(tar, non, prior)
    n_miss = p_miss * ntar
    n_fa = p_fa * nnon
    logging.info(
        'min_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f' %
        (min_dcf, p_miss * 100, p_fa * 100, n_miss, n_fa))

    logging.info('train calibration')
    # Binary labels: 1 = target trial, 0 = non-target trial.
    x = np.concatenate((tar, non))
    y = np.concatenate((np.ones(
        (ntar, ), dtype='int32'), np.zeros((nnon, ), dtype='int32')))
    lr = LR(prior=prior, verbose=verbose)
    lr.fit(x, y)
    logging.info('A: %s b: %s' % (lr.A, lr.b))
    logging.info('save calibration at %s' % model_file)
    lr.save(model_file)

    logging.info('calibrate scores')
    tar_cal = lr.predict(tar)
    non_cal = lr.predict(non)
    act_dcf, p_miss, p_fa = compute_act_dcf(tar_cal, non_cal, prior)
    n_miss = p_miss * ntar
    n_fa = p_fa * nnon
    logging.info(
        'act_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f' %
        (act_dcf, p_miss * 100, p_fa * 100, n_miss, n_fa))
Example n. 17
0
def test_merge():
    """Merging row-wise or column-wise halves must reproduce the original."""
    scr1 = create_scores()[0]
    scr1.sort()

    # Split along the model axis and merge back.
    top = TrialScores(scr1.model_set[:10], scr1.seg_set, scr1.scores[:10, :],
                      scr1.score_mask[:10, :])
    bottom = TrialScores(scr1.model_set[10:], scr1.seg_set,
                         scr1.scores[10:, :], scr1.score_mask[10:, :])
    merged = TrialScores.merge([top, bottom])
    assert scr1 == merged

    # Split along the segment axis and merge back.
    left = TrialScores(scr1.model_set, scr1.seg_set[:10], scr1.scores[:, :10],
                       scr1.score_mask[:, :10])
    right = TrialScores(scr1.model_set, scr1.seg_set[10:],
                        scr1.scores[:, 10:], scr1.score_mask[:, 10:])
    merged = TrialScores.merge([left, right])
    assert scr1 == merged
Example n. 18
0
def score_dcf(key_file, score_file, output_path):
    """Compute EER and min/actual DCF at several priors; write and log a summary.

    Fix: directory creation now tolerates an empty dirname (output_path with
    no directory part made ``os.makedirs('')`` raise) and uses
    ``exist_ok=True`` to avoid a race between the isdir check and makedirs,
    matching the other evaluation helpers in this file.
    """
    logging.info('Load key: %s' % key_file)
    key = TrialKey.load_txt(key_file)
    logging.info('Load scores: %s' % score_file)
    scr = TrialScores.load_txt(score_file)
    tar, non = scr.get_tar_non(key)

    # Priors are ordered ascending; the report below indexes them 3..0.
    priors = np.array([0.001, 0.005, 0.01, 0.05])
    min_dcf, act_dcf, eer, _ = fast_eval(tar, non, priors)

    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    output_file = output_path + '_results'
    with open(output_file, 'w') as f:
        s = 'EER: {0:.2f} DCF5e-2: {1:.3f} / {2:.3f} DCF1e-2: {3:.3f} / {4:.3f} DCF5e-3: {5:.3f} / {6:.3f} DCF1e-3: {7:.3f} / {8:.3f}'.format(
            eer * 100, min_dcf[3], act_dcf[3],
            min_dcf[2], act_dcf[2],
            min_dcf[1], act_dcf[1],
            min_dcf[0], act_dcf[0])
        f.write(s)
        logging.info(s)