Ejemplo n.º 1
0
def train_lda(iv_file, train_list, preproc_file, lda_dim, name, save_tlist,
              append_tlist, output_path, **kwargs):
    """Fit an LDA transform on vectors read through VCR and save it.

    Args:
      iv_file: Vector file handed to the VCR reader.
      train_list: Training list handed to the VCR reader.
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      lda_dim: Value passed to LDA as `lda_dim`.
      name: Name given to the LDA object.
      save_tlist: If true, save a TransformList instead of the bare LDA.
      append_tlist: With `save_tlist`, append the LDA to the loaded
        preprocessing list (when one was loaded) instead of creating a new
        list.
      output_path: Destination passed to `save`.
      **kwargs: Extra options; VCR.filter_args selects the reader options.
    """
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = VCR(iv_file, train_list, preproc, **VCR.filter_args(**kwargs))
    x, class_ids = reader.read()

    start = time.time()

    lda = LDA(lda_dim=lda_dim, name=name)
    lda.fit(x, class_ids)

    logging.info('Elapsed time: %.2f s.' % (time.time() - start))

    # Scatter matrices of the projected data; only logged at debug level.
    x_proj = lda.predict(x)
    scatter = SbSw()
    scatter.fit(x_proj, class_ids)
    logging.debug(scatter.Sb[:4, :4])
    logging.debug(scatter.Sw[:4, :4])

    if not save_tlist:
        result = lda
    elif append_tlist and preproc is not None:
        preproc.append(lda)
        result = preproc
    else:
        result = TransformList(lda)

    result.save(output_path)
Ejemplo n.º 2
0
def train_mvn(iv_file, train_list, preproc_file, name, save_tlist,
              append_tlist, output_path, **kwargs):
    """Fit an MVN transform on vectors read through VR and save it.

    Args:
      iv_file: Vector file handed to the VR reader.
      train_list: Training list handed to the VR reader.
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      name: Name given to the MVN object.
      save_tlist: If true, save a TransformList instead of the bare MVN.
      append_tlist: With `save_tlist`, append the MVN to the loaded
        preprocessing list (when one was loaded) instead of creating a new
        list.
      output_path: Destination passed to `save`.
      **kwargs: Extra options; VR.filter_args selects the reader options.
    """
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = VR(iv_file, train_list, preproc, **VR.filter_args(**kwargs))
    x = reader.read()

    start = time.time()

    mvn = MVN(name=name)
    mvn.fit(x)

    logging.info('Elapsed time: %.2f s.' % (time.time() - start))

    if not save_tlist:
        result = mvn
    elif append_tlist and preproc is not None:
        preproc.append(mvn)
        result = preproc
    else:
        result = TransformList(mvn)

    result.save(output_path)
Ejemplo n.º 3
0
def train_gauss(iv_file, train_list, preproc_file, 
                save_tlist, append_tlist, input_path, output_path, **kwargs):
    """Fit a Gaussianizer-type transform on vectors read through VR.

    Args:
      iv_file: Vector file handed to the VR reader.
      train_list: Training list handed to the VR reader.
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      save_tlist: If true, save a TransformList instead of the bare model.
      append_tlist: With `save_tlist`, append the model to the loaded
        preprocessing list (when one was loaded) instead of creating a new
        list.
      input_path: Passed to `load_model` to obtain the initial model.
      output_path: Destination passed to `save`.
      **kwargs: Extra options; VR.filter_args and Gaussianizer.filter_args
        select the reader and model options respectively.
    """
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = VR(iv_file, train_list, preproc, **VR.filter_args(**kwargs))
    x = reader.read()

    # Timestamp is taken as in the sibling trainers but is not reported here.
    start = time.time()

    gauss = load_model(input_path, **Gaussianizer.filter_args(**kwargs))
    gauss.fit(x)

    if not save_tlist:
        result = gauss
    elif append_tlist and preproc is not None:
        preproc.append(gauss)
        result = preproc
    else:
        result = TransformList(gauss)

    result.save(output_path)
Ejemplo n.º 4
0
def eval_plda(iv_file, ndx_file, enroll_file, test_file,
              preproc_file,
              model_file, score_file, plda_type, **kwargs):
    """Score enrollment vs. test vectors with a PLDA model (1 vs 1).

    Args:
      iv_file, ndx_file, enroll_file, test_file: Inputs handed to the TDR
        reader.
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      model_file: PLDA model file passed to F.load_plda.
      score_file: Destination of the TrialScores object.
      plda_type: PLDA type passed to F.load_plda.
      **kwargs: Extra options; TDR.filter_args selects the reader options.
    """
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = TDR(iv_file, ndx_file, enroll_file, test_file, preproc,
                 **TDR.filter_args(**kwargs))
    x_e, x_t, enroll, ndx = reader.read()

    plda = F.load_plda(plda_type, model_file)

    # Only the scoring call itself is timed.
    start = time.time()
    scores = plda.llr_1vs1(x_e, x_t)
    elapsed = time.time() - start

    num_trials = x_e.shape[0] * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (elapsed, elapsed/num_trials*1000))

    TrialScores(enroll, ndx.seg_set, scores).save(score_file)
Ejemplo n.º 5
0
def plot_vector_hist(iv_file, v_list, preproc_file, output_path, num_bins,
                     normed, **kwargs):
    """Plot one histogram per vector dimension and save each as a PDF.

    Args:
      iv_file: Vector file handed to the VR reader.
      v_list: Vector list handed to the VR reader.
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      output_path: Directory for the figures; created if it does not exist.
        Files are named `D%04d.pdf` after the dimension index.
      num_bins: Number of bins passed to `plt.hist`.
      normed: Forwarded to `plt.hist` as its `normed` argument.
      **kwargs: Extra options; VR.filter_args selects the reader options.
    """
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    vr_args = VR.filter_args(**kwargs)
    vr = VR(iv_file, v_list, preproc, **vr_args)
    x = vr.read()

    t1 = time.time()

    # The original passed the misspelled name `ouput_path` to makedirs,
    # which raised NameError whenever the directory had to be created.
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    for i in xrange(x.shape[1]):

        fig_file = '%s/D%04d.pdf' % (output_path, i)

        plt.hist(x[:, i], num_bins, normed=normed)
        plt.xlabel('Dim %d' % i)
        plt.grid(True)
        # Save before showing: with an interactive backend the figure may
        # already be closed/empty once show() returns.
        plt.savefig(fig_file)
        plt.show()
        plt.clf()

    logging.info('Elapsed time: %.2f s.' % (time.time() - t1))
Ejemplo n.º 6
0
def eval_cos(iv_file, ndx_file, enroll_file, test_file,
             preproc_file, score_file, **kwargs):
    """Score enrollment vs. test vectors with a dot product after LNorm.

    Args:
      iv_file, ndx_file, enroll_file, test_file: Inputs handed to the TDR
        reader.
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      score_file: Destination of the TrialScores object.
      **kwargs: Extra options; TDR.filter_args selects the reader options.
    """
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = TDR(iv_file, ndx_file, enroll_file, test_file, preproc,
                 **TDR.filter_args(**kwargs))
    x_e, x_t, enroll, ndx = reader.read()

    # Both sides go through the same LNorm object before scoring.
    normalizer = LNorm()
    x_e = normalizer.predict(x_e)
    x_t = normalizer.predict(x_t)

    start = time.time()
    scores = np.dot(x_e, x_t.T)
    elapsed = time.time() - start

    num_trials = x_e.shape[0] * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (elapsed, elapsed/num_trials*1000))

    TrialScores(enroll, ndx.seg_set, scores).save(score_file)
Ejemplo n.º 7
0
def eval_plda(iv_file, ndx_file, enroll_file, test_file, preproc_file,
              model_file, score_file, pool_method, **kwargs):
    """Score pooled enrollment models vs. test vectors with PLDA (N vs 1).

    Enrollment vectors that share an enrollment id are grouped through
    `np.unique(..., return_inverse=True)` and scored together by
    `llr_Nvs1`.

    Args:
      iv_file, ndx_file, enroll_file, test_file: Inputs handed to the TDR
        reader.
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      model_file: PLDA model file passed to F.load_plda.
      score_file: Destination of the TrialScores object.
      pool_method: Passed to `llr_Nvs1` as `method`.
      **kwargs: Extra options. Must contain `plda_type`; TDR.filter_args
        selects the reader options.

    Raises:
      KeyError: If `plda_type` is not present in `kwargs`.
    """
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr_args = TDR.filter_args(**kwargs)
    tdr = TDR(iv_file, ndx_file, enroll_file, test_file, preproc, **tdr_args)
    x_e, x_t, enroll, ndx = tdr.read()
    enroll, ids_e = np.unique(enroll, return_inverse=True)

    # `plda_type` is not a named parameter of this function; the original
    # read it as a bare (undefined) name. It arrives with the other extra
    # keyword options, so take it from there.
    plda_type = kwargs['plda_type']
    model = F.load_plda(plda_type, model_file)

    t1 = time.time()

    scores = model.llr_Nvs1(x_e, x_t, method=pool_method, ids1=ids_e)

    dt = time.time() - t1
    # One trial per (unique enrollment id, test vector) pair.
    num_trials = len(enroll) * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.' %
                 (dt, dt / num_trials * 1000))

    s = TrialScores(enroll, ndx.seg_set, scores)
    s.save(score_file)
Ejemplo n.º 8
0
def train_plda(iv_file, train_list, val_list, preproc_file,
               epochs, ml_md, md_epochs,
               output_path, **kwargs):
    """Train a PLDA model, save it, and dump the per-epoch ELBO as CSV.

    The CSV is written next to `output_path`, with its extension replaced
    by `.csv`; the first column is the epoch index.

    Args:
      iv_file: Vector file handed to the VCR readers.
      train_list: Training list handed to the VCR reader.
      val_list: Validation list, or None to train without validation data.
      preproc_file: File holding a TransformList that is passed to the
        readers, or None to use no preprocessing.
      epochs, ml_md, md_epochs: Forwarded to `model.fit`.
      output_path: Destination passed to `model.save`.
      **kwargs: Extra options; VCR.filter_args and F.filter_train_args
        select the reader and model options respectively.
    """
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader_args = VCR.filter_args(**kwargs)
    x, class_ids = VCR(iv_file, train_list, preproc, **reader_args).read()

    if val_list is None:
        x_val, class_ids_val = None, None
    else:
        val_reader = VCR(iv_file, val_list, preproc, **reader_args)
        x_val, class_ids_val = val_reader.read()

    start = time.time()

    plda = F.create_plda(**F.filter_train_args(**kwargs))
    elbos = plda.fit(x, class_ids, x_val=x_val, class_ids_val=class_ids_val,
                     epochs=epochs, ml_md=ml_md, md_epochs=md_epochs)

    logging.info('Elapsed time: %.2f s.' % (time.time()-start))

    plda.save(output_path)

    # Stack the epoch index on top of the ELBO rows, then transpose so each
    # CSV line corresponds to one epoch.
    table = np.vstack((np.arange(epochs), np.vstack(elbos))).T
    csv_path = os.path.splitext(output_path)[0] + '.csv'
    np.savetxt(csv_path, table, delimiter=',')
Ejemplo n.º 9
0
def eval_svm(iv_file, class2int_file, test_file,
            preproc_file,
            model_file, score_file, vector_score_file,
            eval_type, **kwargs):
    """Score test vectors with an SVM and save the scores.

    Args:
      iv_file, class2int_file, test_file: Inputs handed to the TDR reader.
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      model_file: File the SVM is loaded from.
      score_file: Destination of the TrialScores object (built from the
        transposed score matrix).
      vector_score_file: If not None, the untransposed scores are also
        written there through HDW, keyed by `ndx.seg_set`.
      eval_type: Second argument of `model.predict`.
      **kwargs: Extra options; TDR.filter_args selects the reader options.
    """
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = TDR(iv_file, class2int_file, test_file, preproc,
                 **TDR.filter_args(**kwargs))
    x, ndx = reader.read()

    svm = SVM.load(model_file)

    start = time.time()
    scores = svm.predict(x, eval_type)
    elapsed = time.time() - start

    num_trials = scores.shape[0]*scores.shape[1]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (elapsed, elapsed/num_trials*1000))

    TrialScores(ndx.model_set, ndx.seg_set, scores.T).save(score_file)

    if vector_score_file is None:
        return

    writer = HDW(vector_score_file)
    writer.write(ndx.seg_set, '', scores)
Ejemplo n.º 10
0
def eval_plda(iv_file, ndx_file, enroll_file, test_subseg2orig_file,
              preproc_file,
              model_file, score_file, plda_type,
              **kwargs):
    """Score sub-segments with PLDA and combine them per original segment.

    Args:
      iv_file, ndx_file, enroll_file, test_subseg2orig_file: Inputs handed
        to the TDR reader (the test-file slot of the reader is passed as
        None).
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      model_file: PLDA model file passed to F.load_plda.
      score_file: Text file the aligned scores are saved to.
      plda_type: PLDA type passed to F.load_plda.
      **kwargs: Accepted but not used by this function.
    """
    logging.info('loading data')
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = TDR(iv_file, ndx_file, enroll_file, None,
                 test_subseg2orig_file, preproc)
    x_e, x_t, enroll, ndx, orig_seg = reader.read()

    logging.info('loading plda model: %s' % (model_file))
    plda = F.load_plda(plda_type, model_file)

    start = time.time()

    logging.info('computing llr')
    scores = plda.llr_1vs1(x_e, x_t)

    elapsed = time.time() - start
    num_trials = len(enroll) * x_t.shape[0]
    logging.info('scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.'
                 % (elapsed, elapsed/num_trials*1000))

    logging.info('combine cluster scores') 
    scores = combine_diar_scores(ndx, orig_seg, scores)

    logging.info('saving scores to %s' % (score_file))
    trial_scores = TrialScores(enroll, ndx.seg_set, scores)
    trial_scores.align_with_ndx(ndx).save_txt(score_file)
Ejemplo n.º 11
0
def eval_plda(iv_file, ndx_file, enroll_file, test_subseg2orig_file,
              preproc_file, coh_iv_file, coh_list, coh_nbest,
              coh_nbest_discard, model_file, score_file, plda_type, **kwargs):
    """Score sub-segments with PLDA, apply S-Norm, and combine per segment.

    Args:
      iv_file, ndx_file, enroll_file, test_subseg2orig_file: Inputs handed
        to the TDR reader (the test-file slot of the reader is passed as
        None).
      preproc_file: File holding a TransformList that is passed to both
        readers, or None to use no preprocessing.
      coh_iv_file, coh_list: Cohort vector file and list handed to the VR
        reader.
      coh_nbest, coh_nbest_discard: Passed to SNorm as `nbest` and
        `nbest_discard`.
      model_file: PLDA model file passed to F.load_plda.
      score_file: Text file the aligned scores are saved to.
      plda_type: PLDA type passed to F.load_plda.
      **kwargs: Accepted but not used by this function.
    """
    logging.info('loading data')
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = TDR(iv_file, ndx_file, enroll_file, None, test_subseg2orig_file,
                 preproc)
    x_e, x_t, enroll, ndx, orig_seg = reader.read()

    logging.info('loading plda model: %s' % (model_file))
    plda = F.load_plda(plda_type, model_file)

    t_total = time.time()
    logging.info('computing llr')
    scores = plda.llr_1vs1(x_e, x_t)

    elapsed = time.time() - t_total
    num_trials = len(enroll) * x_t.shape[0]
    logging.info(
        'scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.' %
        (elapsed, elapsed / num_trials * 1000))

    logging.info('loading cohort data')
    x_coh = VR(coh_iv_file, coh_list, preproc).read()

    t_stage = time.time()
    logging.info('score cohort vs test')
    scores_coh_test = plda.llr_1vs1(x_coh, x_t)
    logging.info('score enroll vs cohort')
    scores_enr_coh = plda.llr_1vs1(x_e, x_coh)

    logging.info('cohort-scoring elapsed time: %.2f s.' %
                 (time.time() - t_stage))

    t_stage = time.time()
    logging.info('apply s-norm')
    normalizer = SNorm(nbest=coh_nbest, nbest_discard=coh_nbest_discard)
    scores = normalizer.predict(scores, scores_coh_test, scores_enr_coh)
    logging.info('s-norm elapsed time: %.2f s.' % (time.time() - t_stage))

    # Total covers everything since the first scoring call, including the
    # cohort loading.
    elapsed = time.time() - t_total
    logging.info(
        ('total-scoring elapsed time: %.2f s. '
         'elapsed time per trial: %.2f ms.') %
        (elapsed, elapsed / num_trials * 1000))

    logging.info('combine cluster scores')
    scores = combine_diar_scores(ndx, orig_seg, scores)

    logging.info('saving scores to %s' % (score_file))
    trial_scores = TrialScores(enroll, ndx.seg_set, scores)
    trial_scores.align_with_ndx(ndx).save_txt(score_file)
Ejemplo n.º 12
0
def extract_embed(seq_file, model_file, preproc_file, output_path,
                  max_seq_length, pooling_output, write_format, **kwargs):
    """Extract embeddings for every sequence with a SeqQEmbed model.

    `predict_embed` yields two vectors per sequence (stored as p1 and p2);
    `write_format` selects which of them is written.

    Args:
      seq_file: Sequence file handed to SDRF.create.
      model_file: File the SeqQEmbed model is loaded from.
      preproc_file: File holding a TransformList passed to the reader as
        `transform`, or None to use no preprocessing.
      output_path: Destination handed to DWF.create.
      max_seq_length: Argument of `model.build`.
      pooling_output: Argument of `model.build_embed`.
      write_format: 'p1' writes p1, 'p1+p2' writes both stacked
        horizontally, any other value writes p2.
      **kwargs: Extra options; SDRF.filter_args selects the reader options.
    """
    set_float_cpu('float32')

    reader_args = SDRF.filter_args(**kwargs)
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)
    reader = SDRF.create(seq_file, transform=preproc, **reader_args)

    start = time.time()

    embed_model = SeqQEmbed.load(model_file)
    embed_model.build(max_seq_length)
    embed_model.build_embed(pooling_output)
    embed_dim = embed_model.embed_dim

    # The row counts are only used to learn how many sequences there are;
    # the reader is reset before the sequences are read one by one.
    _, seq_lengths = reader.read_num_rows()
    reader.reset()
    num_seqs = len(seq_lengths)

    p1_y = np.zeros((num_seqs, embed_dim), dtype=float_keras())
    p2_y = np.zeros((num_seqs, embed_dim), dtype=float_keras())
    keys = []

    for i in xrange(num_seqs):
        t_begin = time.time()
        key, data = reader.read(1)
        t_read = time.time()

        seq_key = key[0]
        seq_data = data[0]
        logging.info('Extracting embeddings %d/%d for %s, num_frames: %d' %
                     (i, num_seqs, seq_key, seq_data.shape[0]))
        keys.append(seq_key)
        p1_y[i], p2_y[i] = embed_model.predict_embed(seq_data)

        t_end = time.time()
        # Note: this message formats the whole `key` object returned by the
        # reader, not its first element.
        logging.info('Elapsed time embeddings %d/%d for %s, total: %.2f read: %.2f, vae: %.2f' %
                     (i, num_seqs, key, t_end-t_begin, t_read-t_begin,
                      t_end-t_read))

    logging.info('Extract elapsed time: %.2f' % (time.time() - start))

    if write_format == 'p1+p2':
        y = np.hstack((p1_y, p2_y))
    else:
        y = p1_y if write_format == 'p1' else p2_y

    writer = DWF.create(output_path)
    writer.write(keys, y)
Ejemplo n.º 13
0
def load_model(input_path, **kwargs):
    """Create a new Gaussianizer or load one from `input_path`.

    Args:
      input_path: File to load from, or None to build a fresh
        Gaussianizer from `kwargs`.
      **kwargs: Constructor options for a fresh Gaussianizer. When
        `input_path` turns out to hold a TransformList, `kwargs['name']`
        (if given) selects the transform to return.

    Returns:
      The Gaussianizer, or the matching transform from the list, or None
      when the list holds no transform with the requested name.
    """
    if input_path is None:
        return Gaussianizer(**kwargs)

    try:
        return Gaussianizer.load(input_path)
    except Exception:
        # Loading as a bare Gaussianizer failed: treat the file as a
        # TransformList and look the transform up by name. `Exception`
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        # The original compared against an undefined `name`; it is taken
        # from the keyword options here.
        name = kwargs.get('name')
        tfl = TransformList.load(input_path)
        for tf in tfl.transforms:
            if tf.name == name:
                return tf

    return None
Ejemplo n.º 14
0
def load_model(input_path, name, **kwargs):
    """Create a new PCA or load one from `input_path`.

    Args:
      input_path: File to load from, or None to build a fresh PCA.
      name: Name of the fresh PCA, and the name searched for when
        `input_path` turns out to hold a TransformList.
      **kwargs: Constructor options for a fresh PCA.

    Returns:
      The PCA, or the matching transform from the list, or None when the
      list holds no transform called `name`.
    """
    if input_path is None:
        return PCA(name=name, **kwargs)

    try:
        return PCA.load(input_path)
    except Exception:
        # Loading as a bare PCA failed: treat the file as a TransformList.
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of triggering the fallback.
        tfl = TransformList.load(input_path)
        for tf in tfl.transforms:
            if tf.name == name:
                return tf

    return None
Ejemplo n.º 15
0
def extract_ivector(seq_file, file_list, gmm_file, model_file, preproc_file, output_path,
                    qy_only, **kwargs):
    """Extract one vector per sequence with a TVAEYZ model and GMM input.

    Each sequence is zero-padded to the reader's maximum length and fed,
    together with the output of `gmm.compute_z`, to `model.compute_qy_x`.

    Args:
      seq_file, file_list: Inputs handed to the SR reader.
      gmm_file: Kaldi GMM file loaded through DiagGMM.load_from_kaldi.
      model_file: File the TVAEYZ model is loaded from.
      preproc_file: File holding a TransformList passed to the reader as
        `preproc`, or None to use no preprocessing.
      output_path: Destination handed to HypDataWriter.
      qy_only: Not used; a TVAEYZ model is always loaded.
      **kwargs: Extra options; SR.filter_eval_args selects the reader
        options.
    """
    set_float_cpu('float32')

    reader_args = SR.filter_eval_args(**kwargs)
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    gmm = DiagGMM.load_from_kaldi(gmm_file)

    reader = SR(seq_file, file_list, batch_size=1,
                shuffle_seqs=False,
                preproc=preproc, **reader_args)

    start = time.time()

    model = TVAEYZ.load(model_file)
    model.build(max_seq_length=reader.max_batch_seq_length)

    y = np.zeros((reader.num_seqs, model.y_dim), dtype=float_keras())
    # Padded single-sequence input buffers, reused for every sequence.
    x_buf = np.zeros((1, reader.max_batch_seq_length, model.x_dim),
                     dtype=float_keras())
    r_buf = np.zeros((1, reader.max_batch_seq_length, model.r_dim),
                     dtype=float_keras())
    keys = []
    for i in xrange(reader.num_seqs):
        t_begin = time.time()
        x, key = reader.read_next_seq()
        t_read = time.time()
        r = gmm.compute_z(x)
        t_gmm = time.time()

        num_frames = x.shape[0]
        logging.info('Extracting i-vector %d/%d for %s, num_frames: %d' %
                     (i, reader.num_seqs, key, num_frames))
        keys.append(key)

        x_buf[:, :, :] = 0
        r_buf[:, :, :] = 0
        x_buf[0, :num_frames] = x
        r_buf[0, :num_frames] = r
        y[i] = model.compute_qy_x([x_buf, r_buf], batch_size=1)[0]

        t_end = time.time()
        logging.info('Elapsed time i-vector %d/%d for %s, total: %.2f read: %.2f, gmm: %.2f, vae: %.2f' %
                     (i, reader.num_seqs, key, t_end-t_begin, t_read-t_begin,
                      t_gmm-t_read, t_end-t_gmm))

    logging.info('Extract elapsed time: %.2f' % (time.time() - start))

    writer = HypDataWriter(output_path)
    writer.write(keys, '', y)
Ejemplo n.º 16
0
def train_cw(iv_file, train_list, preproc_file, with_lnorm,
             save_tlist, append_tlist, input_path, output_path, **kwargs):
    """Fit a centering/whitening-type transform and save it.

    Args:
      iv_file: Vector file handed to the VR reader.
      train_list: Training list handed to the VR reader.
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      with_lnorm: Passed to `load_model` to choose the model class.
      save_tlist: If true, save a TransformList instead of the bare model.
      append_tlist: With `save_tlist`, append the model to the loaded
        preprocessing list (when one was loaded) instead of creating a new
        list.
      input_path: Passed to `load_model` to obtain the initial model.
      output_path: Destination passed to `save`.
      **kwargs: Extra options; VR.filter_args and CentWhiten.filter_args
        select the reader and model options respectively.
    """
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = VR(iv_file, train_list, preproc, **VR.filter_args(**kwargs))
    x = reader.read()

    start = time.time()

    transform = load_model(input_path, with_lnorm,
                           **CentWhiten.filter_args(**kwargs))
    transform.fit(x)

    logging.info('Elapsed time: %.2f s.' % (time.time()-start))

    # Statistics of the transformed data; only logged at debug level.
    x_out = transform.predict(x)
    stats = Normal(x_dim=x_out.shape[1])
    stats.fit(x=x_out)
    logging.debug(stats.mu[:4])
    logging.debug(stats.Sigma[:4,:4])

    if not save_tlist:
        result = transform
    elif append_tlist and preproc is not None:
        preproc.append(transform)
        result = preproc
    else:
        result = TransformList(transform)

    result.save(output_path)
Ejemplo n.º 17
0
def extract_ivector(seq_file, file_list, model_file, preproc_file, output_path,
                    qy_only, **kwargs):
    """Extract one vector per sequence with a TVAEY or TVAEYZ model.

    Each sequence is zero-padded to the reader's maximum length before it
    is passed to `model.compute_qy_x`.

    Args:
      seq_file, file_list: Inputs handed to the SR reader.
      model_file: File the model is loaded from.
      preproc_file: File holding a TransformList passed to the reader as
        `preproc`, or None to use no preprocessing.
      output_path: Destination handed to HypDataWriter.
      qy_only: If true load a TVAEY model, otherwise a TVAEYZ model.
      **kwargs: Extra options; SR.filter_eval_args selects the reader
        options.
    """
    set_float_cpu('float32')

    reader_args = SR.filter_eval_args(**kwargs)
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = SR(seq_file,
                file_list,
                batch_size=1,
                shuffle_seqs=False,
                preproc=preproc,
                **reader_args)

    start = time.time()

    model_class = TVAEY if qy_only else TVAEYZ
    model = model_class.load(model_file)
    model.build(max_seq_length=reader.max_batch_seq_length)

    logging.info(time.time() - start)
    logging.info(model.y_dim)

    y = np.zeros((reader.num_seqs, model.y_dim), dtype=float_keras())
    # Padded single-sequence input buffer, reused for every sequence.
    x_buf = np.zeros((1, reader.max_batch_seq_length, model.x_dim),
                     dtype=float_keras())
    keys = []
    for i in xrange(reader.num_seqs):
        x, key = reader.read_next_seq()
        logging.info('Extracting i-vector %d/%d for %s\n' %
                     (i, reader.num_seqs, key))
        keys.append(key)
        x_buf[:, :, :] = 0
        x_buf[0, :x.shape[0]] = x
        y[i] = model.compute_qy_x(x_buf, batch_size=1)[0]

    logging.info('Extract elapsed time: %.2f' % (time.time() - start))

    writer = HypDataWriter(output_path)
    writer.write(keys, '', y)
Ejemplo n.º 18
0
def load_model(input_path, with_lnorm, name, **kwargs):
    """Create a new LNorm/CentWhiten or load one from `input_path`.

    Args:
      input_path: File to load from, or None to build a fresh object.
      with_lnorm: If true the model class is LNorm, otherwise CentWhiten.
      name: Name of the fresh object, and the name searched for when
        `input_path` turns out to hold a TransformList.
      **kwargs: Constructor options for a fresh object.

    Returns:
      The loaded or created transform, or None when the file is a
      TransformList with no transform called `name`.
    """
    model_class = LNorm if with_lnorm else CentWhiten

    if input_path is None:
        return model_class(name=name, **kwargs)

    try:
        # The original called `CentWhiten(input_path)` here, which builds a
        # new object with the path as a constructor argument instead of
        # loading the saved model; both classes are now loaded alike.
        return model_class.load(input_path)
    except Exception:
        # Direct loading failed: treat the file as a TransformList.
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of triggering the fallback.
        tfl = TransformList.load(input_path)
        for tf in tfl.transforms:
            if tf.name == name:
                return tf

    return None
Ejemplo n.º 19
0
def train_linear_gbe(iv_file, train_list, preproc_file, output_path, **kwargs):
    """Fit a GBE model on labelled vectors read through VCR and save it.

    Args:
      iv_file: Vector file handed to the VCR reader.
      train_list: Training list handed to the VCR reader.
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      output_path: Destination passed to `model.save`.
      **kwargs: Extra options; VCR.filter_args and GBE.filter_train_args
        select the reader and model options respectively.
    """
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = VCR(iv_file, train_list, preproc, **VCR.filter_args(**kwargs))
    x, class_ids = reader.read()

    start = time.time()

    gbe = GBE(**GBE.filter_train_args(**kwargs))
    gbe.fit(x, class_ids)
    logging.info('Elapsed time: %.2f s.' % (time.time() - start))

    gbe.save(output_path)
Ejemplo n.º 20
0
def eval_elbo(seq_file, file_list, model_file, preproc_file, output_file,
              ubm_type, **kwargs):
    """Evaluate the ELBO of a diagonal GMM over all sequences.

    The total ELBO and the ELBO divided by the total number of frames are
    logged and written to `output_file`.

    Args:
      seq_file, file_list: Inputs handed to the SR reader.
      model_file: GMM file.
      preproc_file: File holding a TransformList passed to the reader as
        `preproc`, or None to use no preprocessing.
      output_file: Text file that receives the summary.
      ubm_type: 'diag-gmm' loads with DiagGMM.load; any other value loads
        with DiagGMM.load_from_kaldi.
      **kwargs: Extra options; SR.filter_eval_args selects the reader
        options.
    """
    reader_args = SR.filter_eval_args(**kwargs)
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = SR(seq_file,
                file_list,
                batch_size=1,
                shuffle_seqs=False,
                preproc=preproc,
                **reader_args)

    start = time.time()

    loader = DiagGMM.load if ubm_type == 'diag-gmm' else DiagGMM.load_from_kaldi
    gmm = loader(model_file)
    gmm.initialize()

    num_seqs = reader.num_seqs
    elbo = np.zeros((num_seqs, ), dtype=float_cpu())
    num_frames = np.zeros((num_seqs, ), dtype=int)
    keys = []
    for i in xrange(num_seqs):
        x, key = reader.read_next_seq()
        keys.append(key)
        elbo[i] = gmm.elbo(x)
        num_frames[i] = x.shape[0]

    total_elbo = np.sum(elbo)
    total_elbo_norm = total_elbo / np.sum(num_frames)
    logging.info('Extract elapsed time: %.2f' % (time.time() - start))
    summary = 'Total ELBO: %f\nELBO_NORM %f' % (total_elbo, total_elbo_norm)
    logging.info(summary)

    with open(output_file, 'w') as f:
        f.write(summary)
Ejemplo n.º 21
0
def compute_gmm_post(seq_file, file_list, model_file, preproc_file,
                     output_path, num_comp, **kwargs):
    """Compute sparse GMM outputs for every sequence and write them.

    For each sequence, the output of `gmm.compute_z` is reduced with
    `to_sparse(r, num_comp)`; the resulting values and indices are written
    under the suffixes '.r' and '.index'.

    Args:
      seq_file, file_list: Inputs handed to the SR reader.
      model_file: Kaldi GMM file loaded through DiagGMM.load_from_kaldi.
      preproc_file: File holding a TransformList passed to the reader as
        `preproc`, or None to use no preprocessing.
      output_path: Destination handed to HypDataWriter.
      num_comp: Second argument of `to_sparse`.
      **kwargs: Extra options; SR.filter_eval_args selects the reader
        options.
    """
    sr_args = SR.filter_eval_args(**kwargs)

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    gmm = DiagGMM.load_from_kaldi(model_file)

    sr = SR(seq_file,
            file_list,
            batch_size=1,
            shuffle_seqs=False,
            preproc=preproc,
            **sr_args)

    t1 = time.time()

    logging.info(time.time() - t1)
    # The original pre-allocated an `index` array here that was always
    # overwritten inside the loop; it has been dropped.

    hw = HypDataWriter(output_path)
    for i in xrange(sr.num_seqs):
        x, key = sr.read_next_seq()
        logging.info('Extracting i-vector %d/%d for %s, num_frames: %d' %
                     (i, sr.num_seqs, key, x.shape[0]))
        r = gmm.compute_z(x)
        r_s, index = to_sparse(r, num_comp)
        if i == 0:
            # Sanity check on the first sequence only: log the largest
            # entries of the first frame before and after the
            # sparse -> dense round trip. The original called the
            # misspelled `logging.degug`, which raised AttributeError.
            r2 = to_dense(r_s, index, r.shape[1])
            logging.debug(np.sort(r[0, :])[-12:])
            logging.debug(np.sort(r2[0, :])[-12:])
            logging.debug(np.argsort(r[0, :])[-12:])
            logging.debug(np.argsort(r2[0, :])[-12:])

        hw.write([key], '.r', [r_s])
        hw.write([key], '.index', [index])

    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))
Ejemplo n.º 22
0
def eval_pdda(iv_file, ndx_file, enroll_file, test_file,
              preproc_file,
              model_file, score_file,
              pool_method, eval_method,
              num_samples_y, num_samples_z, num_samples_elbo, qy_only,
              **kwargs):
    """Score pooled enrollment models vs. test vectors with a TVAE model.

    Enrollment vectors that share an enrollment id are grouped through
    `np.unique(..., return_inverse=True)` and scored by `eval_llr_Nvs1`.

    Args:
      iv_file, ndx_file, enroll_file, test_file: Inputs handed to the TDR
        reader.
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      model_file: File the model is loaded from.
      score_file: Destination of the TrialScores object.
      pool_method, eval_method: Forwarded to `eval_llr_Nvs1`.
      num_samples_y: Sample count used when building the model.
      num_samples_z: Sample count used when building a TVAEYZ model.
      num_samples_elbo: Passed to `eval_llr_Nvs1` as `num_samples`.
      qy_only: If true load a TVAEY model, otherwise a TVAEYZ model.
      **kwargs: Extra options; TDR.filter_args selects the reader options.
    """
    set_float_cpu('float32')
    
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr_args = TDR.filter_args(**kwargs)
    # The original unpacked the misspelled name `tdt_args` here, which
    # raised NameError before any data was read.
    tdr = TDR(iv_file, ndx_file, enroll_file, test_file, preproc, **tdr_args)
    x_e, x_t, enroll, ndx = tdr.read()
    enroll, ids_e = np.unique(enroll, return_inverse=True)

    if qy_only:
        model = TVAEY.load(model_file)
        model.build(max_seq_length=2, num_samples=num_samples_y)
    else:
        model = TVAEYZ.load(model_file)
        model.build(max_seq_length=2,
                    num_samples_y=num_samples_y, num_samples_z=num_samples_z)

    t1 = time.time()
    scores = model.eval_llr_Nvs1(x_e, ids_e, x_t,
                                 pool_method=pool_method,
                                 eval_method=eval_method,
                                 num_samples=num_samples_elbo)
    dt = time.time() - t1
    # One trial per (unique enrollment id, test vector) pair.
    num_trials = len(enroll) * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.' %
                 (dt, dt/num_trials*1000))

    s = TrialScores(enroll, ndx.seg_set, scores)
    s.save(score_file)
def tracking_plda(iv_file, ndx_file, enroll_file, segments_file, preproc_file,
                  model_file, rttm_file, plda_type, **kwargs):
    """Score segments against enrolled models with PLDA and write an RTTM.

    Besides `rttm_file`, the intermediate extended-segments object is
    saved to `rttm_file + '_es'`.

    Args:
      iv_file, ndx_file, enroll_file, segments_file: Inputs handed to the
        TDR reader.
      preproc_file: File holding a TransformList that is passed to the
        reader, or None to use no preprocessing.
      model_file: PLDA model file passed to F.load_plda.
      rttm_file: Output RTTM path.
      plda_type: PLDA type passed to F.load_plda.
      **kwargs: Accepted but not used by this function.
    """
    logging.info('loading data')
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)

    reader = TDR(iv_file, ndx_file, enroll_file, segments_file, preproc)
    x_e, x_t, enroll, ndx_seg, ext_segments = reader.read()

    logging.info('loading plda model: %s' % (model_file))
    plda = F.load_plda(plda_type, model_file)

    start = time.time()

    logging.info('computing llr')
    llr = plda.llr_1vs1(x_e, x_t)

    elapsed = time.time() - start
    num_trials = len(enroll) * x_t.shape[0]
    logging.info(
        'scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.' %
        (elapsed, elapsed / num_trials * 1000))

    trial_scores = TrialScores(enroll, ndx_seg.seg_set, llr)
    flat = flatten_segment_scores(ndx_seg, trial_scores)
    new_ext_segment_ids, ext_segment_ids, model_ids, flat_scores = flat

    new_ext_segments = prepare_output_ext_segments(
        ext_segments, new_ext_segment_ids, ext_segment_ids, model_ids,
        flat_scores)
    new_ext_segments.save(rttm_file + '_es')

    RTTM.create_spkdiar_from_ext_segments(new_ext_segments).save(rttm_file)
def extract_ivector(seq_file, file_list, gmm_file, model_file, preproc_file,
                    output_path, qy_only, **kwargs):
    """Extract one vector per sequence through a hand-built qy network.

    Instead of calling `model.build`, this variant overrides the model
    dimensions with fixed values and wraps `model.qy_net` in its own
    Keras `Model` with inputs sized to the reader's maximum sequence
    length. Each sequence is zero-padded to that length before prediction.

    Args:
      seq_file, file_list: Inputs handed to the SR reader.
      gmm_file: Kaldi GMM file loaded through DiagGMM.load_from_kaldi.
      model_file: File the TVAEYZ model is loaded from.
      preproc_file: File holding a TransformList passed to the reader as
        `preproc`, or None to use no preprocessing.
      output_path: Destination handed to HypDataWriter.
      qy_only: Not used; a TVAEYZ model is always loaded (the TVAEY branch
        is commented out).
      **kwargs: Extra options; SR.filter_eval_args selects the reader
        options.
    """

    set_float_cpu('float32')

    sr_args = SR.filter_eval_args(**kwargs)

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    gmm = DiagGMM.load_from_kaldi(gmm_file)

    sr = SR(seq_file,
            file_list,
            batch_size=1,
            shuffle_seqs=False,
            preproc=preproc,
            **sr_args)

    t1 = time.time()

    # if qy_only:
    #     model = TVAEY.load(model_file)
    # else:
    model = TVAEYZ.load(model_file)

    #model.build(max_seq_length=sr.max_batch_seq_length)
    #model.build(max_seq_length=1)
    # NOTE(review): the dimensions are hard-coded here instead of being
    # taken from the loaded model; confirm they match the model on disk.
    model.x_dim = 60
    model.r_dim = 2048
    model.y_dim = 400

    y = np.zeros((sr.num_seqs, model.y_dim), dtype=float_keras())
    # Padded single-sequence input buffers, reused for every sequence.
    xx = np.zeros((1, sr.max_batch_seq_length, model.x_dim),
                  dtype=float_keras())
    rr = np.zeros((1, sr.max_batch_seq_length, model.r_dim),
                  dtype=float_keras())
    keys = []

    # Build the prediction network once, outside the loop.
    xp = Input(shape=(
        sr.max_batch_seq_length,
        model.x_dim,
    ))
    rp = Input(shape=(
        sr.max_batch_seq_length,
        model.r_dim,
    ))
    qy_param = model.qy_net([xp, rp])
    qy_net = Model([xp, rp], qy_param)
    for i in xrange(sr.num_seqs):
        ti1 = time.time()
        x, key = sr.read_next_seq()
        ti2 = time.time()
        r = gmm.compute_z(x)
        ti3 = time.time()
        logging.info('Extracting i-vector %d/%d for %s, num_frames: %d' %
                     (i, sr.num_seqs, key, x.shape[0]))
        keys.append(key)
        # xp = Input(shape=(x.shape[0], model.x_dim,))
        # rp = Input(shape=(x.shape[0], model.r_dim,))
        # qy_param = model.qy_net([xp, rp])
        ti5 = time.time()
        # Clear the buffers so frames from a longer previous sequence do
        # not leak into the padding of this one.
        xx[:, :, :] = 0
        rr[:, :, :] = 0
        xx[0, :x.shape[0]] = x
        rr[0, :x.shape[0]] = r
        # x = np.expand_dims(x, axis=0)
        # r = np.expand_dims(r, axis=0)
        # qy_net = Model([xp, rp], qy_param)
        y[i] = qy_net.predict([xx, rr], batch_size=1)[0]
        # del qy_net
        # y[i] = model.compute_qy_x2([x, r], batch_size=1)[0]
        #for i in xrange(10):
        #gc.collect()
        ti4 = time.time()
        # NOTE(review): the value labelled "qy" is ti5 - ti3, which in the
        # current code only spans the logging call and keys.append above
        # (the per-sequence network build it used to time is commented out).
        logging.info(
            'Elapsed time i-vector %d/%d for %s, total: %.2f read: %.2f, gmm: %.2f, vae: %.2f qy: %.2f'
            % (i, sr.num_seqs, key, ti4 - ti1, ti2 - ti1, ti3 - ti2, ti4 - ti5,
               ti5 - ti3))

        # print('Elapsed time i-vector %d/%d for %s, total: %.2f read: %.2f, gmm: %.2f, vae: %.2f' %
        #       (i, sr.num_seqs, key, ti4-ti1, ti2-ti1, ti3-ti2, ti4-ti3))

    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))

    hw = HypDataWriter(output_path)
    hw.write(keys, '', y)
Ejemplo n.º 25
0
def extract_ivector(seq_file, file_list, post_file, model_file, preproc_file,
                    output_path, qy_only, max_length, layer_name, **kwargs):
    """Extract embeddings from an inner layer of the model's pt network.

    A network is assembled that maps the (x, r) inputs through
    `model.qy_net` and then through `model.pt_net` up to `layer_name`.
    Sequences longer than `max_length` are processed in chunks of
    `max_length` frames and the chunk outputs are averaged; the final
    chunk is taken from the end of the sequence, so it may overlap the
    previous one.

    Args:
      seq_file, file_list, post_file: Inputs handed to the SR reader.
      model_file: File the TVAEYZ model is loaded from.
      preproc_file: File holding a TransformList passed to the reader as
        `preproc`, or None to use no preprocessing.
      output_path: Destination handed to HypDataWriter.
      qy_only: Not used; a TVAEYZ model is always loaded.
      max_length: Upper bound on the chunk length; the effective value is
        the minimum of this and the reader's maximum sequence length.
      layer_name: Name of the layer of `model.pt_net` whose output is
        extracted.
      **kwargs: Extra options; SR.filter_eval_args selects the reader
        options.
    """
    set_float_cpu('float32')

    sr_args = SR.filter_eval_args(**kwargs)

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    sr = SR(seq_file,
            file_list,
            post_file,
            batch_size=1,
            shuffle_seqs=False,
            preproc=preproc,
            **sr_args)

    t1 = time.time()

    model = TVAEYZ.load(model_file)

    pt_input = model.pt_net.input
    pt_layer = model.pt_net.get_layer(layer_name)
    pt_output = pt_layer.output
    pt_dim = pt_layer.output_shape[-1]
    model.build(max_seq_length=1)

    max_length = np.minimum(sr.max_batch_seq_length, max_length)

    y = np.zeros((sr.num_seqs, pt_dim), dtype=float_keras())
    # Single-sequence input buffers, reused for every sequence/chunk.
    xx = np.zeros((1, max_length, model.x_dim), dtype=float_keras())
    rr = np.zeros((1, max_length, model.r_dim), dtype=float_keras())
    keys = []

    xp = Input(shape=(
        max_length,
        model.x_dim,
    ))
    rp = Input(shape=(
        max_length,
        model.r_dim,
    ))
    qy_param = model.qy_net([xp, rp])

    # Truncate the pt network at the requested layer and chain it after
    # the first output of the qy network.
    pt_net = Model(pt_input, pt_output)
    emb = pt_net(qy_param[0])
    emb_net = Model([xp, rp], emb)
    model.pt_net.summary()
    pt_net.summary()
    emb_net.summary()
    logging.info(layer_name)

    for i in xrange(sr.num_seqs):
        ti1 = time.time()
        x, r, key = sr.read_next_seq()
        ti2 = time.time()
        logging.info('Extracting i-vector %d/%d for %s, num_frames: %d' %
                     (i, sr.num_seqs, key, x.shape[0]))
        keys.append(key)
        xx[:, :, :] = 0
        rr[:, :, :] = 0

        if x.shape[0] <= max_length:
            xx[0, :x.shape[0]] = x
            rr[0, :x.shape[0]] = r
            y[i] = emb_net.predict([xx, rr], batch_size=1)
        else:
            # float() forces true division: with integer operands under
            # Python 2 the quotient was floored before np.ceil, which
            # undercounted the chunks (and the averaging denominator).
            num_batches = int(np.ceil(float(x.shape[0]) / max_length))
            for j in xrange(num_batches - 1):
                start = j * max_length
                xx[0] = x[start:start + max_length]
                rr[0] = r[start:start + max_length]
                y[i] += emb_net.predict([xx, rr], batch_size=1).ravel()
            # Last chunk: the final max_length frames of the sequence.
            xx[0] = x[-max_length:]
            rr[0] = r[-max_length:]
            y[i] += emb_net.predict([xx, rr], batch_size=1).ravel()
            y[i] /= num_batches

        ti4 = time.time()
        logging.info(
            'Elapsed time i-vector %d/%d for %s, total: %.2f read: %.2f, vae: %.2f'
            % (i, sr.num_seqs, key, ti4 - ti1, ti2 - ti1, ti4 - ti2))

    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))

    hw = HypDataWriter(output_path)
    hw.write(keys, '', y)
Ejemplo n.º 26
0
def train_embed(data_path, train_list, val_list, px_net_path, pt_net_path,
                qy_net_path, qz_net_path, init_path, epochs, preproc_file,
                output_path, freeze_embed, **kwargs):
    """Train the embedding VAE with generators and save the final model.

    Without `init_path`, training resumes from a checkpoint found under
    `output_path` if there is one; otherwise a new model is assembled from
    the four network architecture files. The final model is saved to
    `output_path + '/model'`.

    Args:
      data_path: Data location handed to the sequence generators.
      train_list: Training list.
      val_list: Validation list, or None to train without validation.
      px_net_path, pt_net_path, qy_net_path, qz_net_path: Architecture
        files loaded with `load_model_arch` for a new model.
      init_path: Model file to initialize from, or None.
      epochs: Number of epochs passed to the checkpoint loader and to
        `fit_generator`.
      preproc_file: File holding a TransformList passed to the generators
        as `transform`, or None to use no preprocessing.
      output_path: Directory used for checkpoints/callbacks and the final
        model.
      freeze_embed: If true, `model.prepool_net` is made non-trainable.
      **kwargs: Extra options. Must contain `px_weight`, `pt_weight`,
        `kl_qy_weight` and `kl_qz_weight`; the generator, model,
        optimizer and callback options are selected by the respective
        `filter_args` helpers.
    """
    # Keep the returned handle in a local for the duration of training.
    g = reserve_gpu()
    set_float_cpu(float_keras())

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    sg_args = G.filter_args(**kwargs)
    sg = G(data_path,
           train_list,
           shuffle_seqs=True,
           reset_rng=False,
           transform=preproc,
           **sg_args)
    max_length = sg.max_seq_length
    # Both stay None when no validation list is given, so the fit call
    # below no longer touches an unbound `sg_val`.
    sg_val = None
    gen_val = None
    if val_list is not None:
        sg_val = G(data_path,
                   val_list,
                   transform=preproc,
                   shuffle_seqs=False,
                   reset_rng=True,
                   **sg_args)
        # Train and validation batches are built with a common length.
        max_length = max(max_length, sg_val.max_seq_length)
        gen_val = data_generator(sg_val, max_length)

    gen_train = data_generator(sg, max_length)

    if init_path is None:
        model, init_epoch = KML.load_checkpoint(output_path, epochs)
        if model is None:
            embed_args = VAE.filter_args(**kwargs)
            logging.debug(embed_args)
            px_net = load_model_arch(px_net_path)
            qy_net = load_model_arch(qy_net_path)
            qz_net = load_model_arch(qz_net_path)
            pt_net = load_model_arch(pt_net_path)

            model = VAE(px_net, qy_net, qz_net, pt_net, **embed_args)
        else:
            # Resuming: move the training generator to the checkpoint epoch.
            sg.cur_epoch = init_epoch
            sg.reset()
    else:
        logging.info('loading init model: %s' % init_path)
        model = KML.load(init_path)

    # The loss weights always come from the current options, also for
    # models restored from a checkpoint or an init file.
    model.px_weight = kwargs['px_weight']
    model.pt_weight = kwargs['pt_weight']
    model.kl_qy_weight = kwargs['kl_qy_weight']
    model.kl_qz_weight = kwargs['kl_qz_weight']

    opt_args = KOF.filter_args(**kwargs)
    cb_args = KCF.filter_args(**kwargs)
    logging.debug(sg_args)
    logging.debug(opt_args)
    logging.debug(cb_args)

    logging.info('max length: %d' % max_length)

    t1 = time.time()

    if freeze_embed:
        model.prepool_net.trainable = False

    model.build(max_length)
    logging.info(time.time() - t1)

    cb = KCF.create_callbacks(model, output_path, **cb_args)
    opt = KOF.create_optimizer(**opt_args)
    model.compile(optimizer=opt)

    # The original read `sg_val.steps_per_epoch` unconditionally, which
    # raised NameError when `val_list` was None.
    validation_steps = sg_val.steps_per_epoch if sg_val is not None else None

    h = model.fit_generator(gen_train,
                            validation_data=gen_val,
                            steps_per_epoch=sg.steps_per_epoch,
                            validation_steps=validation_steps,
                            initial_epoch=sg.cur_epoch,
                            epochs=epochs,
                            callbacks=cb,
                            max_queue_size=10)

    logging.info('Train elapsed time: %.2f' % (time.time() - t1))

    model.save(output_path + '/model')
def extract_embed(seq_file, file_list, model_file, preproc_file, output_path,
                  max_length, layer_names, **kwargs):
    """Extract one embedding per sequence with a SeqEmbed model and write them.

    Sequences are read one at a time, copied into a zeroed buffer of
    ``max_length`` frames and passed to ``model.predict_embed``. A sequence
    longer than the buffer is cut into equally sized chunks (the last chunk
    is taken from the end of the sequence, so it may overlap the previous
    one) and the chunk embeddings are averaged.

    Args:
      seq_file: first positional argument of the ``SR`` reader.
      file_list: second positional argument of the ``SR`` reader.
      model_file: file loaded with ``SeqEmbed.load``.
      preproc_file: file loaded with ``TransformList.load``, or ``None`` to
        read the sequences without a preprocessor.
      output_path: destination handed to ``HypDataWriter``.
      max_length: upper bound for the buffer length; it is additionally
        capped by the reader's ``max_batch_seq_length``.
      layer_names: forwarded to ``model.build_embed``.
      **kwargs: filtered with ``SR.filter_eval_args`` and forwarded to ``SR``.
    """
    set_float_cpu('float32')

    reader_args = SR.filter_eval_args(**kwargs)
    preproc = (TransformList.load(preproc_file)
               if preproc_file is not None else None)
    reader = SR(seq_file, file_list,
                batch_size=1, shuffle_seqs=False, preproc=preproc,
                **reader_args)

    t_start = time.time()

    model = SeqEmbed.load(model_file)
    model.build()
    print(layer_names)
    model.build_embed(layer_names)
    embed_dim = model.embed_dim

    max_length = np.minimum(reader.max_batch_seq_length, max_length)

    # One output row per sequence, plus a single reusable input buffer.
    embeds = np.zeros((reader.num_seqs, embed_dim), dtype=float_keras())
    padded = np.zeros((1, max_length, model.x_dim), dtype=float_keras())
    keys = []

    for idx in xrange(reader.num_seqs):
        t_begin = time.time()
        feats, key = reader.read_next_seq()
        t_read = time.time()
        num_frames = feats.shape[0]
        print('Extracting embeddings %d/%d for %s, num_frames: %d' %
              (idx, reader.num_seqs, key, num_frames))
        keys.append(key)
        padded.fill(0)

        if num_frames > max_length:
            # Split into the fewest chunks that fit, all of the same size.
            num_chunks = int(np.ceil(float(num_frames) / max_length))
            chunk_size = int(np.ceil(float(num_frames) / num_chunks))
            starts = [k * chunk_size for k in xrange(num_chunks - 1)]
            # The final chunk is aligned to the end of the sequence.
            starts.append(num_frames - chunk_size)
            for start in starts:
                padded[0, :chunk_size] = feats[start:start + chunk_size]
                embeds[idx] += model.predict_embed(padded,
                                                   batch_size=1).ravel()
            embeds[idx] /= num_chunks
        else:
            padded[0, :num_frames] = feats
            embeds[idx] = model.predict_embed(padded, batch_size=1)

        t_end = time.time()
        print(
            'Elapsed time embeddings %d/%d for %s, total: %.2f read: %.2f, vae: %.2f'
            % (idx, reader.num_seqs, key, t_end - t_begin, t_read - t_begin,
               t_end - t_read))

    print('Extract elapsed time: %.2f' % (time.time() - t_start))

    writer = HypDataWriter(output_path)
    writer.write(keys, '', embeds)
Ejemplo n.º 28
0
def train_embed(seq_file, train_list, val_list, class_list, embed_file,
                init_path, epochs, batch_size, preproc_file, output_path,
                post_pdf, pooling_input, pooling_output, min_var, **kwargs):
    """Train a SeqMetaEmbed model from generator-fed batches and save it.

    Args:
      seq_file: first positional argument of the ``SR`` readers.
      train_list: list used to build the training reader.
      val_list: list used to build the validation reader, or ``None`` to
        train without validation.
      class_list: third positional argument of the ``SR`` readers.
      embed_file: architecture file loaded with ``load_model_arch`` when no
        ``init_path`` is given.
      init_path: model loaded with ``SeqMetaEmbed.load``; ``None`` builds a
        new model from ``embed_file``.
      epochs: forwarded to ``fit_generator``.
      batch_size: forwarded to the ``SR`` readers.
      preproc_file: file loaded with ``TransformList.load``, or ``None``.
      output_path: passed to ``KCF.create_callbacks``; the final model is
        saved to ``output_path + '/model'``.
      post_pdf, pooling_input, pooling_output, min_var: forwarded to the
        ``SeqMetaEmbed`` constructor.
      **kwargs: filtered by ``SR.filter_args``, ``SR.filter_val_args``,
        ``KOF.filter_args`` and ``KCF.filter_args``.
    """
    set_float_cpu(float_keras())

    sr_args = SR.filter_args(**kwargs)
    sr_val_args = SR.filter_val_args(**kwargs)
    opt_args = KOF.filter_args(**kwargs)
    cb_args = KCF.filter_args(**kwargs)

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    sr = SR(seq_file,
            train_list,
            class_list,
            batch_size=batch_size,
            preproc=preproc,
            **sr_args)
    max_length = sr.max_batch_seq_length
    gen_val = None
    # Stays None without a validation list, so fit_generator is never handed
    # an attribute of a reader that was not created.
    val_steps = None
    if val_list is not None:
        sr_val = SR(seq_file,
                    val_list,
                    class_list,
                    batch_size=batch_size,
                    preproc=preproc,
                    shuffle_seqs=False,
                    seq_split_mode='sequential',
                    seq_split_overlap=0,
                    reset_rng=True,
                    **sr_val_args)
        # Both generators share one length: the larger of the two readers.
        max_length = max(max_length, sr_val.max_batch_seq_length)
        gen_val = data_generator(sr_val, max_length)
        val_steps = sr_val.num_batches

    gen_train = data_generator(sr, max_length)

    t1 = time.time()
    if init_path is None:
        embed_net = load_model_arch(embed_file)

        model = SeqMetaEmbed(embed_net,
                             num_classes=sr.num_classes,
                             post_pdf=post_pdf,
                             pooling_input=pooling_input,
                             pooling_output=pooling_output,
                             min_var=min_var)
    else:
        logging.info('loading init model: %s' % init_path)
        model = SeqMetaEmbed.load(init_path)

    logging.info('max length: %d' % max_length)
    model.build(max_length)
    logging.info(time.time() - t1)

    cb = KCF.create_callbacks(model, output_path, **cb_args)
    opt = KOF.create_optimizer(**opt_args)
    model.compile(optimizer=opt)

    model.fit_generator(gen_train,
                        validation_data=gen_val,
                        steps_per_epoch=sr.num_batches,
                        validation_steps=val_steps,
                        epochs=epochs,
                        callbacks=cb,
                        max_queue_size=10)

    logging.info('Train elapsed time: %.2f' % (time.time() - t1))

    model.save(output_path + '/model')
Ejemplo n.º 29
0
def train_embed(data_path, train_list, val_list,
                train_list_adapt, val_list_adapt,
                prepool_net_path, postpool_net_path,
                init_path,
                epochs, 
                preproc_file, output_path,
                freeze_prepool, freeze_postpool_layers,
                **kwargs):
    """Train (or resume training of) a SeqEmbed model and save it.

    When ``init_path`` is ``None`` the function first tries to resume from a
    checkpoint in ``output_path`` via ``KML.load_checkpoint``; if none is
    returned, a new ``SeqEmbed`` is built from the two architecture files.
    Otherwise the model is loaded from ``init_path`` with ``KML.load``.

    Args:
      data_path: first positional argument of the ``G`` generators.
      train_list, train_list_adapt: lists used to build the training
        generator.
      val_list, val_list_adapt: lists used to build the validation
        generator; ``val_list=None`` trains without validation.
      prepool_net_path, postpool_net_path: architecture files loaded with
        ``load_model_arch`` when a new model is created.
      init_path: model to start from, or ``None``.
      epochs: forwarded to ``KML.load_checkpoint`` and ``fit_generator``.
      preproc_file: file loaded with ``TransformList.load``, or ``None``.
      output_path: checkpoint/callback directory; the final model is saved
        to ``output_path + '/model'``.
      freeze_prepool: when true, ``model.freeze_prepool_net()`` is called.
      freeze_postpool_layers: when not ``None``, forwarded to
        ``model.freeze_postpool_net_layers``.
      **kwargs: filtered by ``SeqEmbed.filter_args``, ``G.filter_args``,
        ``KOF.filter_args`` and ``KCF.filter_args``.
    """
    set_float_cpu(float_keras())
        
    if init_path is None:
        model, init_epoch = KML.load_checkpoint(output_path, epochs)
        if model is None:
            emb_args = SeqEmbed.filter_args(**kwargs)
            prepool_net = load_model_arch(prepool_net_path)
            postpool_net = load_model_arch(postpool_net_path)

            model = SeqEmbed(prepool_net, postpool_net,
                             loss='categorical_crossentropy',
                             **emb_args)
        else:
            # Set before G.filter_args runs so the generator arguments can
            # pick up the checkpoint epoch.
            kwargs['init_epoch'] = init_epoch
    else:
        logging.info('loading init model: %s' % init_path)
        model = KML.load(init_path)

    
    sg_args = G.filter_args(**kwargs)
    opt_args = KOF.filter_args(**kwargs)
    cb_args = KCF.filter_args(**kwargs)
    logging.debug(sg_args)
    logging.debug(opt_args)
    logging.debug(cb_args)
    
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None
        
    sg = G(data_path, train_list, train_list_adapt,
           shuffle_seqs=True, reset_rng=False,
           transform=preproc, **sg_args)
    max_length = sg.max_seq_length
    gen_val = None
    # Stays None without a validation list, so fit_generator is never handed
    # an attribute of a generator that was not created.
    val_steps = None
    if val_list is not None:
        sg_val = G(data_path, val_list, val_list_adapt,
                    transform=preproc,
                    shuffle_seqs=False, reset_rng=True,
                    **sg_args)
        # Both generators share one length: the larger of the two.
        max_length = max(max_length, sg_val.max_seq_length)
        gen_val = data_generator(sg_val, max_length)
        val_steps = sg_val.steps_per_epoch

    gen_train = data_generator(sg, max_length)
    
    logging.info('max length: %d' % max_length)
    
    t1 = time.time()
    if freeze_prepool:
        model.freeze_prepool_net()

    if freeze_postpool_layers is not None:
        model.freeze_postpool_net_layers(freeze_postpool_layers)
    
    model.build(max_length)
    
    cb = KCF.create_callbacks(model, output_path, **cb_args)
    opt = KOF.create_optimizer(**opt_args)
    model.compile(optimizer=opt)
    
    model.fit_generator(gen_train, validation_data=gen_val,
                        steps_per_epoch=sg.steps_per_epoch,
                        validation_steps=val_steps,
                        initial_epoch=sg.cur_epoch,
                        epochs=epochs, callbacks=cb, max_queue_size=10)
                          
    logging.info('Train elapsed time: %.2f' % (time.time() - t1))
    
    model.save(output_path + '/model')
def extract_ivector(seq_file, file_list, gmm_file, model_file, preproc_file,
                    output_path, qy_only, max_length, **kwargs):
    """Extract one i-vector per sequence with a TVAEYZ model and write them.

    For every sequence the GMM is evaluated with ``gmm.compute_z`` and the
    features plus that GMM output are fed to a Keras model wrapping
    ``model.qy_net``; the first output of that model is kept. A sequence
    longer than ``max_length`` is processed in windows of ``max_length``
    frames (the last window is aligned to the end of the sequence, so it may
    overlap the previous one) and the window outputs are averaged.

    Args:
      seq_file: first positional argument of the ``SR`` reader.
      file_list: second positional argument of the ``SR`` reader.
      gmm_file: file loaded with ``DiagGMM.load_from_kaldi``.
      model_file: file loaded with ``TVAEYZ.load``.
      preproc_file: file loaded with ``TransformList.load``, or ``None``.
      output_path: destination handed to ``HypDataWriter``.
      qy_only: currently unused; a ``TVAEYZ`` model is always loaded.
      max_length: upper bound for the window length; it is additionally
        capped by the reader's ``max_batch_seq_length``.
      **kwargs: filtered with ``SR.filter_eval_args`` and forwarded to ``SR``.
    """
    set_float_cpu('float32')

    sr_args = SR.filter_eval_args(**kwargs)

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    gmm = DiagGMM.load_from_kaldi(gmm_file)

    sr = SR(seq_file,
            file_list,
            batch_size=1,
            shuffle_seqs=False,
            preproc=preproc,
            **sr_args)

    t1 = time.time()

    model = TVAEYZ.load(model_file)

    # The model is built with a sequence length of 1; the network actually
    # used for prediction is rebuilt below with max_length-sized inputs.
    model.build(max_seq_length=1)

    max_length = np.minimum(sr.max_batch_seq_length, max_length)

    y = np.zeros((sr.num_seqs, model.y_dim), dtype=float_keras())
    xx = np.zeros((1, max_length, model.x_dim), dtype=float_keras())
    rr = np.zeros((1, max_length, model.r_dim), dtype=float_keras())
    keys = []

    xp = Input(shape=(
        max_length,
        model.x_dim,
    ))
    rp = Input(shape=(
        max_length,
        model.r_dim,
    ))
    qy_param = model.qy_net([xp, rp])
    qy_net = Model([xp, rp], qy_param)

    for i in xrange(sr.num_seqs):
        ti1 = time.time()
        x, key = sr.read_next_seq()
        ti2 = time.time()
        r = gmm.compute_z(x)
        ti3 = time.time()
        logging.info('Extracting i-vector %d/%d for %s, num_frames: %d' %
                     (i, sr.num_seqs, key, x.shape[0]))
        keys.append(key)
        xx[:, :, :] = 0
        rr[:, :, :] = 0

        if x.shape[0] <= max_length:
            xx[0, :x.shape[0]] = x
            rr[0, :x.shape[0]] = r
            y[i] = qy_net.predict([xx, rr], batch_size=1)[0]
        else:
            # Force true division: with integer (floor) division the count
            # is rounded down before ceil() and frames in the middle of the
            # sequence would never be scored.
            num_batches = int(np.ceil(float(x.shape[0]) / max_length))
            for j in xrange(num_batches - 1):
                start = j * max_length
                xx[0] = x[start:start + max_length]
                rr[0] = r[start:start + max_length]
                y[i] += qy_net.predict([xx, rr], batch_size=1)[0].ravel()
            # Last window is taken from the end so it is always full length.
            xx[0] = x[-max_length:]
            rr[0] = r[-max_length:]
            y[i] += qy_net.predict([xx, rr], batch_size=1)[0].ravel()
            y[i] /= num_batches

        ti4 = time.time()
        logging.info(
            'Elapsed time i-vector %d/%d for %s, total: %.2f read: %.2f, gmm: %.2f, vae: %.2f'
            %
            (i, sr.num_seqs, key, ti4 - ti1, ti2 - ti1, ti3 - ti2, ti4 - ti3))

    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))

    hw = HypDataWriter(output_path)
    hw.write(keys, '', y)