def eval_plda(iv_file, ndx_file, enroll_file, test_file, preproc_file,
              model_file, score_file, plda_type, **kwargs):

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr_args = TDR.filter_args(**kwargs)
    tdr = TDR(iv_file, ndx_file, enroll_file, test_file, preproc, **tdr_args)
    x_e, x_t, enroll, ndx = tdr.read()

    model = F.load_plda(plda_type, model_file)

    t1 = time.time()
    scores = model.llr_1vs1(x_e, x_t)
    dt = time.time() - t1
    num_trials = x_e.shape[0] * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (dt, dt/num_trials*1000))

    s = TrialScores(enroll, ndx.seg_set, scores)
    s.save(score_file)
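# A minimal usage sketch for the entry point above. All paths and the
# 'splda' plda_type value are illustrative assumptions, not values taken
# from this repo.
def example_eval_plda():
    eval_plda(iv_file='exp/ivectors/ivector.h5',
              ndx_file='data/trials/ndx.h5',
              enroll_file='data/trials/enroll.scp',
              test_file='data/trials/test.scp',
              preproc_file='exp/transforms/lda_lnorm.h5',
              model_file='exp/plda/plda.h5',
              score_file='exp/scores/plda_scores',
              plda_type='splda')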
def train_mvn(iv_file, train_list, preproc_file,
              name, save_tlist, append_tlist, output_path, **kwargs):

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    vr_args = VR.filter_args(**kwargs)
    vr = VR(iv_file, train_list, preproc, **vr_args)
    x = vr.read()

    t1 = time.time()
    model = MVN(name=name)
    model.fit(x)
    logging.info('Elapsed time: %.2f s.' % (time.time() - t1))

    if save_tlist:
        if append_tlist and preproc is not None:
            preproc.append(model)
            model = preproc
        else:
            model = TransformList(model)

    model.save(output_path)
def eval_cos(iv_file, ndx_file, enroll_file, test_file, preproc_file,
             score_file, **kwargs):

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr_args = TDR.filter_args(**kwargs)
    tdr = TDR(iv_file, ndx_file, enroll_file, test_file, preproc, **tdr_args)
    x_e, x_t, enroll, ndx = tdr.read()

    # Length-normalize so that the dot product below is cosine scoring.
    lnorm = LNorm()
    x_e = lnorm.predict(x_e)
    x_t = lnorm.predict(x_t)

    t1 = time.time()
    scores = np.dot(x_e, x_t.T)
    dt = time.time() - t1
    num_trials = x_e.shape[0] * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (dt, dt/num_trials*1000))

    s = TrialScores(enroll, ndx.seg_set, scores)
    s.save(score_file)
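# Self-contained check of the identity eval_cos relies on: after length
# normalization, the plain dot product equals cosine similarity. Pure
# numpy; note that hyperion's LNorm may additionally scale by a constant,
# which only rescales all scores uniformly.
def check_cosine_identity():
    rng = np.random.RandomState(0)
    a = rng.randn(5, 40)
    b = rng.randn(7, 40)
    an = a / np.linalg.norm(a, axis=1, keepdims=True)
    bn = b / np.linalg.norm(b, axis=1, keepdims=True)
    cos1 = np.dot(an, bn.T)
    cos2 = np.dot(a, b.T) / np.outer(np.linalg.norm(a, axis=1),
                                     np.linalg.norm(b, axis=1))
    assert np.allclose(cos1, cos2)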
def plot_vector_hist(iv_file, v_list, preproc_file, output_path,
                     num_bins, normed, **kwargs):

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    vr_args = VR.filter_args(**kwargs)
    vr = VR(iv_file, v_list, preproc, **vr_args)
    x = vr.read()

    t1 = time.time()

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # One histogram per vector dimension, each saved as a separate pdf.
    for i in xrange(x.shape[1]):
        fig_file = '%s/D%04d.pdf' % (output_path, i)
        plt.hist(x[:, i], num_bins, normed=normed)
        plt.xlabel('Dim %d' % i)
        plt.grid(True)
        plt.savefig(fig_file)
        plt.clf()

    logging.info('Elapsed time: %.2f s.' % (time.time() - t1))
def train_plda(iv_file, train_list, val_list, preproc_file,
               epochs, ml_md, md_epochs, output_path, **kwargs):

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    vcr_args = VCR.filter_args(**kwargs)
    vcr_train = VCR(iv_file, train_list, preproc, **vcr_args)
    x, class_ids = vcr_train.read()

    x_val = None
    class_ids_val = None
    if val_list is not None:
        vcr_val = VCR(iv_file, val_list, preproc, **vcr_args)
        x_val, class_ids_val = vcr_val.read()

    t1 = time.time()
    plda_args = F.filter_train_args(**kwargs)
    model = F.create_plda(**plda_args)
    elbos = model.fit(x, class_ids, x_val=x_val, class_ids_val=class_ids_val,
                      epochs=epochs, ml_md=ml_md, md_epochs=md_epochs)
    logging.info('Elapsed time: %.2f s.' % (time.time() - t1))

    model.save(output_path)

    # Save the ELBO learning curves as csv, with the epoch index as the
    # first column.
    elbo = np.vstack(elbos)
    num = np.arange(epochs)
    elbo = np.vstack((num, elbo)).T
    elbo_path = os.path.splitext(output_path)[0] + '.csv'
    np.savetxt(elbo_path, elbo, delimiter=',')
def train_gauss(iv_file, train_list, preproc_file,
                save_tlist, append_tlist, input_path, output_path, **kwargs):

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    vr_args = VR.filter_args(**kwargs)
    vr = VR(iv_file, train_list, preproc, **vr_args)
    x = vr.read()

    t1 = time.time()
    model_args = Gaussianizer.filter_args(**kwargs)
    model = load_model(input_path, **model_args)
    model.fit(x)
    logging.info('Elapsed time: %.2f s.' % (time.time() - t1))

    if save_tlist:
        if append_tlist and preproc is not None:
            preproc.append(model)
            model = preproc
        else:
            model = TransformList(model)

    model.save(output_path)
def eval_plda(iv_file, ndx_file, enroll_file, test_file, preproc_file,
              model_file, score_file, plda_type, pool_method, **kwargs):

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr_args = TDR.filter_args(**kwargs)
    tdr = TDR(iv_file, ndx_file, enroll_file, test_file, preproc, **tdr_args)
    x_e, x_t, enroll, ndx = tdr.read()

    # Map enrollment segments to unique models for N-vs-1 trials.
    enroll, ids_e = np.unique(enroll, return_inverse=True)

    model = F.load_plda(plda_type, model_file)

    t1 = time.time()
    scores = model.llr_Nvs1(x_e, x_t, method=pool_method, ids1=ids_e)
    dt = time.time() - t1
    num_trials = len(enroll) * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (dt, dt/num_trials*1000))

    s = TrialScores(enroll, ndx.seg_set, scores)
    s.save(score_file)
def eval_svm(iv_file, class2int_file, test_file, preproc_file,
             model_file, score_file, vector_score_file, eval_type, **kwargs):

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr_args = TDR.filter_args(**kwargs)
    tdr = TDR(iv_file, class2int_file, test_file, preproc, **tdr_args)
    x, ndx = tdr.read()

    model = SVM.load(model_file)

    t1 = time.time()
    scores = model.predict(x, eval_type)
    dt = time.time() - t1
    num_trials = scores.shape[0] * scores.shape[1]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (dt, dt/num_trials*1000))

    s = TrialScores(ndx.model_set, ndx.seg_set, scores.T)
    s.save(score_file)

    if vector_score_file is not None:
        h5 = HDW(vector_score_file)
        h5.write(ndx.seg_set, '', scores)
def eval_plda(iv_file, ndx_file, enroll_file, test_subseg2orig_file,
              preproc_file, model_file, score_file, plda_type, **kwargs):

    logging.info('loading data')
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr = TDR(iv_file, ndx_file, enroll_file, None,
              test_subseg2orig_file, preproc)
    x_e, x_t, enroll, ndx, orig_seg = tdr.read()

    logging.info('loading plda model: %s' % (model_file))
    model = F.load_plda(plda_type, model_file)

    t1 = time.time()
    logging.info('computing llr')
    scores = model.llr_1vs1(x_e, x_t)
    dt = time.time() - t1
    num_trials = len(enroll) * x_t.shape[0]
    logging.info('scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.'
                 % (dt, dt/num_trials*1000))

    logging.info('combine cluster scores')
    scores = combine_diar_scores(ndx, orig_seg, scores)

    logging.info('saving scores to %s' % (score_file))
    s = TrialScores(enroll, ndx.seg_set, scores)
    s = s.align_with_ndx(ndx)
    s.save_txt(score_file)
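# combine_diar_scores is imported, not defined in this file. A plausible
# minimal sketch, under the assumption that orig_seg maps each test
# column (diarization sub-segment) to its original segment and that
# cluster scores are pooled with a max:
def combine_diar_scores_sketch(ndx, orig_seg, subseg_scores):
    scores = np.zeros((subseg_scores.shape[0], len(ndx.seg_set)))
    for j, seg in enumerate(ndx.seg_set):
        # Columns whose sub-segments belong to original segment seg.
        idx = orig_seg == seg
        scores[:, j] = subseg_scores[:, idx].max(axis=1)
    return scores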
def train_lda(iv_file, train_list, preproc_file, lda_dim,
              name, save_tlist, append_tlist, output_path, **kwargs):

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    vcr_args = VCR.filter_args(**kwargs)
    vcr = VCR(iv_file, train_list, preproc, **vcr_args)
    x, class_ids = vcr.read()

    t1 = time.time()
    model = LDA(lda_dim=lda_dim, name=name)
    model.fit(x, class_ids)
    logging.info('Elapsed time: %.2f s.' % (time.time() - t1))

    # Sanity check: scatter matrices of the projected data.
    x = model.predict(x)
    s_mat = SbSw()
    s_mat.fit(x, class_ids)
    logging.debug(s_mat.Sb[:4, :4])
    logging.debug(s_mat.Sw[:4, :4])

    if save_tlist:
        if append_tlist and preproc is not None:
            preproc.append(model)
            model = preproc
        else:
            model = TransformList(model)

    model.save(output_path)
def eval_plda(iv_file, ndx_file, enroll_file, test_subseg2orig_file,
              preproc_file, coh_iv_file, coh_list, coh_nbest,
              coh_nbest_discard, model_file, score_file, plda_type,
              **kwargs):

    logging.info('loading data')
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr = TDR(iv_file, ndx_file, enroll_file, None,
              test_subseg2orig_file, preproc)
    x_e, x_t, enroll, ndx, orig_seg = tdr.read()

    logging.info('loading plda model: %s' % (model_file))
    model = F.load_plda(plda_type, model_file)

    t1 = time.time()
    logging.info('computing llr')
    scores = model.llr_1vs1(x_e, x_t)
    dt = time.time() - t1
    num_trials = len(enroll) * x_t.shape[0]
    logging.info('scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.'
                 % (dt, dt/num_trials*1000))

    logging.info('loading cohort data')
    vr = VR(coh_iv_file, coh_list, preproc)
    x_coh = vr.read()

    t2 = time.time()
    logging.info('score cohort vs test')
    scores_coh_test = model.llr_1vs1(x_coh, x_t)
    logging.info('score enroll vs cohort')
    scores_enr_coh = model.llr_1vs1(x_e, x_coh)
    dt = time.time() - t2
    logging.info('cohort-scoring elapsed time: %.2f s.' % (dt))

    t2 = time.time()
    logging.info('apply s-norm')
    snorm = SNorm(nbest=coh_nbest, nbest_discard=coh_nbest_discard)
    scores = snorm.predict(scores, scores_coh_test, scores_enr_coh)
    dt = time.time() - t2
    logging.info('s-norm elapsed time: %.2f s.' % (dt))

    dt = time.time() - t1
    logging.info('total-scoring elapsed time: %.2f s. '
                 'elapsed time per trial: %.2f ms.'
                 % (dt, dt/num_trials*1000))

    logging.info('combine cluster scores')
    scores = combine_diar_scores(ndx, orig_seg, scores)

    logging.info('saving scores to %s' % (score_file))
    s = TrialScores(enroll, ndx.seg_set, scores)
    s = s.align_with_ndx(ndx)
    s.save_txt(score_file)
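# SNorm is imported, not defined here. For orientation, a numpy sketch of
# what an adaptive S-norm plausibly computes: each raw score is
# z-normalized twice, against test-side and enroll-side cohort statistics
# estimated over the top-nbest cohort scores (after discarding the
# nbest_discard highest), and the two normalized scores are averaged.
# This is an assumption about SNorm's behavior, not its exact code.
def snorm_sketch(scores, scores_coh_test, scores_enr_coh,
                 nbest=100, nbest_discard=0):
    # Test-side stats: top cohort-vs-test scores per test segment.
    ct = np.sort(scores_coh_test, axis=0)[::-1]
    ct = ct[nbest_discard:nbest_discard + nbest]
    mu_t, s_t = ct.mean(axis=0), ct.std(axis=0)
    # Enroll-side stats: top enroll-vs-cohort scores per model.
    ec = np.sort(scores_enr_coh, axis=1)[:, ::-1]
    ec = ec[:, nbest_discard:nbest_discard + nbest]
    mu_e, s_e = ec.mean(axis=1, keepdims=True), ec.std(axis=1, keepdims=True)
    return 0.5 * ((scores - mu_t) / s_t + (scores - mu_e) / s_e)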
def extract_embed(seq_file, model_file, preproc_file, output_path,
                  max_seq_length, pooling_output, write_format, **kwargs):

    set_float_cpu('float32')

    sr_args = SDRF.filter_args(**kwargs)
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    sr = SDRF.create(seq_file, transform=preproc, **sr_args)

    t1 = time.time()
    model = SeqQEmbed.load(model_file)
    model.build(max_seq_length)
    model.build_embed(pooling_output)
    y_dim = model.embed_dim

    _, seq_lengths = sr.read_num_rows()
    sr.reset()
    num_seqs = len(seq_lengths)

    p1_y = np.zeros((num_seqs, y_dim), dtype=float_keras())
    p2_y = np.zeros((num_seqs, y_dim), dtype=float_keras())
    keys = []
    for i in xrange(num_seqs):
        ti1 = time.time()
        key, data = sr.read(1)
        ti2 = time.time()
        logging.info('Extracting embeddings %d/%d for %s, num_frames: %d'
                     % (i, num_seqs, key[0], data[0].shape[0]))
        keys.append(key[0])
        p1_y[i], p2_y[i] = model.predict_embed(data[0])
        ti4 = time.time()
        logging.info('Elapsed time embeddings %d/%d for %s, '
                     'total: %.2f read: %.2f, vae: %.2f'
                     % (i, num_seqs, key[0], ti4-ti1, ti2-ti1, ti4-ti2))

    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))

    if write_format == 'p1':
        y = p1_y
    elif write_format == 'p1+p2':
        y = np.hstack((p1_y, p2_y))
    else:
        y = p2_y

    hw = DWF.create(output_path)
    hw.write(keys, y)
def load_model(input_path, name, **kwargs):

    if input_path is None:
        return Gaussianizer(name=name, **kwargs)

    try:
        # First, try to load a stand-alone Gaussianizer model.
        return Gaussianizer.load(input_path)
    except:
        # Otherwise, look for it inside a transform list.
        tfl = TransformList.load(input_path)
        for tf in tfl.transforms:
            if tf.name == name:
                return tf
def load_model(input_path, name, **kwargs):

    if input_path is None:
        return PCA(name=name, **kwargs)

    try:
        return PCA.load(input_path)
    except:
        tfl = TransformList.load(input_path)
        for tf in tfl.transforms:
            if tf.name == name:
                return tf
def extract_ivector(seq_file, file_list, gmm_file, model_file, preproc_file,
                    output_path, qy_only, **kwargs):

    set_float_cpu('float32')

    sr_args = SR.filter_eval_args(**kwargs)
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    gmm = DiagGMM.load_from_kaldi(gmm_file)

    sr = SR(seq_file, file_list, batch_size=1, shuffle_seqs=False,
            preproc=preproc, **sr_args)

    t1 = time.time()
    # if qy_only:
    #     model = TVAEY.load(model_file)
    # else:
    model = TVAEYZ.load(model_file)
    model.build(max_seq_length=sr.max_batch_seq_length)

    y = np.zeros((sr.num_seqs, model.y_dim), dtype=float_keras())
    xx = np.zeros((1, sr.max_batch_seq_length, model.x_dim),
                  dtype=float_keras())
    rr = np.zeros((1, sr.max_batch_seq_length, model.r_dim),
                  dtype=float_keras())
    keys = []
    for i in xrange(sr.num_seqs):
        ti1 = time.time()
        x, key = sr.read_next_seq()
        ti2 = time.time()
        r = gmm.compute_z(x)
        ti3 = time.time()
        logging.info('Extracting i-vector %d/%d for %s, num_frames: %d'
                     % (i, sr.num_seqs, key, x.shape[0]))
        keys.append(key)

        # Zero-pad the sequence and its GMM responsibilities up to the
        # maximum batch length expected by the model.
        xx[:, :, :] = 0
        rr[:, :, :] = 0
        xx[0, :x.shape[0]] = x
        rr[0, :x.shape[0]] = r
        y[i] = model.compute_qy_x([xx, rr], batch_size=1)[0]
        ti4 = time.time()
        logging.info('Elapsed time i-vector %d/%d for %s, '
                     'total: %.2f read: %.2f, gmm: %.2f, vae: %.2f'
                     % (i, sr.num_seqs, key, ti4-ti1, ti2-ti1,
                        ti3-ti2, ti4-ti3))

    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))

    hw = HypDataWriter(output_path)
    hw.write(keys, '', y)
def extract_ivector(seq_file, file_list, model_file, preproc_file,
                    output_path, qy_only, **kwargs):

    set_float_cpu('float32')

    sr_args = SR.filter_eval_args(**kwargs)
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    sr = SR(seq_file, file_list, batch_size=1, shuffle_seqs=False,
            preproc=preproc, **sr_args)

    t1 = time.time()
    if qy_only:
        model = TVAEY.load(model_file)
    else:
        model = TVAEYZ.load(model_file)

    model.build(max_seq_length=sr.max_batch_seq_length)
    logging.info('Model build elapsed time: %.2f' % (time.time() - t1))
    logging.info('i-vector dim: %d' % model.y_dim)

    y = np.zeros((sr.num_seqs, model.y_dim), dtype=float_keras())
    xx = np.zeros((1, sr.max_batch_seq_length, model.x_dim),
                  dtype=float_keras())
    keys = []
    for i in xrange(sr.num_seqs):
        x, key = sr.read_next_seq()
        logging.info('Extracting i-vector %d/%d for %s'
                     % (i, sr.num_seqs, key))
        keys.append(key)
        xx[:, :, :] = 0
        xx[0, :x.shape[0]] = x
        y[i] = model.compute_qy_x(xx, batch_size=1)[0]

    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))

    hw = HypDataWriter(output_path)
    hw.write(keys, '', y)
def load_model(input_path, with_lnorm, name, **kwargs):

    if input_path is None:
        if with_lnorm:
            return LNorm(name=name, **kwargs)
        else:
            return CentWhiten(name=name, **kwargs)

    try:
        if with_lnorm:
            return LNorm.load(input_path)
        else:
            return CentWhiten.load(input_path)
    except:
        tfl = TransformList.load(input_path)
        for tf in tfl.transforms:
            if tf.name == name:
                return tf
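# The load_model helpers above share one fallback pattern: build a fresh
# transform when input_path is None, else try to load a stand-alone
# model, else pull the transform with the matching name out of a
# TransformList. A hypothetical warm-start call (the path and name are
# assumptions):
#
#   model = load_model('exp/transforms/back_tlist.h5',
#                      with_lnorm=True, name='lnorm')
#   model.fit(x)  # re-fit starting from the previously trained transform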
def train_linear_gbe(iv_file, train_list, preproc_file, output_path,
                     **kwargs):

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    vcr_args = VCR.filter_args(**kwargs)
    vcr_train = VCR(iv_file, train_list, preproc, **vcr_args)
    x, class_ids = vcr_train.read()

    t1 = time.time()
    model_args = GBE.filter_train_args(**kwargs)
    model = GBE(**model_args)
    model.fit(x, class_ids)
    logging.info('Elapsed time: %.2f s.' % (time.time() - t1))

    model.save(output_path)
def eval_elbo(seq_file, file_list, model_file, preproc_file, output_file,
              ubm_type, **kwargs):

    sr_args = SR.filter_eval_args(**kwargs)
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    sr = SR(seq_file, file_list, batch_size=1, shuffle_seqs=False,
            preproc=preproc, **sr_args)

    t1 = time.time()
    if ubm_type == 'diag-gmm':
        model = DiagGMM.load(model_file)
    else:
        model = DiagGMM.load_from_kaldi(model_file)
        model.initialize()

    elbo = np.zeros((sr.num_seqs,), dtype=float_cpu())
    num_frames = np.zeros((sr.num_seqs,), dtype=int)
    keys = []
    for i in xrange(sr.num_seqs):
        x, key = sr.read_next_seq()
        keys.append(key)
        elbo[i] = model.elbo(x)
        num_frames[i] = x.shape[0]

    num_total_frames = np.sum(num_frames)
    total_elbo = np.sum(elbo)
    # Normalize the total ELBO by the total number of frames.
    total_elbo_norm = total_elbo / num_total_frames
    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))
    s = 'Total ELBO: %f\nELBO_NORM: %f' % (total_elbo, total_elbo_norm)
    logging.info(s)

    with open(output_file, 'w') as f:
        f.write(s)
def compute_gmm_post(seq_file, file_list, model_file, preproc_file,
                     output_path, num_comp, **kwargs):

    sr_args = SR.filter_eval_args(**kwargs)
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    gmm = DiagGMM.load_from_kaldi(model_file)

    sr = SR(seq_file, file_list, batch_size=1, shuffle_seqs=False,
            preproc=preproc, **sr_args)

    t1 = time.time()
    hw = HypDataWriter(output_path)
    for i in xrange(sr.num_seqs):
        x, key = sr.read_next_seq()
        logging.info('Computing GMM posteriors %d/%d for %s, num_frames: %d'
                     % (i, sr.num_seqs, key, x.shape[0]))
        r = gmm.compute_z(x)
        # Keep only the num_comp largest responsibilities per frame.
        r_s, index = to_sparse(r, num_comp)
        if i == 0:
            # Sanity check: the sparsified posteriors should preserve the
            # top components of the dense ones.
            r2 = to_dense(r_s, index, r.shape[1])
            logging.debug(np.sort(r[0, :])[-12:])
            logging.debug(np.sort(r2[0, :])[-12:])
            logging.debug(np.argsort(r[0, :])[-12:])
            logging.debug(np.argsort(r2[0, :])[-12:])

        hw.write([key], '.r', [r_s])
        hw.write([key], '.index', [index])

    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))
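# to_sparse/to_dense are imported helpers. A plausible minimal sketch
# that keeps the num_comp largest posteriors per frame; the exact
# signatures are assumptions (requires numpy >= 1.15 for
# np.take_along_axis / np.put_along_axis):
def to_sparse_sketch(r, num_comp):
    # Indices of the num_comp largest responsibilities per row.
    index = np.argsort(r, axis=1)[:, -num_comp:]
    r_s = np.take_along_axis(r, index, axis=1)
    return r_s, index

def to_dense_sketch(r_s, index, num_comp_total):
    # Scatter the kept values back into a dense num_frames x num_comp matrix.
    r = np.zeros((r_s.shape[0], num_comp_total), dtype=r_s.dtype)
    np.put_along_axis(r, index, r_s, axis=1)
    return r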
def eval_pdda(iv_file, ndx_file, enroll_file, test_file, preproc_file,
              model_file, score_file, pool_method, eval_method,
              num_samples_y, num_samples_z, num_samples_elbo, qy_only,
              **kwargs):

    set_float_cpu('float32')

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr_args = TDR.filter_args(**kwargs)
    tdr = TDR(iv_file, ndx_file, enroll_file, test_file, preproc, **tdr_args)
    x_e, x_t, enroll, ndx = tdr.read()
    enroll, ids_e = np.unique(enroll, return_inverse=True)

    if qy_only:
        model = TVAEY.load(model_file)
        model.build(max_seq_length=2, num_samples=num_samples_y)
    else:
        model = TVAEYZ.load(model_file)
        model.build(max_seq_length=2, num_samples_y=num_samples_y,
                    num_samples_z=num_samples_z)

    t1 = time.time()
    scores = model.eval_llr_Nvs1(x_e, ids_e, x_t,
                                 pool_method=pool_method,
                                 eval_method=eval_method,
                                 num_samples=num_samples_elbo)
    dt = time.time() - t1
    num_trials = len(enroll) * x_t.shape[0]
    logging.info('Elapsed time: %.2f s. Elapsed time per trial: %.2f ms.'
                 % (dt, dt/num_trials*1000))

    s = TrialScores(enroll, ndx.seg_set, scores)
    s.save(score_file)
def train_cw(iv_file, train_list, preproc_file, with_lnorm,
             save_tlist, append_tlist, input_path, output_path, **kwargs):

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    vr_args = VR.filter_args(**kwargs)
    vr = VR(iv_file, train_list, preproc, **vr_args)
    x = vr.read()

    t1 = time.time()
    model_args = CentWhiten.filter_args(**kwargs)
    model = load_model(input_path, with_lnorm, **model_args)
    model.fit(x)
    logging.info('Elapsed time: %.2f s.' % (time.time() - t1))

    # Sanity check: the transformed data should be close to standard normal.
    x = model.predict(x)
    gauss = Normal(x_dim=x.shape[1])
    gauss.fit(x=x)
    logging.debug(gauss.mu[:4])
    logging.debug(gauss.Sigma[:4, :4])

    if save_tlist:
        if append_tlist and preproc is not None:
            preproc.append(model)
            model = preproc
        else:
            model = TransformList(model)

    model.save(output_path)
def tracking_plda(iv_file, ndx_file, enroll_file, segments_file,
                  preproc_file, model_file, rttm_file, plda_type, **kwargs):

    logging.info('loading data')
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    tdr = TDR(iv_file, ndx_file, enroll_file, segments_file, preproc)
    x_e, x_t, enroll, ndx_seg, ext_segments = tdr.read()

    logging.info('loading plda model: %s' % (model_file))
    model = F.load_plda(plda_type, model_file)

    t1 = time.time()
    logging.info('computing llr')
    scores = model.llr_1vs1(x_e, x_t)
    dt = time.time() - t1
    num_trials = len(enroll) * x_t.shape[0]
    logging.info('scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.'
                 % (dt, dt/num_trials*1000))

    scores = TrialScores(enroll, ndx_seg.seg_set, scores)
    new_ext_segment_ids, ext_segment_ids, model_ids, scores = \
        flatten_segment_scores(ndx_seg, scores)
    new_ext_segments = prepare_output_ext_segments(
        ext_segments, new_ext_segment_ids, ext_segment_ids,
        model_ids, scores)
    new_ext_segments.save(rttm_file + '_es')
    rttm = RTTM.create_spkdiar_from_ext_segments(new_ext_segments)
    rttm.save(rttm_file)
def extract_ivector(seq_file, file_list, gmm_file, model_file, preproc_file,
                    output_path, qy_only, **kwargs):

    set_float_cpu('float32')

    sr_args = SR.filter_eval_args(**kwargs)
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    gmm = DiagGMM.load_from_kaldi(gmm_file)

    sr = SR(seq_file, file_list, batch_size=1, shuffle_seqs=False,
            preproc=preproc, **sr_args)

    t1 = time.time()
    model = TVAEYZ.load(model_file)
    # Dimensions are set by hand instead of calling model.build().
    model.x_dim = 60
    model.r_dim = 2048
    model.y_dim = 400

    y = np.zeros((sr.num_seqs, model.y_dim), dtype=float_keras())
    xx = np.zeros((1, sr.max_batch_seq_length, model.x_dim),
                  dtype=float_keras())
    rr = np.zeros((1, sr.max_batch_seq_length, model.r_dim),
                  dtype=float_keras())
    keys = []

    # Build the q(y|x) encoder once and reuse it for all sequences.
    xp = Input(shape=(sr.max_batch_seq_length, model.x_dim,))
    rp = Input(shape=(sr.max_batch_seq_length, model.r_dim,))
    qy_param = model.qy_net([xp, rp])
    qy_net = Model([xp, rp], qy_param)

    for i in xrange(sr.num_seqs):
        ti1 = time.time()
        x, key = sr.read_next_seq()
        ti2 = time.time()
        r = gmm.compute_z(x)
        ti3 = time.time()
        logging.info('Extracting i-vector %d/%d for %s, num_frames: %d'
                     % (i, sr.num_seqs, key, x.shape[0]))
        keys.append(key)

        ti5 = time.time()
        xx[:, :, :] = 0
        rr[:, :, :] = 0
        xx[0, :x.shape[0]] = x
        rr[0, :x.shape[0]] = r
        y[i] = qy_net.predict([xx, rr], batch_size=1)[0]
        ti4 = time.time()
        logging.info('Elapsed time i-vector %d/%d for %s, total: %.2f '
                     'read: %.2f, gmm: %.2f, vae: %.2f qy: %.2f'
                     % (i, sr.num_seqs, key, ti4-ti1, ti2-ti1,
                        ti3-ti2, ti4-ti5, ti5-ti3))

    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))

    hw = HypDataWriter(output_path)
    hw.write(keys, '', y)
def extract_ivector(seq_file, file_list, post_file, model_file, preproc_file,
                    output_path, qy_only, max_length, layer_name, **kwargs):

    set_float_cpu('float32')

    sr_args = SR.filter_eval_args(**kwargs)
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    sr = SR(seq_file, file_list, post_file, batch_size=1,
            shuffle_seqs=False, preproc=preproc, **sr_args)

    t1 = time.time()
    model = TVAEYZ.load(model_file)

    pt_input = model.pt_net.input
    pt_output = model.pt_net.get_layer(layer_name).output
    pt_dim = model.pt_net.get_layer(layer_name).output_shape[-1]

    model.build(max_seq_length=1)
    max_length = np.minimum(sr.max_batch_seq_length, max_length)

    y = np.zeros((sr.num_seqs, pt_dim), dtype=float_keras())
    xx = np.zeros((1, max_length, model.x_dim), dtype=float_keras())
    rr = np.zeros((1, max_length, model.r_dim), dtype=float_keras())
    keys = []

    # Embedding network: q(y|x) encoder followed by the pt network cut
    # at the requested layer.
    xp = Input(shape=(max_length, model.x_dim,))
    rp = Input(shape=(max_length, model.r_dim,))
    qy_param = model.qy_net([xp, rp])
    pt_net = Model(pt_input, pt_output)
    emb = pt_net(qy_param[0])
    emb_net = Model([xp, rp], emb)

    model.pt_net.summary()
    pt_net.summary()
    emb_net.summary()
    logging.info(layer_name)

    for i in xrange(sr.num_seqs):
        ti1 = time.time()
        x, r, key = sr.read_next_seq()
        ti2 = time.time()
        logging.info('Extracting i-vector %d/%d for %s, num_frames: %d'
                     % (i, sr.num_seqs, key, x.shape[0]))
        keys.append(key)
        xx[:, :, :] = 0
        rr[:, :, :] = 0
        if x.shape[0] <= max_length:
            xx[0, :x.shape[0]] = x
            rr[0, :x.shape[0]] = r
            y[i] = emb_net.predict([xx, rr], batch_size=1)
        else:
            # Long sequences: average the embeddings of max_length chunks,
            # letting the last chunk overlap with the previous one.
            num_batches = int(np.ceil(float(x.shape[0]) / max_length))
            for j in xrange(num_batches - 1):
                start = j * max_length
                xx[0] = x[start:start + max_length]
                rr[0] = r[start:start + max_length]
                y[i] += emb_net.predict([xx, rr], batch_size=1).ravel()
            xx[0] = x[-max_length:]
            rr[0] = r[-max_length:]
            y[i] += emb_net.predict([xx, rr], batch_size=1).ravel()
            y[i] /= num_batches

        ti4 = time.time()
        logging.info('Elapsed time i-vector %d/%d for %s, '
                     'total: %.2f read: %.2f, vae: %.2f'
                     % (i, sr.num_seqs, key, ti4-ti1, ti2-ti1, ti4-ti2))

    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))

    hw = HypDataWriter(output_path)
    hw.write(keys, '', y)
def train_embed(data_path, train_list, val_list, px_net_path, pt_net_path,
                qy_net_path, qz_net_path, init_path, epochs, preproc_file,
                output_path, freeze_embed, **kwargs):

    g = reserve_gpu()
    set_float_cpu(float_keras())

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    sg_args = G.filter_args(**kwargs)
    sg = G(data_path, train_list, shuffle_seqs=True, reset_rng=False,
           transform=preproc, **sg_args)
    max_length = sg.max_seq_length

    gen_val = None
    val_steps = None
    if val_list is not None:
        sg_val = G(data_path, val_list, transform=preproc,
                   shuffle_seqs=False, reset_rng=True, **sg_args)
        max_length = max(max_length, sg_val.max_seq_length)
        gen_val = data_generator(sg_val, max_length)
        val_steps = sg_val.steps_per_epoch

    gen_train = data_generator(sg, max_length)

    if init_path is None:
        # Resume from the latest checkpoint if one exists; otherwise
        # build a new model from the network architecture files.
        model, init_epoch = KML.load_checkpoint(output_path, epochs)
        if model is None:
            embed_args = VAE.filter_args(**kwargs)
            logging.debug(embed_args)
            px_net = load_model_arch(px_net_path)
            qy_net = load_model_arch(qy_net_path)
            qz_net = load_model_arch(qz_net_path)
            pt_net = load_model_arch(pt_net_path)
            model = VAE(px_net, qy_net, qz_net, pt_net, **embed_args)
        else:
            sg.cur_epoch = init_epoch
            sg.reset()
    else:
        logging.info('loading init model: %s' % init_path)
        model = KML.load(init_path)
        model.px_weight = kwargs['px_weight']
        model.pt_weight = kwargs['pt_weight']
        model.kl_qy_weight = kwargs['kl_qy_weight']
        model.kl_qz_weight = kwargs['kl_qz_weight']

    opt_args = KOF.filter_args(**kwargs)
    cb_args = KCF.filter_args(**kwargs)
    logging.debug(sg_args)
    logging.debug(opt_args)
    logging.debug(cb_args)
    logging.info('max length: %d' % max_length)

    t1 = time.time()
    if freeze_embed:
        model.prepool_net.trainable = False

    model.build(max_length)
    logging.info('Model build elapsed time: %.2f' % (time.time() - t1))

    cb = KCF.create_callbacks(model, output_path, **cb_args)
    opt = KOF.create_optimizer(**opt_args)
    model.compile(optimizer=opt)

    h = model.fit_generator(gen_train,
                            validation_data=gen_val,
                            steps_per_epoch=sg.steps_per_epoch,
                            validation_steps=val_steps,
                            initial_epoch=sg.cur_epoch,
                            epochs=epochs,
                            callbacks=cb,
                            max_queue_size=10)

    logging.info('Train elapsed time: %.2f' % (time.time() - t1))
    model.save(output_path + '/model')
def extract_embed(seq_file, file_list, model_file, preproc_file, output_path,
                  max_length, layer_names, **kwargs):

    set_float_cpu('float32')

    sr_args = SR.filter_eval_args(**kwargs)
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    sr = SR(seq_file, file_list, batch_size=1, shuffle_seqs=False,
            preproc=preproc, **sr_args)

    t1 = time.time()
    model = SeqEmbed.load(model_file)
    model.build()
    logging.info(layer_names)
    model.build_embed(layer_names)
    y_dim = model.embed_dim

    max_length = np.minimum(sr.max_batch_seq_length, max_length)

    y = np.zeros((sr.num_seqs, y_dim), dtype=float_keras())
    xx = np.zeros((1, max_length, model.x_dim), dtype=float_keras())
    keys = []
    for i in xrange(sr.num_seqs):
        ti1 = time.time()
        x, key = sr.read_next_seq()
        ti2 = time.time()
        logging.info('Extracting embeddings %d/%d for %s, num_frames: %d'
                     % (i, sr.num_seqs, key, x.shape[0]))
        keys.append(key)
        xx[:, :, :] = 0
        if x.shape[0] <= max_length:
            xx[0, :x.shape[0]] = x
            y[i] = model.predict_embed(xx, batch_size=1)
        else:
            # Long sequences: average the embeddings of equal-size chunks.
            num_chunks = int(np.ceil(float(x.shape[0]) / max_length))
            chunk_size = int(np.ceil(float(x.shape[0]) / num_chunks))
            for j in xrange(num_chunks - 1):
                start = j * chunk_size
                xx[0, :chunk_size] = x[start:start + chunk_size]
                y[i] += model.predict_embed(xx, batch_size=1).ravel()
            xx[0, :chunk_size] = x[-chunk_size:]
            y[i] += model.predict_embed(xx, batch_size=1).ravel()
            y[i] /= num_chunks

        ti4 = time.time()
        logging.info('Elapsed time embeddings %d/%d for %s, '
                     'total: %.2f read: %.2f, vae: %.2f'
                     % (i, sr.num_seqs, key, ti4-ti1, ti2-ti1, ti4-ti2))

    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))

    hw = HypDataWriter(output_path)
    hw.write(keys, '', y)
def train_embed(seq_file, train_list, val_list, class_list, embed_file,
                init_path, epochs, batch_size, preproc_file, output_path,
                post_pdf, pooling_input, pooling_output, min_var, **kwargs):

    set_float_cpu(float_keras())

    sr_args = SR.filter_args(**kwargs)
    sr_val_args = SR.filter_val_args(**kwargs)
    opt_args = KOF.filter_args(**kwargs)
    cb_args = KCF.filter_args(**kwargs)

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    sr = SR(seq_file, train_list, class_list,
            batch_size=batch_size, preproc=preproc, **sr_args)
    max_length = sr.max_batch_seq_length

    gen_val = None
    val_steps = None
    if val_list is not None:
        sr_val = SR(seq_file, val_list, class_list,
                    batch_size=batch_size, preproc=preproc,
                    shuffle_seqs=False, seq_split_mode='sequential',
                    seq_split_overlap=0, reset_rng=True, **sr_val_args)
        max_length = max(max_length, sr_val.max_batch_seq_length)
        gen_val = data_generator(sr_val, max_length)
        val_steps = sr_val.num_batches

    gen_train = data_generator(sr, max_length)

    t1 = time.time()
    if init_path is None:
        embed_net = load_model_arch(embed_file)
        model = SeqMetaEmbed(embed_net, num_classes=sr.num_classes,
                             post_pdf=post_pdf,
                             pooling_input=pooling_input,
                             pooling_output=pooling_output,
                             min_var=min_var)
    else:
        logging.info('loading init model: %s' % init_path)
        model = SeqMetaEmbed.load(init_path)

    logging.info('max length: %d' % max_length)
    model.build(max_length)
    logging.info('Model build elapsed time: %.2f' % (time.time() - t1))

    cb = KCF.create_callbacks(model, output_path, **cb_args)
    opt = KOF.create_optimizer(**opt_args)
    model.compile(optimizer=opt)

    h = model.fit_generator(gen_train,
                            validation_data=gen_val,
                            steps_per_epoch=sr.num_batches,
                            validation_steps=val_steps,
                            epochs=epochs,
                            callbacks=cb,
                            max_queue_size=10)

    logging.info('Train elapsed time: %.2f' % (time.time() - t1))
    model.save(output_path + '/model')
def train_embed(data_path, train_list, val_list, train_list_adapt,
                val_list_adapt, prepool_net_path, postpool_net_path,
                init_path, epochs, preproc_file, output_path,
                freeze_prepool, freeze_postpool_layers, **kwargs):

    set_float_cpu(float_keras())

    if init_path is None:
        # Resume from the latest checkpoint if one exists; otherwise
        # build a new model from the network architecture files.
        model, init_epoch = KML.load_checkpoint(output_path, epochs)
        if model is None:
            emb_args = SeqEmbed.filter_args(**kwargs)
            prepool_net = load_model_arch(prepool_net_path)
            postpool_net = load_model_arch(postpool_net_path)
            model = SeqEmbed(prepool_net, postpool_net,
                             loss='categorical_crossentropy', **emb_args)
        else:
            kwargs['init_epoch'] = init_epoch
    else:
        logging.info('loading init model: %s' % init_path)
        model = KML.load(init_path)

    sg_args = G.filter_args(**kwargs)
    opt_args = KOF.filter_args(**kwargs)
    cb_args = KCF.filter_args(**kwargs)
    logging.debug(sg_args)
    logging.debug(opt_args)
    logging.debug(cb_args)

    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    sg = G(data_path, train_list, train_list_adapt,
           shuffle_seqs=True, reset_rng=False,
           transform=preproc, **sg_args)
    max_length = sg.max_seq_length

    gen_val = None
    val_steps = None
    if val_list is not None:
        sg_val = G(data_path, val_list, val_list_adapt,
                   transform=preproc, shuffle_seqs=False,
                   reset_rng=True, **sg_args)
        max_length = max(max_length, sg_val.max_seq_length)
        gen_val = data_generator(sg_val, max_length)
        val_steps = sg_val.steps_per_epoch

    gen_train = data_generator(sg, max_length)
    logging.info('max length: %d' % max_length)

    t1 = time.time()
    if freeze_prepool:
        model.freeze_prepool_net()

    if freeze_postpool_layers is not None:
        model.freeze_postpool_net_layers(freeze_postpool_layers)

    model.build(max_length)

    cb = KCF.create_callbacks(model, output_path, **cb_args)
    opt = KOF.create_optimizer(**opt_args)
    model.compile(optimizer=opt)

    h = model.fit_generator(gen_train,
                            validation_data=gen_val,
                            steps_per_epoch=sg.steps_per_epoch,
                            validation_steps=val_steps,
                            initial_epoch=sg.cur_epoch,
                            epochs=epochs,
                            callbacks=cb,
                            max_queue_size=10)

    logging.info('Train elapsed time: %.2f' % (time.time() - t1))
    model.save(output_path + '/model')
def extract_ivector(seq_file, file_list, gmm_file, model_file, preproc_file,
                    output_path, qy_only, max_length, **kwargs):

    set_float_cpu('float32')

    sr_args = SR.filter_eval_args(**kwargs)
    if preproc_file is not None:
        preproc = TransformList.load(preproc_file)
    else:
        preproc = None

    gmm = DiagGMM.load_from_kaldi(gmm_file)

    sr = SR(seq_file, file_list, batch_size=1, shuffle_seqs=False,
            preproc=preproc, **sr_args)

    t1 = time.time()
    model = TVAEYZ.load(model_file)
    model.build(max_seq_length=1)
    max_length = np.minimum(sr.max_batch_seq_length, max_length)

    y = np.zeros((sr.num_seqs, model.y_dim), dtype=float_keras())
    xx = np.zeros((1, max_length, model.x_dim), dtype=float_keras())
    rr = np.zeros((1, max_length, model.r_dim), dtype=float_keras())
    keys = []

    # Build the q(y|x) encoder once and reuse it for all sequences.
    xp = Input(shape=(max_length, model.x_dim,))
    rp = Input(shape=(max_length, model.r_dim,))
    qy_param = model.qy_net([xp, rp])
    qy_net = Model([xp, rp], qy_param)

    for i in xrange(sr.num_seqs):
        ti1 = time.time()
        x, key = sr.read_next_seq()
        ti2 = time.time()
        r = gmm.compute_z(x)
        ti3 = time.time()
        logging.info('Extracting i-vector %d/%d for %s, num_frames: %d'
                     % (i, sr.num_seqs, key, x.shape[0]))
        keys.append(key)
        xx[:, :, :] = 0
        rr[:, :, :] = 0
        if x.shape[0] <= max_length:
            xx[0, :x.shape[0]] = x
            rr[0, :x.shape[0]] = r
            y[i] = qy_net.predict([xx, rr], batch_size=1)[0]
        else:
            # Long sequences: average the i-vectors of max_length chunks,
            # letting the last chunk overlap with the previous one.
            num_batches = int(np.ceil(float(x.shape[0]) / max_length))
            for j in xrange(num_batches - 1):
                start = j * max_length
                xx[0] = x[start:start + max_length]
                rr[0] = r[start:start + max_length]
                y[i] += qy_net.predict([xx, rr], batch_size=1)[0].ravel()
            xx[0] = x[-max_length:]
            rr[0] = r[-max_length:]
            y[i] += qy_net.predict([xx, rr], batch_size=1)[0].ravel()
            y[i] /= num_batches

        ti4 = time.time()
        logging.info('Elapsed time i-vector %d/%d for %s, '
                     'total: %.2f read: %.2f, gmm: %.2f, vae: %.2f'
                     % (i, sr.num_seqs, key, ti4-ti1, ti2-ti1,
                        ti3-ti2, ti4-ti3))

    logging.info('Extract elapsed time: %.2f' % (time.time() - t1))

    hw = HypDataWriter(output_path)
    hw.write(keys, '', y)