def testMatrixReadWrite(self):
    """ Test read/write for float matrices. """
    # Read the same features through the scp and both ark readers; the first
    # two reads exercise the ascii paths, the binary ark is used below.
    ascii_scp_mats = dict(kaldi_io.read_mat_scp('tests/data/feats_ascii.scp'))
    ascii_ark_mats = dict(kaldi_io.read_mat_ark('tests/data/feats_ascii.ark'))
    binary_ark_mats = dict(kaldi_io.read_mat_ark('tests/data/feats.ark'))
    # Store,
    with kaldi_io.open_or_fd('tests/data_re-saved/mat.ark', 'wb') as out:
        for key, mat in binary_ark_mats.items():
            kaldi_io.write_mat(out, mat, key)
    # Read back and compare,
    for key, mat in kaldi_io.read_mat_ark('tests/data_re-saved/mat.ark'):
        self.assertTrue(np.array_equal(mat, binary_ark_mats[key]),
                        msg="flt. matrix same after re-saving")
def testReading(self):
    """Compare speed and values of plain vs. compressed matrix reading."""

    def _timed_read(path):
        # Load an entire ark into a dict, returning (data, elapsed seconds).
        start = timeit.default_timer()
        data = dict(kaldi_io.read_mat_ark(path))
        return data, timeit.default_timer() - start

    plain, plain_secs = _timed_read('tests/data/feats.ark')
    packed, packed_secs = _timed_read('tests/data/feats_compressed.ark')
    # reading the compressed data should be <5x slower,
    self.assertLess(packed_secs, 5. * plain_secs)
    # check that the values are similar
    # (these are not identical due to discretization in compression),
    for key in plain:
        self.assertGreater(1e-4, np.max(np.abs(packed[key] - plain[key])))
def load_dataset(fea_scp,fea_opts,lab_folder,lab_opts,left,right):
    """Load features and frame alignments via Kaldi pipes and concatenate them.

    Parameters
    ----------
    fea_scp : path of the feature scp file, fed to ``copy-feats``.
    fea_opts : extra shell pipeline appended after ``copy-feats`` output.
    lab_folder : directory containing ``ali*.gz`` alignments and ``final.mdl``.
    lab_opts : Kaldi command (e.g. an ali-to-* tool) applied to the gunzipped
        alignments together with ``final.mdl``.
    left, right : number of context frames trimmed from the first sentence's
        start offset and from the very last boundary index, respectively.

    Returns a list ``[snt_name, fea_conc, lab_conc, end_index]`` where
    ``fea_conc``/``lab_conc`` are all sentences concatenated along axis 0
    (sorted by sentence length) and ``end_index`` holds the cumulative
    per-sentence boundary positions.

    NOTE(review): if no key survives the fea/lab intersection, ``fea_conc``
    is never bound and the final return raises NameError — confirm callers
    guarantee non-empty data.
    """
    # Features: read matrices from a copy-feats pipe (plus optional fea_opts filters).
    fea= { k:m for k,m in kaldi_io.read_mat_ark('ark:copy-feats scp:'+fea_scp+' ark:- |'+fea_opts) }
    # Labels: keep only the alignments of the loaded features.
    lab= { k:v for k,v in kaldi_io.read_vec_int_ark('gunzip -c '+lab_folder+'/ali*.gz | '+lab_opts+' '+lab_folder+'/final.mdl ark:- ark:-|') if k in fea} # Note that I'm copying only the alignments of the loaded fea
    fea={k: v for k, v in fea.items() if k in lab} # Remove all features without an alignment (see log file in alidir, "Did not Succeded" [sic])
    count=0
    end_snt=0
    end_index=[]
    snt_name=[]
    # Process sentences sorted by length (shortest first).
    for k in sorted(fea.keys(), key=lambda k: len(fea[k])):
        if count==0:
            # First sentence initializes the buffers; its boundary is shifted
            # back by `left` context frames.
            count=1
            fea_conc=fea[k]
            lab_conc=lab[k]
            end_snt=end_snt+fea[k].shape[0]-left
        else:
            fea_conc=np.concatenate([fea_conc,fea[k]],axis=0)
            lab_conc=np.concatenate([lab_conc,lab[k]],axis=0)
            end_snt=end_snt+fea[k].shape[0]
        end_index.append(end_snt)
        snt_name.append(k)
    # Trim `right` context frames from the final boundary only.
    end_index[-1]=end_index[-1]-right
    return [snt_name,fea_conc,lab_conc,end_index]
def testPipeReadWrite(self):
    """ Test read/write for pipes.

    Note: make sure the "os.environ['KALDI_ROOT']" in "kaldi_io/kaldi_io.py"
    is correct.
    """
    import contextlib
    import os
    # Silence 'stderr' forwarding from the piped kaldi tools for this test
    # only. The original code did `with open("/dev/null","w") as sys.stderr:`
    # which (a) left sys.stderr bound to a *closed* file after the block,
    # breaking any later stderr output, and (b) was not portable to Windows.
    # redirect_stderr restores the previous sys.stderr on exit.
    with open(os.devnull, "w") as devnull, contextlib.redirect_stderr(devnull):
        # read,
        flt_mat4 = { k:m for k,m in kaldi_io.read_mat_ark('ark:copy-feats ark:tests/data/feats.ark ark:- |') }
        # write to pipe,
        with kaldi_io.open_or_fd('ark:| copy-feats ark:- ark:tests/data_re-saved/mat_pipe.ark','wb') as f:
            for k,m in flt_mat4.items():
                kaldi_io.write_mat(f, m, k)
        # read it again and compare,
        for k,m in kaldi_io.read_mat_ark('tests/data_re-saved/mat_pipe.ark'):
            self.assertTrue(np.array_equal(m, flt_mat4[k]),"flt. matrix same after read/write via pipe")
        # read some other formats from pipe,
        i32_vec3 = { k:v for k,v in kaldi_io.read_vec_int_ark('ark:copy-int-vector ark:tests/data/ali.ark ark:- |') }
        flt_vec4 = { k:v for k,v in kaldi_io.read_vec_flt_ark('ark:copy-vector ark:tests/data/conf.ark ark:- |') }
def get_output(config):
    """Compute per-utterance log-likelihoods by fusing several p(c|x) RNN
    classifiers with matching p(x) VAE models, over all stream subsets.

    Returns a dict mapping utterance id -> numpy array of log-likelihoods
    (``llh`` from :func:`compute_lhood`).

    NOTE(review): the softmax-temperature loop below only *prints* losses and
    adds a constant 100 to T each iteration — the gradient step is commented
    out, so this looks like exploratory/experimental code; confirm intent.
    """
    # Load all P(x) and P(c|x) models
    model_pcx = config.models_pcx.split(',')
    model_px = config.models_px.split(',')
    if len(model_pcx) != len(model_px):
        # NOTE(review): only warns and continues; a mismatch will fail later
        # when the shorter list is indexed.
        print("Number of p(x) models and p(c|x) models are not the same!")
    num_domains = len(model_px)
    # All subsets of the domain indices; each subset is one "stream".
    streams = powerset(list(np.arange(num_domains)))
    all_pcx_models = []
    all_px_models = []
    for idx, m in enumerate(model_pcx):
        # Load checkpoints on CPU regardless of where they were saved.
        nnet = torch.load(model_pcx[idx], map_location=lambda storage, loc: storage)
        vae = torch.load(model_px[idx], map_location=lambda storage, loc: storage)
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'], nnet['num_layers'],
                        nnet['hidden_dim'], nnet['num_classes'], nnet['dropout'])
        model.load_state_dict(nnet['model_state_dict'])
        all_pcx_models.append(model)
        model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                        vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0, False)
        model.load_state_dict(vae['model_state_dict'])
        all_px_models.append(model)
    # Uses the *last* loaded checkpoint's class count (assumes all agree).
    num_classes = nnet['num_classes']
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    # Feature transform: either overridden on the command line or taken from
    # the egs config ("type,path").
    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # Build the Kaldi feature-reading pipe for the chosen transform.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])
    # Load prior
    priors = config.priors.split(',')
    priors = [pickle.load(open(f, 'rb')) for f in priors]
    if config.use_gpu:
        priors = [torch.from_numpy(f).cuda().double() for f in priors]
    else:
        priors = [torch.from_numpy(f).double() for f in priors]
    all_pcx_models = nn.ModuleList(all_pcx_models)
    all_px_models = nn.ModuleList(all_px_models)
    if config.use_gpu:
        # Set environment variable for GPU ID
        # NOTE(review): `id` shadows the builtin of the same name.
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        all_pcx_models.cuda()
        all_px_models.cuda()
    # Task priors: either data-driven ("dp") or a fixed comma-separated list.
    if config.task_prior == "dp":
        print("using data based task priors")
        task_prior = "dp"
    else:
        task_prior = config.task_prior.split(',')
        task_prior = [float(tp) for tp in task_prior]
    post_dict = {}
    for utt_id, batch_x in kaldi_io.read_mat_ark(cmd):
        print("COMPUTING LOG-LIKELIHOOD FOR UTTERANCE {:s}".format(utt_id))
        sys.stdout.flush()
        T = torch.DoubleTensor([300])  # Initial temperature
        T.requires_grad = True
        num_frames = batch_x.shape[0]
        # Add a leading batch dimension of 1.
        batch_x = Variable(torch.FloatTensor(batch_x))[None, :, :]
        batch_l = Variable(torch.IntTensor([batch_x.size(1)]))
        # Do forward passes through different models
        sm = torch.nn.Softmax(1)
        px_save = []
        all_pcx = []
        all_tp = torch.zeros(len(all_pcx_models), dtype=torch.double)
        for idx, model in enumerate(all_pcx_models):
            model.eval()
            out = model(batch_x, batch_l)
            ae_out, latent_out = all_px_models[idx](batch_x, batch_l)
            latent_out = (latent_out[0][0, :, :], latent_out[1][0, :, :])
            # Per-frame p(x) from the VAE loss, exponentiated back to a likelihood.
            px = torch.exp(vae_loss(batch_x[0, :, :], ae_out[0, :, :], latent_out)).double()
            px_save.append(torch.mean(px))
            # Softmax over classes — presumably per-frame P(c|x).
            pcx = sm(out[0, :, :])
            all_pcx.append(pcx.double())
            if task_prior == "dp":
                all_tp[idx] = px_save[idx]
            else:
                all_tp[idx] = task_prior[idx]
        # Sweep the softmax temperature (gradient update is disabled below).
        for it_num in range(config.num_iter):
            llh = compute_lhood(num_frames, num_classes, all_pcx, all_tp, priors, task_prior, streams, T)
            loss = -torch.mean(llh)
            print_log = "p(x|c) ={:.6f} with softmax temperature ={:.6f} ".format(loss.item(), T.item())
            print(print_log)
            sys.stdout.flush()
            #loss.backward(retain_graph=True)
            print(T.grad)
            # with torch.no_grad():
            #     T = T + config.lr_rate * T.grad/torch.norm(T.grad,2)
            #T.requires_grad = True
            T = T + 100
        # Keep the llh of the *last* temperature iteration.
        if config.use_gpu:
            post_dict[utt_id] = llh.cpu().data.numpy()
        else:
            post_dict[utt_id] = llh.data.numpy()
    return post_dict
def get_output(config):
    """Compute fused per-utterance posteriors from several p(c|x) RNN models,
    weighted by a softmax over the matching p(x) VAE likelihoods.

    Returns a dict mapping utterance id -> ``log(post) - prior_weight*log(prior_acc)``
    as a numpy array of shape (frames, num_classes).
    """
    # Load all P(x) and P(c|x) models
    model_pcx = config.models_pcx.split(',')
    model_px = config.models_px.split(',')
    if len(model_pcx) != len(model_px):
        # NOTE(review): only warns and continues; a mismatch fails later on indexing.
        print("Number of p(x) models and p(c|x) models are not the same!")
    num_domains = len(model_px)
    all_pcx_models = []
    all_px_models = []
    for idx, m in enumerate(model_pcx):
        # Load checkpoints on CPU regardless of where they were saved.
        nnet = torch.load(model_pcx[idx], map_location=lambda storage, loc: storage)
        vae = torch.load(model_px[idx], map_location=lambda storage, loc: storage)
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'], nnet['num_layers'],
                        nnet['hidden_dim'], nnet['num_classes'], nnet['dropout'])
        model.load_state_dict(nnet['model_state_dict'])
        all_pcx_models.append(model)
        model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                        vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0, False)
        model.load_state_dict(vae['model_state_dict'])
        all_px_models.append(model)
    # Uses the *last* loaded checkpoint's class count (assumes all agree).
    num_classes = nnet['num_classes']
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    sm = torch.nn.Softmax(1)
    # Feature transform: either overridden on the command line or from the egs config.
    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # Build the Kaldi feature-reading pipe.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])
    # Load prior
    priors = config.priors.split(',')
    priors = [pickle.load(open(f, 'rb')) for f in priors]
    post_dict = {}
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        post = np.zeros((mat.shape[0], num_classes))
        # Per-domain p(x) tiled to (domains, frames, classes) for softmax weighting.
        pxx = np.zeros((num_domains, mat.shape[0], num_classes))
        prior_acc = np.zeros((mat.shape[0], num_classes))
        # Add a leading batch dimension of 1.
        mat = Variable(torch.FloatTensor(mat))[None, :, :]
        batch_l = Variable(torch.IntTensor([mat.size(1)]))
        all_pcx = []
        all_px = []
        for idx, model in enumerate(all_pcx_models):
            model.eval()
            out = model(mat, batch_l)
            ae_out, latent_out = all_px_models[idx](mat, batch_l)
            latent_out = (latent_out[0][0, :, :], latent_out[1][0, :, :])
            # Per-frame p(x) from the VAE loss, exponentiated back to a likelihood.
            px = np.exp(vae_loss(mat[0, :, :], ae_out[0, :, :], latent_out).data.numpy())
            pcx = sm(out[0, :, :])
            # Broadcast the frame-wise p(x) across all classes.
            px = np.tile(px, (pcx.shape[1], 1)).T
            all_pcx.append(pcx.data.numpy())
            all_px.append(px)
            pxx[idx] = px
        # Softmax across domains turns p(x) values into fusion weights.
        pxx = softmax(pxx)
        for idx, pcx in enumerate(all_pcx):
            post += pcx * all_px[idx] * pxx[idx]
            prior_acc += np.exp(np.tile(priors[idx], (pcx.shape[0], 1))) * pxx[idx]
        post_dict[utt_id] = np.log(post) - config.prior_weight * np.log(prior_acc)
    return post_dict
def get_output(config):
    """Decode features through a VAE bottleneck followed by an RNN classifier.

    The VAE encoder output is mean/variance normalized per utterance before
    being fed to the classifier. Returns a dict mapping utterance id to
    log-posteriors minus weighted priors (if ``config.prior``), plain softmax
    posteriors (if ``config.add_softmax``), or raw network outputs.
    """
    # Load model (classifier checkpoint references its VAE encoder via 'vaeenc').
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    vae = torch.load(nnet['vaeenc'], map_location=lambda storage, loc: storage)
    if nnet['vae_type'] == "modulation":
        # Bottleneck width is nfilters * nrepeats for the modulation VAE.
        if config.vae_arch == "cnn":
            in_channels = [int(x) for x in vae['in_channels'].split(',')]
            out_channels = [int(x) for x in vae['out_channels'].split(',')]
            kernel = tuple([int(x) for x in vae['kernel'].split(',')])
            vae_model = nnetVAECNNNopool(vae['feature_dim'], vae['num_frames'], in_channels,
                                         out_channels, kernel, vae['nfilters'] * vae['nrepeats'], False)
        else:
            vae_model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                                vae['decoder_num_layers'], vae['hidden_dim'],
                                vae['nfilters'] * vae['nrepeats'], 0, False)
        model = nnetRNN(vae['nfilters'] * vae['nrepeats'], nnet['num_layers'], nnet['hidden_dim'],
                        nnet['num_classes'], 0)
        vae_model.load_state_dict(vae["model_state_dict"])
        model.load_state_dict(nnet["model_state_dict"])
    elif nnet['vae_type'] == "arvae":
        # Autoregressive VAE: number of heads is len(ar_steps) incl. a final 0 step.
        ar_steps = vae['ar_steps'].split(',')
        ar_steps = [int(x) for x in ar_steps]
        ar_steps.append(0)
        vae_model = nnetARVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                              vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0,
                              len(ar_steps), False)
        model = nnetRNN(vae['bn_dim'], nnet['num_layers'], nnet['hidden_dim'], nnet['num_classes'], 0)
        vae_model.load_state_dict(vae["model_state_dict"])
        model.load_state_dict(nnet['model_state_dict'])
    else:
        # Plain VAE with a bn_dim-wide bottleneck.
        if config.vae_arch == "cnn":
            in_channels = [int(x) for x in vae['in_channels'].split(',')]
            out_channels = [int(x) for x in vae['out_channels'].split(',')]
            kernel = tuple([int(x) for x in vae['kernel'].split(',')])
            vae_model = nnetVAECNNNopool(vae['feature_dim'], vae['num_frames'], in_channels,
                                         out_channels, kernel, vae['bn_dim'], False)
        else:
            vae_model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                                vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0, False)
        model = nnetRNN(vae['bn_dim'], nnet['num_layers'], nnet['hidden_dim'], nnet['num_classes'], 0)
        vae_model.load_state_dict(vae["model_state_dict"])
        model.load_state_dict(nnet['model_state_dict'])
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    lsm = torch.nn.LogSoftmax(1)
    sm = torch.nn.Softmax(1)
    # Feature transform: either overridden on the command line or from the egs config.
    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # Build the Kaldi feature-reading pipe.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(
            trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])
    if config.prior:
        prior = pickle.load(open(config.prior, 'rb'))
    post_dict = {}
    model.eval()
    for utt_id, batch_x in kaldi_io.read_mat_ark(cmd):
        #print(batch_x.shape)
        if config.vae_arch == "cnn":
            # CNN VAE expects (batch, channel, feat, time); transpose accordingly.
            batch_l = Variable(torch.IntTensor([batch_x.shape[0]]))
            batch_x = Variable(torch.FloatTensor(batch_x))
            batch_x = batch_x[None, None, :, :]
            batch_x = torch.transpose(batch_x, 2, 3)
            _, batch_x = vae_model(batch_x)
            batch_x = torch.transpose(batch_x[0], 1, 2)
        else:
            batch_x = Variable(torch.FloatTensor(batch_x))[None, :, :]
            batch_l = Variable(torch.IntTensor([batch_x.shape[1]]))
            _, batch_x = vae_model(batch_x, batch_l)
            batch_x = batch_x[0]
        print(batch_x.shape)
        # Per-utterance mean/variance normalization of the bottleneck features.
        batch_x = batch_x - torch.cat(
            batch_x.shape[1] * [torch.mean(batch_x, dim=1)[:, None, :]], dim=1)
        batch_x = batch_x / torch.sqrt(
            torch.cat(
                batch_x.shape[1] * [torch.var(batch_x, dim=1)[:, None, :]], dim=1))
        batch_x = model(batch_x, batch_l)
        if config.prior:
            print(batch_x[0].shape)
            sys.stdout.flush()
            post_dict[utt_id] = lsm(
                batch_x[0, :, :]).data.numpy() - config.prior_weight * prior
        else:
            if config.add_softmax:
                post_dict[utt_id] = sm(batch_x[0, :, :]).data.numpy()
            else:
                post_dict[utt_id] = batch_x[0, :, :].data.numpy()
    return post_dict
def main(config='config/train.yaml', **kwargs):
    """Trains a model on the given features and vocab.

    :config: A training configuration. Note that all parameters in the config
        can also be manually adjusted with --ARG VALUE
    :returns: None
    """
    config_parameters = parse_config_or_kwargs(config, **kwargs)
    # One timestamped output directory per run.
    outputdir = os.path.join(
        config_parameters['outputpath'], config_parameters['model'],
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S-%f'))
    try:
        os.makedirs(outputdir)
    except IOError:
        # Directory already exists (IOError is an alias of OSError in py3).
        pass
    logger = genlogger(outputdir, 'train.log')
    logger.info("Storing data at: {}".format(outputdir))
    logger.info("<== Passed Arguments ==>")
    # Print arguments into logs
    for line in pformat(config_parameters).split('\n'):
        logger.info(line)
    # seed setting
    seed=config_parameters['seed'] # 1~5
    np.random.seed(seed)
    torch.manual_seed(seed)
    kaldi_string = parsecopyfeats(
        config_parameters['features'], **config_parameters['feature_args'])
    scaler = getattr(pre, config_parameters['scaler'])(**config_parameters['scaler_args'])
    logger.info("<== Estimating Scaler ({}) ==>".format(scaler.__class__.__name__))
    # One streaming pass over the features to fit the scaler and learn the
    # input dimensionality.
    inputdim = -1
    for kid, feat in kaldi_io.read_mat_ark(kaldi_string):
        scaler.partial_fit(feat)
        inputdim = feat.shape[-1]
    assert inputdim > 0, "Reading inputstream failed"
    logger.info(
        "Features: {} Input dimension: {}".format(
            config_parameters['features'], inputdim))
    logger.info("<== Labels ==>")
    # Can be label, DAT, DADA ... default is 'label'
    target_label_name = config_parameters.get('label_type', 'label')
    if target_label_name == 'label':
        label_df = pd.read_csv(config_parameters['labels'], sep=' ',
                               names=['speaker', 'filename', 'physical', 'system', 'label'])
    else:  # 'DAT' or 'DADA': labels file carries an extra domain column.
        label_df = pd.read_csv(config_parameters['labels'], sep=' ',
                               names=['speaker', 'filename', 'physical', 'system', 'label', 'domain'])
    label_encoder = pre.LabelEncoder()
    if target_label_name == 'label':
        label_encoder.fit(label_df[target_label_name].values.astype(str))
        # Labelencoder needs an iterable to work, so just put a list around it
        # and fetch again the 0-th element ( just the encoded string )
        label_df['class_encoded'] = label_df[target_label_name].apply(lambda x: label_encoder.transform([x])[0])
        train_labels = label_df[['filename', 'class_encoded']].set_index('filename').loc[:, 'class_encoded'].to_dict()
    else:  # 'DAT' or 'DADA': encode both the class label and the domain.
        label_encoder_sub = pre.LabelEncoder()
        label_encoder.fit(label_df['label'].values.astype(str))
        label_df['lab_encoded'] = label_df['label'].apply(lambda x: label_encoder.transform([x])[0])
        label_encoder_sub.fit(label_df['domain'].values.astype(str))
        label_df['domain_encoded'] = label_df['domain'].apply(lambda x: label_encoder_sub.transform([x])[0])
        # filename -> np.array([lab_encoded, domain_encoded])
        train_labels = label_df[['filename', 'lab_encoded', 'domain_encoded']].set_index('filename').to_dict('index')
        train_labels = {k:np.array(list(v.values())) for k, v in train_labels.items()}
        # outdomain
        outdomain = config_parameters['outdomain']
        outdomain_label = label_encoder_sub.transform([outdomain])[0]
        logger.info("Outdomain: {}, Outdomain label: {}".format(outdomain, outdomain_label))
    if target_label_name == 'label':
        train_dataloader, cv_dataloader = create_dataloader_train_cv(kaldi_string, train_labels, transform=scaler.transform, target_label_name=target_label_name, **config_parameters['dataloader_args'])
    else:  #'DAT' or 'DADA': separate loaders for out-of-domain and in-domain data.
        outdomain_train_dataloader, indomain_train_dataloader, cv_dataloader = create_dataloader_train_cv(kaldi_string, train_labels, transform=scaler.transform, target_label_name=target_label_name, outdomain_label=outdomain_label, **config_parameters['dataloader_args'])
    if target_label_name == 'label':
        model = getattr(models, config_parameters['model'])(inputdim=inputdim, outputdim=len(label_encoder.classes_), **config_parameters['model_args'])
    else:  # 'DAT' or 'DADA': two output heads (class + domain).
        model = getattr(models, config_parameters['model'])(inputdim=inputdim, outputdim1=len(label_encoder.classes_), outputdim2=len(label_encoder_sub.classes_), **config_parameters['model_args'])
    logger.info("<== Model ==>")
    for line in pformat(model).split('\n'):
        logger.info(line)
    optimizer = getattr(torch.optim, config_parameters['optimizer'])(model.parameters(), **config_parameters['optimizer_args'])
    scheduler = getattr(torch.optim.lr_scheduler, config_parameters['scheduler'])(optimizer, **config_parameters['scheduler_args'])
    criterion = getattr(loss, config_parameters['loss'])(**config_parameters['loss_args'])
    trainedmodelpath = os.path.join(outputdir, 'model.th')
    model = model.to(device)
    criterion_improved = criterion_improver(config_parameters['improvecriterion'])
    header = [
        'Epoch',
        'Lr',
        'Loss(T)',
        'Loss(CV)',
        "Acc(T)",
        "Acc(CV)",
    ]
    for line in tp.header(header, style='grid').split('\n'):
        logger.info(line)
    for epoch in range(1, config_parameters['epochs']+1):
        if target_label_name == 'label':
            train_loss, train_acc = runepoch(train_dataloader, None, model, criterion, target_label_name, optimizer, dotrain=True, epoch=epoch)
        else:  # 'DAT' or 'DADA'
            train_loss, train_acc = runepoch(outdomain_train_dataloader, indomain_train_dataloader, model, criterion, target_label_name, optimizer, dotrain=True, epoch=epoch)
        cv_loss, cv_acc = runepoch(cv_dataloader, None, model, criterion, target_label_name, dotrain=False, epoch=epoch)
        logger.info(
            tp.row(
                (epoch,) + (optimizer.param_groups[0]['lr'],) +
                (str(train_loss), str(cv_loss), str(train_acc), str(cv_acc)),
                style='grid'))
        # Multi-task losses come back as tuples; track the first component.
        epoch_meanloss = cv_loss[0] if type(cv_loss)==tuple else cv_loss
        if epoch % config_parameters['saveinterval'] == 0:
            torch.save({'model': model, 'scaler': scaler, 'encoder': label_encoder, 'config': config_parameters}, os.path.join(outputdir, 'model_{}.th'.format(epoch)))
        # ReduceOnPlateau needs a value to work
        schedarg = epoch_meanloss if scheduler.__class__.__name__ == 'ReduceLROnPlateau' else None
        scheduler.step(schedarg)
        # Keep the best model (by the configured improvement criterion).
        if criterion_improved(epoch_meanloss):
            torch.save({'model': model, 'scaler': scaler, 'encoder': label_encoder, 'config': config_parameters}, trainedmodelpath)
        # Stop once the learning rate has decayed to (near) zero.
        if optimizer.param_groups[0]['lr'] < 1e-7:
            break
    logger.info(tp.bottom(len(header), style='grid'))
    logger.info("Results are in: {}".format(outputdir))
def get_output(config):
    """Fuse several p(c|x) RNN classifiers over all subsets ("streams") of
    domains, weighting each subset by task priors, and return per-utterance
    ``log(num) - prior_weight * log(denom)`` arrays keyed by utterance id.

    Task priors are either fixed (comma-separated floats in
    ``config.task_prior``) or data-driven ("dp": a sharp softmax over the
    mean per-domain VAE likelihoods).
    """
    # Load all P(x) and P(c|x) models
    model_pcx = config.models_pcx.split(',')
    model_px = config.models_px.split(',')
    if len(model_pcx) != len(model_px):
        # NOTE(review): only warns and continues; a mismatch fails later on indexing.
        print("Number of p(x) models and p(c|x) models are not the same!")
    num_domains = len(model_px)
    # All subsets of the domain indices; each subset is one "stream".
    streams = powerset(list(np.arange(num_domains)))
    all_pcx_models = []
    all_px_models = []
    for idx, m in enumerate(model_pcx):
        # Load checkpoints on CPU regardless of where they were saved.
        nnet = torch.load(model_pcx[idx], map_location=lambda storage, loc: storage)
        vae = torch.load(model_px[idx], map_location=lambda storage, loc: storage)
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'], nnet['num_layers'],
                        nnet['hidden_dim'], nnet['num_classes'], nnet['dropout'])
        model.load_state_dict(nnet['model_state_dict'])
        all_pcx_models.append(model)
        model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                        vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0, False)
        model.load_state_dict(vae['model_state_dict'])
        all_px_models.append(model)
    # Uses the *last* loaded checkpoint's class count (assumes all agree).
    num_classes = nnet['num_classes']
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    sm = torch.nn.Softmax(1)
    # Feature transform: either overridden on the command line or from the egs config.
    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # Build the Kaldi feature-reading pipe.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(
            trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])
    # Load prior
    priors = config.priors.split(',')
    priors = [pickle.load(open(f, 'rb')) for f in priors]
    if config.task_prior == "dp":
        print("using data based task priors")
    else:
        task_prior = config.task_prior.split(',')
        task_prior = [float(tp) for tp in task_prior]
    post_dict = {}
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        num = np.zeros((mat.shape[0], num_classes))
        denom = np.zeros(num_classes)
        # Add a leading batch dimension of 1.
        mat = Variable(torch.FloatTensor(mat))[None, :, :]
        batch_l = Variable(torch.IntTensor([mat.size(1)]))
        px_save = []
        all_pcx = []
        all_px = []
        all_tp = []
        for idx, model in enumerate(all_pcx_models):
            model.eval()
            out = model(mat, batch_l)
            ae_out, latent_out = all_px_models[idx](mat, batch_l)
            latent_out = (latent_out[0][0, :, :], latent_out[1][0, :, :])
            # Per-frame p(x) from the VAE loss, exponentiated back to a likelihood.
            px = np.exp(
                vae_loss(mat[0, :, :], ae_out[0, :, :], latent_out).data.numpy())
            px_save.append(np.mean(px))
            pcx = sm(out[0, :, :])
            px = np.tile(px, (pcx.shape[1], 1)).T
            all_pcx.append(pcx.data.numpy())
            # NOTE(review): p(x) is replaced by ones here, so only all_tp
            # carries the VAE evidence into the fusion — confirm intent.
            all_px.append(np.ones(px.shape))
            if config.task_prior == "dp":
                all_tp.append(px_save[idx])
            else:
                all_tp.append(task_prior[idx])
        if config.task_prior == "dp":
            # Sharp (temperature 1/300) softmax over mean likelihoods.
            all_tp = np.asarray(all_tp, dtype=np.float64)
            all_tp = np.exp(300 * all_tp) / np.sum(np.exp(300 * all_tp))
        # Accumulate numerator/denominator over every stream (domain subset).
        for idx, st in enumerate(streams):
            num_prod = np.ones((num.shape[0], num_classes))
            denom_prod = np.ones(num_classes)
            perf_mon = 1
            for b in st:
                num_prod *= all_pcx[b]  # np.power(all_pcx[b], all_tp[b])
                perf_mon *= all_tp[b]
                denom_prod *= np.exp(priors[b])
            denom_prod /= np.sum(denom_prod)
            # Renormalize the posterior product across classes per frame.
            num_prod = num_prod / np.tile(
                np.sum(num_prod, axis=1)[:, None], (1, num_prod.shape[1]))
            num += num_prod * perf_mon
            denom += denom_prod
        print_log = ""
        for ii, x in enumerate(px_save):
            print_log += "p(x) for Task {:d} ={:.6f} with prior ={:.6f} ".format(
                ii, x, all_tp[ii])
        print(print_log)
        sys.stdout.flush()
        post_dict[utt_id] = np.log(num) - config.prior_weight * np.log(denom)
    return post_dict
#!/bin/env python
# Micro-benchmark: time reading an uncompressed vs. a compressed feature ark,
# then report the per-utterance absolute difference between the two.
import numpy as np
import kaldi_io
import timeit

start = timeit.default_timer()
orig = dict(kaldi_io.read_mat_ark('data/feats.ark'))
print(timeit.default_timer() - start)

start = timeit.default_timer()
comp = dict(kaldi_io.read_mat_ark('data/feats_compressed.ark'))
print(timeit.default_timer() - start)

# ~8-10x slower, this is already reasonable,
for key in orig:
    print(key, np.sum(np.abs(comp[key] - orig[key])))
# => The values are not identical, but very similar.
# Can it be the `order' of arithmetic operations?
def trainGMMWithKaldi(wavFile, mdlFile, frameRate, segLen, kaldiRoot, vad, localGMM, numMix):
    # Given an audio file and GMM trained in Kaldi, compute mfcc features and frame-level posteriors
    # Inputs:
    # wavFile: Full path to wave file  # A string
    # mdlFile: Full path to Kaldi model file  # A string
    # frameRate: Number of frames per seconds  # A scalar
    # segLen: Length of segment (in seconds)  # A scalar
    # kaldiRoot: Full path to root directory of kaldi installation  # A string
    # vad: Voiced activity decisions at frame level  # A numpy logical array
    # localGMM: Whether to disregard the model file and train a GMM locally  # A boolean variable
    # numMix: number of mixture in the GMM. Relevant only if localGMM=True  # A scalar
    #
    # Returns (mfcc, vad, segLikes): voiced MFCC frames, the (possibly padded/
    # trimmed) VAD mask, and per-segment mean posteriors.
    #
    # NOTE(review): the shell commands below are built by string concatenation
    # and run through os.system/os.popen — paths containing spaces or shell
    # metacharacters will break or be interpreted by the shell. Prefer
    # subprocess.run([...], shell=False) if inputs can be untrusted.
    os.system('mkdir local_kaldi_data')
    # Single-utterance scp: key "temp" -> the input wav.
    with open("local_kaldi_data/temp.scp", "w") as input_scp:
        input_scp.write("temp %s" % wavFile)
    os.system(
        kaldiRoot + '/src/featbin/compute-mfcc-feats --frame-shift=' +
        str(1000 / frameRate) +
        ' --frame-length=40 --use-energy=true --num-ceps=19 scp:local_kaldi_data/temp.scp ark:local_kaldi_data/raw.ark'
    )
    # If using velocity & acceleration features
    # os.system(kaldiRoot+'/src/featbin/compute-mfcc-feats --frame-shift='+str(1000/frameRate)+' --frame-length=40 --use-energy=false --num-ceps=19 scp:local_kaldi_data/temp.scp ark:- | '+kaldiRoot+'/src/featbin/add-deltas ark:- ark:local_kaldi_data/raw.ark')
    os.system(
        kaldiRoot +
        '/src/featbin/compute-cmvn-stats ark:local_kaldi_data/raw.ark ark:local_kaldi_data/cmvn.ark'
    )
    os.system(
        kaldiRoot +
        '/src/featbin/apply-cmvn ark:local_kaldi_data/cmvn.ark ark:local_kaldi_data/raw.ark ark,scp:local_kaldi_data/out.ark,local_kaldi_data/out.scp'
    )
    # The ark holds a single utterance; align the VAD mask to the MFCC length.
    for key, mat in kaldi_io.read_mat_ark('local_kaldi_data/out.ark'):
        if vad is None:
            vad = doVADWithKaldi(wavFile, frameRate, kaldiRoot)
        if mat.shape[0] > vad.shape[0]:
            # Pad VAD with "unvoiced" to match the feature count.
            vad = np.hstack(
                (vad, np.zeros(mat.shape[0] - vad.shape[0]).astype('bool'))).astype('bool')
        elif mat.shape[0] < vad.shape[0]:
            vad = vad[:mat.shape[0]]
        mfcc = mat[vad, :]
    if localGMM == False:
        # Query the pre-trained model for its mixture count, then compute posteriors.
        numMix = os.popen(kaldiRoot + '/src/gmmbin/gmm-global-info ' + mdlFile +
                          ' | grep "number of gaussians" | awk \'{print $NF}\''
                          ).readlines()[0].strip('\n')
        os.system(
            kaldiRoot + '/src/gmmbin/gmm-global-get-post --n=' + numMix + ' ' + mdlFile +
            ' ark:local_kaldi_data/out.ark ark:local_kaldi_data/post.ark')
    else:
        # Train a diagonal UBM on the extracted features, then compute posteriors.
        pwd = os.getcwd()
        os.system(
            "sed \"s~local_kaldi_data~${PWD}/local_kaldi_data~g\" local_kaldi_data/out.scp > local_kaldi_data/feats.scp"
        )
        os.system("echo \"temp temp\" > local_kaldi_data/utt2spk")
        # Point the training script at the requested Kaldi installation.
        os.system("sed -i \"/export KALDI_ROOT/c\export KALDI_ROOT=" + kaldiRoot +
                  "\" train_diag_ubm.sh")
        os.system(
            "bash train_diag_ubm.sh --num-iters 20 --num-frames 500000 --nj 1 --num-gselect " +
            str(numMix) + " " + pwd + "/local_kaldi_data/ " + str(numMix) + " " + pwd +
            "/local_kaldi_data/")
        os.system(
            kaldiRoot + '/src/gmmbin/gmm-global-get-post --n=' + str(numMix) +
            ' local_kaldi_data/final.dubm ark:local_kaldi_data/out.ark ark:local_kaldi_data/post.ark'
        )
    for key, post in kaldi_io.read_post_ark('local_kaldi_data/post.ark'):
        # Sort posteriors according to the mixture index
        for frameI in range(len(post)):
            post[frameI] = sorted(post[frameI], key=lambda x: x[0])
        # Keep only the posterior values (drop the mixture indices), voiced frames only.
        post = np.asarray(post)[:, :, 1]
        post = post[vad]
    # Average posteriors over fixed-length segments.
    segSize = frameRate * segLen
    segLikes = []
    for segI in range(int(np.ceil(float(post.shape[0]) / (frameRate * segLen)))):
        startI = segI * segSize
        endI = (segI + 1) * segSize
        if endI > post.shape[0]:
            endI = mfcc.shape[0] - 1
        if endI == startI:
            # Reached the end
            break
        segLikes.append(np.mean(post[startI:endI, :], axis=0))
    os.system("rm -rf local_kaldi_data")
    return mfcc, vad, np.asarray(segLikes)
# GOP (goodness-of-pronunciation) scoring script: reads posterior/likelihood
# matrices plus pdf- and phone-level alignments, and writes frame-, phone-
# and utterance-level score files.
import sys
import kaldi_io
import numpy as np

# Command-line layout (positional):
#   1: posterior ark  2: likelihood ark  3: pdf-alignment ark
#   4: phone-alignment ark  5: max silence phone id (int)
#   6: frame-score output  7: phone-score output  8: utterance-score output
post_path = sys.argv[1]
like_path = sys.argv[2]
pdfali_path = sys.argv[3]
phoneali_path = sys.argv[4]
sil_id_not_bigger_than = int(sys.argv[5])
frame_score_dest = sys.argv[6]
phone_score_dest = sys.argv[7]
score_dest = sys.argv[8]

# read in
posts = {key: mat for key, mat in kaldi_io.read_mat_ark(post_path)}
likes = {key: mat for key, mat in kaldi_io.read_mat_ark(like_path)}
pdfalis = {key: vec for key, vec in kaldi_io.read_ali_ark(pdfali_path)}
phonealis = {key: vec for key, vec in kaldi_io.read_ali_ark(phoneali_path)}

# Output streams (NOTE(review): never closed in this view; relies on
# interpreter exit to flush — confirm acceptable for this script).
f_f = open(frame_score_dest, 'w')
f_p = open(phone_score_dest, 'w')
f = open(score_dest, 'w')
#f.write('wav_id gop_posterior gop_likelihood gop_likelihood_ratio\n')

# Iterate utterances keyed by the pdf alignments; assumes every key is also
# present in posts/likes/phonealis (KeyError otherwise).
# NOTE(review): the loop body below only gathers per-utterance data — the
# scoring logic appears to be truncated in this chunk; confirm against the
# full file.
for key in pdfalis.keys():
    post = posts[key]
    like = likes[key]
    pdfali = pdfalis[key]
    phoneali = phonealis[key]
def train(self, config, **kwargs):
    """Train a PHQ8 depression model (score regression + binary target).

    Builds the experiment directory, estimates a feature scaler over the
    training ark stream, constructs dataloaders/model/criterion, and runs
    an ignite train/validation loop with early stopping, checkpointing and
    LR scheduling.

    Args:
        config: path to a configuration file (merged with ``kwargs`` by
            ``parse_config_or_kwargs``).
        **kwargs: overrides for individual configuration keys.

    Returns:
        The experiment output directory path (str).
    """
    config_parameters = parse_config_or_kwargs(config, **kwargs)
    # Unique run directory: timestamp + uuid suffix.
    # NOTE(review): '%Y-%m-%d_%H-%M-%m' repeats the month (%m) where
    # seconds (%S) were probably intended; the uuid keeps dirs unique
    # regardless — confirm before changing.
    outputdir = os.path.join(
        config_parameters['outputpath'], config_parameters['model'],
        "{}_{}".format(
            datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%m'),
            uuid.uuid1().hex))
    # Keep only the best (lowest validation loss) checkpoint.
    checkpoint_handler = ModelCheckpoint(
        outputdir,
        'run',
        n_saved=1,
        require_empty=False,
        create_dir=True,
        score_function=lambda engine: -engine.state.metrics['Loss'],
        save_as_state_dict=False,
        score_name='loss')
    # Kaldi copy-feats pipeline strings for train/dev features.
    train_kaldi_string = parsecopyfeats(
        config_parameters['trainfeatures'],
        **config_parameters['feature_args'])
    dev_kaldi_string = parsecopyfeats(config_parameters['devfeatures'],
                                      **config_parameters['feature_args'])
    logger = genlogger(os.path.join(outputdir, 'train.log'))
    logger.info("Experiment is stored in {}".format(outputdir))
    for line in pformat(config_parameters).split('\n'):
        logger.info(line)
    # Fit the (sklearn-style) scaler incrementally over the whole train set;
    # also discover the input feature dimension from the stream.
    scaler = getattr(
        pre,
        config_parameters['scaler'])(**config_parameters['scaler_args'])
    inputdim = -1
    logger.info("<== Estimating Scaler ({}) ==>".format(
        scaler.__class__.__name__))
    for _, feat in kaldi_io.read_mat_ark(train_kaldi_string):
        scaler.partial_fit(feat)
        inputdim = feat.shape[-1]
    assert inputdim > 0, "Reading inputstream failed"
    logger.info("Features: {} Input dimension: {}".format(
        config_parameters['trainfeatures'], inputdim))
    logger.info("<== Labels ==>")
    # Labels come from CSVs indexed by participant id (as string, to match
    # the ark utterance keys).
    train_label_df = pd.read_csv(
        config_parameters['trainlabels']).set_index('Participant_ID')
    dev_label_df = pd.read_csv(
        config_parameters['devlabels']).set_index('Participant_ID')
    train_label_df.index = train_label_df.index.astype(str)
    dev_label_df.index = dev_label_df.index.astype(str)
    # target_type = ('PHQ8_Score', 'PHQ8_Binary')
    target_type = ('PHQ8_Score', 'PHQ8_Binary')
    n_labels = len(target_type)  # PHQ8 + Binary
    # Scores and their respective PHQ8
    train_labels = train_label_df.loc[:, target_type].T.apply(
        tuple).to_dict()
    dev_labels = dev_label_df.loc[:, target_type].T.apply(tuple).to_dict()
    train_dataloader = create_dataloader(
        train_kaldi_string,
        train_labels,
        transform=scaler.transform,
        shuffle=True,
        **config_parameters['dataloader_args'])
    cv_dataloader = create_dataloader(
        dev_kaldi_string,
        dev_labels,
        transform=scaler.transform,
        shuffle=False,
        **config_parameters['dataloader_args'])
    model = getattr(models, config_parameters['model'])(
        inputdim=inputdim,
        output_size=n_labels,
        **config_parameters['model_args'])
    # Optionally warm-start from a pretrained model; only the matching
    # sub-network (lstm vs net) is copied.
    if 'pretrain' in config_parameters:
        logger.info("Loading pretrained model {}".format(
            config_parameters['pretrain']))
        pretrained_model = torch.load(config_parameters['pretrain'],
                                      map_location=lambda st, loc: st)
        if 'Attn' in pretrained_model.__class__.__name__:
            model.lstm.load_state_dict(pretrained_model.lstm.state_dict())
        else:
            model.net.load_state_dict(pretrained_model.net.state_dict())
    logger.info("<== Model ==>")
    for line in pformat(model).split('\n'):
        logger.info(line)
    criterion = getattr(
        losses, config_parameters['loss'])(**config_parameters['loss_args'])
    # The criterion may itself have trainable parameters, so they are
    # optimized jointly with the model.
    optimizer = getattr(torch.optim, config_parameters['optimizer'])(
        list(model.parameters()) + list(criterion.parameters()),
        **config_parameters['optimizer_args'])
    poolingfunction = parse_poolingfunction(
        config_parameters['poolingfunction'])
    criterion = criterion.to(device)
    model = model.to(device)

    def _train_batch(_, batch):
        # One optimizer step; returns the scalar loss for RunningAverage.
        model.train()
        with torch.enable_grad():
            optimizer.zero_grad()
            outputs, targets = Runner._forward(model, batch,
                                               poolingfunction)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            return loss.item()

    def _inference(_, batch):
        # Forward pass only; metrics are computed from the returned pair.
        model.eval()
        with torch.no_grad():
            return Runner._forward(model, batch, poolingfunction)

    def meter_transform(output):
        # Extract the binary head for precision/recall/F1.
        y_pred, y = output
        # y_pred is of shape [Bx2] (0 = MSE, 1 = BCE)
        # y = is of shape [Bx2] (0=Mse, 1 = BCE)
        return torch.sigmoid(y_pred[:, 1]).round(), y[:, 1].long()

    precision = Precision(output_transform=meter_transform, average=False)
    recall = Recall(output_transform=meter_transform, average=False)
    F1 = (precision * recall * 2 / (precision + recall)).mean()
    metrics = {
        'Loss': Loss(criterion),
        'Recall': Recall(output_transform=meter_transform, average=True),
        'Precision': Precision(output_transform=meter_transform,
                               average=True),
        # MAE is computed on the regression head (column 0).
        'MAE': MeanAbsoluteError(
            output_transform=lambda out: (out[0][:, 0], out[1][:, 0])),
        'F1': F1
    }
    train_engine = Engine(_train_batch)
    inference_engine = Engine(_inference)
    for name, metric in metrics.items():
        metric.attach(inference_engine, name)
    RunningAverage(output_transform=lambda x: x).attach(
        train_engine, 'run_loss')
    pbar = ProgressBar(persist=False)
    pbar.attach(train_engine, ['run_loss'])
    scheduler = getattr(torch.optim.lr_scheduler,
                        config_parameters['scheduler'])(
                            optimizer, **config_parameters['scheduler_args'])
    early_stop_handler = EarlyStopping(
        patience=5,
        score_function=lambda engine: -engine.state.metrics['Loss'],
        trainer=train_engine)
    inference_engine.add_event_handler(Events.EPOCH_COMPLETED,
                                       early_stop_handler)
    inference_engine.add_event_handler(Events.EPOCH_COMPLETED,
                                       checkpoint_handler, {
                                           'model': model,
                                           'scaler': scaler,
                                           'config': config_parameters
                                       })

    @train_engine.on(Events.EPOCH_COMPLETED)
    def compute_metrics(engine):
        # Run validation after every training epoch and log all metrics.
        inference_engine.run(cv_dataloader)
        validation_string_list = [
            "Validation Results - Epoch: {:<3}".format(engine.state.epoch)
        ]
        for metric in metrics:
            validation_string_list.append("{}: {:<5.2f}".format(
                metric, inference_engine.state.metrics[metric]))
        logger.info(" ".join(validation_string_list))
        # Reset the progress bar counters for the next epoch.
        pbar.n = pbar.last_print_n = 0

    @inference_engine.on(Events.COMPLETED)
    def update_reduce_on_plateau(engine):
        # ReduceLROnPlateau needs the metric value; other schedulers do not.
        val_loss = engine.state.metrics['Loss']
        if 'ReduceLROnPlateau' == scheduler.__class__.__name__:
            scheduler.step(val_loss)
        else:
            scheduler.step()

    train_engine.run(train_dataloader,
                     max_epochs=config_parameters['epochs'])
    # Return for further processing
    return outputdir
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

import sys
import pickle
import kaldi_io as kio
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument('model', help='file with CMVN model')
args = parser.parse_args()

# Load CMVN model.
# Use a context manager so the file is closed even if unpickling fails.
# NOTE: pickle.load runs arbitrary code — only load trusted model files.
with open(args.model, 'rb') as in_file:
    scaler = pickle.load(in_file)

# Transform each feature matrix from the stdin ark stream and write the
# normalized matrices to stdout (binary Kaldi ark on both ends).
for utt, x in kio.read_mat_ark(sys.stdin.buffer):
    x_transformed = scaler.transform(x)
    kio.write_mat(sys.stdout.buffer, x_transformed, key=utt)
def feats_ark_generator(ark, name):
    """Read a Kaldi feature ark and hand the stream to prepend_generator
    together with its name."""
    return prepend_generator(kaldi_io.read_mat_ark(ark), name)
def get_output(config):
    """Run a trained CURL classifier over features and collect posteriors.

    Builds a Kaldi feature pipeline (optional pca/cmvn transform plus
    optional frame splicing), forwards each utterance through the model,
    and returns a dict utt_id -> numpy posterior matrix. Depending on
    flags, outputs are raw logits, softmaxed, or prior-compensated
    log-softmax values.
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetCurlClassifier(nnet['feature_dim'] * nnet['num_frames'],
                               nnet['encoder_num_layers'],
                               nnet['decoder_num_layers'],
                               nnet['classifier_num_layers'],
                               nnet['hidden_dim'],
                               nnet['hidden_dim_classifier'], nnet['bn_dim'],
                               nnet['comp_num'], nnet['num_classes'],
                               use_gpu=False)
    model.load_state_dict(nnet['model_state_dict'])
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    lsm = torch.nn.LogSoftmax(1)
    sm = torch.nn.Softmax(1)
    # Feature transform spec is "type,path"; CLI override wins over the
    # one stored with the egs config.
    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # Build the Kaldi pipe command for the chosen transform.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(
            trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)
    # Optional left/right frame splicing appended to the pipe.
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])
    if config.prior:
        prior = pickle.load(open(config.prior, 'rb'))
    post_dict = {}
    model.eval()
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        # Add a batch dimension; batch_l holds the sequence length.
        mat = Variable(torch.FloatTensor(mat))[None, :, :]
        batch_l = Variable(torch.IntTensor([mat.size(1)]))
        out, _, _ = model(mat, batch_l)
        if config.prior:
            # Prior-compensated log-posteriors (scaled-likelihood style).
            post_dict[utt_id] = lsm(
                out[0, :, :]).data.numpy() - config.prior_weight * prior
        else:
            if config.add_softmax:
                post_dict[utt_id] = sm(out[0, :, :]).data.numpy()
            else:
                post_dict[utt_id] = out[0, :, :].data.numpy()
    return post_dict
} # binary-ark, flt_vec3 = {k: v for k, v in kaldi_io.read_vec_flt_ark('data/conf_ascii.ark') } # ascii-ark, # - store, with kaldi_io.open_or_fd('data_re-saved/conf.ark', 'wb') as f: for k, v in flt_vec.items(): kaldi_io.write_vec_flt(f, v, k) # - read and compare, for k, v in kaldi_io.read_vec_flt_ark('data_re-saved/conf.ark'): assert (np.array_equal(v, flt_vec[k])) print('testing matrix i/o') flt_mat = {k: m for k, m in kaldi_io.read_mat_scp('data/feats_ascii.scp') } # ascii-scp, flt_mat2 = {k: m for k, m in kaldi_io.read_mat_ark('data/feats_ascii.ark') } # ascii-ark, flt_mat3 = {k: m for k, m in kaldi_io.read_mat_ark('data/feats.ark')} # ascii-ark, # - store, with kaldi_io.open_or_fd('data_re-saved/mat.ark', 'wb') as f: for k, m in flt_mat3.items(): kaldi_io.write_mat(f, m, k) # - read and compare, for k, m in kaldi_io.read_mat_ark('data_re-saved/mat.ark'): assert (np.array_equal(m, flt_mat3[k])) print('all tests passed...')
def update(config):
    """Adapt an acoustic model using a performance-monitoring autoencoder.

    Loads a feedforward acoustic model and a frozen autoencoder (PM block),
    then, per epoch, batches mean-subtracted posteriors over utterances and
    updates the acoustic model by minimizing the AE reconstruction loss.
    Dev loss/FER are logged before and after each epoch, and the learning
    rate is decayed every epoch.
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        # (note: `id` shadows the builtin; left unchanged here)
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    logging.info('Hidden Dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    criterion = nn.MSELoss()
    dev_criterion = nn.CrossEntropyLoss()

    # Select the optimizer by name.
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    lr = config.learning_rate

    # Figure out all feature stuff
    # Shuffle the scp into a temp file so utterances stream in random order.
    shell_cmd = "cat {:s} | shuf > temp".format(config.scp)
    r = subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)

    feats_config = pickle.load(open(config.egs_config, 'rb'))

    # NOTE(review): if feats_config['feat_type'] is falsy, feat_type is
    # never bound and the branch below raises NameError — the code assumes
    # a transform spec is always present. Confirm against callers.
    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, 'temp')
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, 'temp')
    else:
        cmd = 'temp'

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Load performance monitoring model
    pm_model = torch.load(config.pm, map_location=lambda storage, loc: storage)
    ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                              pm_model['bn_dim'],
                              pm_model['encoder_num_layers'],
                              pm_model['decoder_num_layers'],
                              pm_model['hidden_dim'])
    ae_model.load_state_dict(pm_model['model_state_dict'])

    if config.use_gpu:
        ae_model.cuda()

    for p in ae_model.parameters(
    ):  # Do not update performance monitoring block
        p.requires_grad = False

    mean, _ = get_cmvn(config.cmvn)

    ep_loss_dev = []
    ep_fer_dev = []

    # Dev set: last column is the label, the rest are features.
    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=config.batch_size,
                                              shuffle=True)

    init_fer = True
    if init_fer:
        # Compute initial performance on dev set
        val_losses = []
        val_fer = []
        for batch_x, batch_l in data_loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)
            _, batch_x = model(batch_x)
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())
            if config.use_gpu:
                val_fer.append(
                    compute_fer(batch_x.cpu().data.numpy(),
                                batch_l.cpu().data.numpy()))
            else:
                val_fer.append(
                    compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))
        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))
        print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
            np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

    for epoch in range(config.epochs):
        # Accumulator for padded posterior sequences (one slot per utt).
        if config.use_gpu:
            batch = torch.empty(0, config.max_seq_len,
                                pm_model['feature_dim']).cuda()
        else:
            batch = torch.empty(0, config.max_seq_len,
                                pm_model['feature_dim'])
        lens = []
        utt_count = 0
        update_num = 0
        val_losses = []
        val_fer = []
        tr_losses = []

        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            model.eval()
            if config.use_gpu:
                out = model(Variable(torch.FloatTensor(mat)).cuda())
            else:
                out = model(Variable(torch.FloatTensor(mat)))

            # Mean-normalize the model posteriors before feeding the AE.
            if config.use_gpu:
                post = out[1] - torch.FloatTensor(mean).cuda()
            else:
                post = out[1] - torch.FloatTensor(mean)

            # Pad/truncate bookkeeping to a fixed max_seq_len.
            lens.append(min(post.shape[0], config.max_seq_len))
            post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
            batch = torch.cat([batch, post[None, :, :]], 0)
            utt_count += 1
            sys.stdout.flush()

            if utt_count == config.batch_size:
                update_num += 1
                #### DO THE ADAPTATION
                # Sort by length (descending) as required downstream.
                lens = torch.IntTensor(lens)
                _, indices = torch.sort(lens, descending=True)
                batch_x = batch[indices]
                batch_l = lens[indices]

                # Optional time-shifted reconstruction target.
                if config.time_shift == 0:
                    outputs = ae_model(batch_x, batch_l)
                else:
                    outputs = ae_model(batch_x[:, :-config.time_shift, :],
                                       batch_l - config.time_shift)

                optimizer.zero_grad()
                if config.time_shift == 0:
                    loss = stable_mse(outputs, batch_x)
                else:
                    loss = stable_mse(outputs,
                                      batch_x[:, config.time_shift:, :])
                tr_losses.append(loss.item())
                loss.backward()
                optimizer.step()

                # Reset the accumulator for the next mini-batch.
                if config.use_gpu:
                    batch = torch.empty(0, config.max_seq_len,
                                        pm_model['feature_dim']).cuda()
                else:
                    batch = torch.empty(0, config.max_seq_len,
                                        pm_model['feature_dim'])
                lens = []
                utt_count = 0

        # CHECK IF ADAPTATION IS WORKING AT ALL
        for batch_x, batch_l in data_loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)
            _, batch_x = model(batch_x)
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())
            if config.use_gpu:
                val_fer.append(
                    compute_fer(batch_x.cpu().data.numpy(),
                                batch_l.cpu().data.numpy()))
            else:
                val_fer.append(
                    compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))
        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))
        print_log = "Epoch: {:d} update, Tr MSE Loss: {:.3f} :: Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
            epoch, np.mean(tr_losses), np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

        # Persist per-epoch dev curves.
        torch.save(
            ep_loss_dev,
            open(
                os.path.join(model_dir,
                             "dev_epoch{:d}.loss".format(epoch + 1)), 'wb'))
        torch.save(
            ep_fer_dev,
            open(
                os.path.join(model_dir,
                             "dev_epoch{:d}.fer".format(epoch + 1)), 'wb'))

        # Change learning rate to half
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
def get_output(config):
    """Forward features through one of several AE-based classifier variants.

    Selects the model architecture by ``config.ae_type`` ("normal", "vae",
    "noae", "vaeenc"), builds the Kaldi feature pipeline (transform +
    optional splicing), and returns a dict utt_id -> numpy posterior
    matrix (raw, softmaxed, or prior-compensated log-softmax).
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    if config.ae_type == "normal":
        model = nnetAEClassifierMultitask(
            nnet['feature_dim'] * nnet['num_frames'], nnet['num_classes'],
            nnet['encoder_num_layers'], nnet['classifier_num_layers'],
            nnet['ae_num_layers'], nnet['hidden_dim'], nnet['bn_dim'],
            nnet['enc_dropout'])
    elif config.ae_type == "vae":
        model = nnetVAEClassifier(nnet['feature_dim'] * nnet['num_frames'],
                                  nnet['num_classes'],
                                  nnet['encoder_num_layers'],
                                  nnet['classifier_num_layers'],
                                  nnet['ae_num_layers'], nnet['hidden_dim'],
                                  nnet['bn_dim'], nnet['enc_dropout'],
                                  use_gpu=False)
    elif config.ae_type == "noae":
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'],
                        nnet['num_layers'], nnet['hidden_dim'],
                        nnet['num_classes'], nnet['dropout'])
    elif config.ae_type == "vaeenc":
        # NOTE(review): hard-coded VAE encoder checkpoint path — likely an
        # experiment-specific override; confirm before reuse.
        nnet[
            'vaeenc'] = "exp_hybrid/hybrid_lll/nnet_vae_enc2l_dec2l_300nodes/exp_1.dir/exp_1__epoch_160.model"
        vae = torch.load(nnet['vaeenc'],
                         map_location=lambda storage, loc: storage)
        vae_model = nnetVAE(vae['feature_dim'] * vae['num_frames'],
                            vae['encoder_num_layers'],
                            vae['decoder_num_layers'], vae['hidden_dim'],
                            vae['bn_dim'], 0, False)
        model = VAEEncodedClassifier(vae_model, vae['bn_dim'],
                                     nnet['num_layers'], nnet['hidden_dim'],
                                     nnet['num_classes'])
    else:
        print("Model type {} not supported!".format(config.ae_type))
        sys.exit(1)
    model.load_state_dict(nnet['model_state_dict'])
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    lsm = torch.nn.LogSoftmax(1)
    sm = torch.nn.Softmax(1)
    # Feature transform spec is "type,path"; CLI override wins.
    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # Build the Kaldi pipe command for the chosen transform.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(
            trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])
    if config.prior:
        prior = pickle.load(open(config.prior, 'rb'))
    post_dict = {}
    model.eval()
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        # Add batch dimension; batch_l is the sequence length tensor.
        mat = Variable(torch.FloatTensor(mat))[None, :, :]
        batch_l = Variable(torch.IntTensor([mat.size(1)]))
        # The variants return different tuple shapes.
        if config.ae_type == "normal":
            out, _ = model(mat, batch_l)
        elif config.ae_type == "vae":
            out, _, _ = model(mat, batch_l)
        elif config.ae_type == "noae":
            out = model(mat, batch_l)
        elif config.ae_type == "vaeenc":
            out = model(mat, batch_l)
        if config.prior:
            post_dict[utt_id] = lsm(
                out[0, :, :]).data.numpy() - config.prior_weight * prior
        else:
            if config.add_softmax:
                post_dict[utt_id] = sm(out[0, :, :]).data.numpy()
            else:
                post_dict[utt_id] = out[0, :, :].data.numpy()
    return post_dict
nn_Phn = 'Babel-ML17_SBN80_PhnStates3096' else: #logging.info('Unknown option %s for NN weights, cannot extract posteriors. Valid options are: FisherMono, FisherTri, BabelMulti',nn) sys.exit() if os.path.dirname(sys.argv[0]) == '': nn_weights_Phn = 'nn_weights/' + nn_Phn + '.npz' else: nn_weights_Phn = os.path.dirname( sys.argv[0]) + '/nn_weights/' + nn_Phn + '.npz' nn_weights_Phn = np.load(nn_weights_Phn) utterance_names_file = wavdir + '/utterance_names' name_mfcc_dict = { key: mat for key, mat in kaldi_io.read_mat_ark(mfcc_ark_file) } with open(mfcc_comb_ark_file, 'wb') as f: for line in open(utterance_names_file): name = line.strip('\n') audio_input = wavdir + '/' + name + '.wav' #extract fbanks and BN features try: mfcc = name_mfcc_dict[name] except: logging.info("There is no mfcc feautre for %s", audio_input) continue try: signal = read_signal(audio_input)
args = parser.parse_args()

# Input/output locations derived from the experiment directory.
idx_to_phn_name_file = os.path.join(args.exp_dir, 'phn_sil_to_idx.txt')
phn_to_idx_file = os.path.join(args.exp_dir, 'phn_sil_to_idx.int')
pfeats_name_to_idx_file = os.path.join(args.exp_dir, 'pfeats_name_to_idx.txt')
out_file = os.path.join(args.eval_dir, 'accuracy.txt')
output = open(out_file, 'w')

# Mappings
idx_to_pfeats_name = read_inv_phone_map(pfeats_name_to_idx_file)
idx_to_phn_name = read_inv_phone_map(idx_to_phn_name_file)
pfeats_map = PFeatsMap(phn_to_idx_file, args.lang)

# Inputs
aligns_it = kio.read_vec_int_ark(args.align)
pfeats_it = kio.read_mat_ark(args.pfeats)

# Counters
# Use the builtin `int` dtype: the `np.int` alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24 (it raised AttributeError there).
phn_cnt = np.zeros(pfeats_map.phn_dim(), dtype=int)
phn_correct = np.zeros_like(phn_cnt)
pfeats_correct = np.zeros(pfeats_map.pfeats_dim(), dtype=int)

# Evaluate accuracy
# Each ark item is (utt_key, frames); index 1 holds the per-frame data.
for utt_phones, utt_pfeats_real in zip(aligns_it, pfeats_it):
    for phone, pfeats_real in zip(utt_phones[1], utt_pfeats_real[1]):
        # exp() suggests the network emits log-probabilities — TODO confirm.
        pfeats_real = np.exp(pfeats_real)
        if not pfeats_map.is_phn_valid(phone):
            continue
        pfeats_true = pfeats_map.phn_to_pfeats(phone)
        pfeats_pred = np.round(pfeats_real)
# Build a map utt-id -> pdf-level alignment from the gzipped alignment ark.
utt2ali = {
    key: ali
    for key, ali in tqdm(
        kaldi_io.read_vec_int_ark(
            f'ark: gunzip -c {args.ali_dir}/ali_pdf.1.gz|'))
}
# First pass: feed every alignment to the stretch model.
i = 0
for key, ali in tqdm(utt2ali.items()):
    i += 1
    ali_stretch_model.add_utts(ali)
logger.info(f"AliStretchModel processed {i} utterances")
ali_stretch_model.compute()
ali_stretch_model.save_to_file()
logger.info(f"Loaded {len(utt2ali)} alis")
logger.info(f"Loading logprobs and train model")
# Second pass: pair each log-prob matrix with its alignment and feed the
# id->loglike model; utterances without an alignment are skipped.
i = 0
for k, m in tqdm(
        kaldi_io.read_mat_ark(f'ark: cat {args.ali_dir}/output.1.ark |'),
        total=len(utt2ali)):
    i += 1
    # Membership test directly on the dict (no need for .keys()).
    if k not in utt2ali:
        logger.warning(f"Ali for {k} does not exist")
        continue
    ali = utt2ali[k]
    id2ll_model.add_utts(ali, m)
logger.info(f"Id2LoglikeAMModel processed {i} utterances")
id2ll_model.compute()
id2ll_model.save_to_file()
logger.info(f"Done.")
def load_dataset(fea_scp,
                 fea_opts,
                 lab_folder,
                 lab_opts,
                 left,
                 right,
                 max_sequence_length,
                 fea_only=False):
    """Load Kaldi features (and optionally frame alignments) as one big array.

    Features are streamed through a ``copy-feats`` pipe with ``fea_opts``
    appended; alignments come from the gzipped ali files in ``lab_folder``
    processed by ``lab_opts``. Sequences longer than ``max_sequence_length``
    are split into chunks. Returns ``[snt_name, fea_conc, lab_conc,
    end_index]`` where the matrices are length-sorted concatenations and
    ``end_index`` marks each sentence boundary.
    """
    fea = {
        k: m
        for k, m in kaldi_io.read_mat_ark('ark:copy-feats scp:' + fea_scp +
                                          ' ark:- |' + fea_opts)
    }
    if not fea_only:
        lab = {
            k: v
            for k, v in kaldi_io.read_vec_int_ark(
                'gunzip -c ' + lab_folder + '/ali*.gz | ' + lab_opts + ' ' +
                lab_folder + '/final.mdl ark:- ark:-|') if k in fea
        }  # Note that I'm copying only the aligments of the loaded fea
        fea = {
            k: v
            for k, v in fea.items() if k in lab
        }  # This way I remove all the features without an aligment (see log file in alidir "Did not Succeded")

    end_snt = 0
    end_index = []
    snt_name = []
    fea_conc = []
    lab_conc = []
    # (removed unused local counter `tmp` from the original)

    for k in sorted(sorted(fea.keys()), key=lambda k: len(fea[k])):
        #####
        # If the sequence length is above the threshold, we split it with a minimal length max/4
        # If max length = 500, then the split will start at 500 + (500/4) = 625.
        # A seq of length 625 will be splitted in one of 500 and one of 125
        if (len(fea[k]) > max_sequence_length) and max_sequence_length > 0:
            fea_chunked = []
            lab_chunked = []
            for i in range((len(fea[k]) + max_sequence_length - 1) //
                           max_sequence_length):
                if (len(fea[k][i * max_sequence_length:]) >
                        max_sequence_length + (max_sequence_length / 4)):
                    fea_chunked.append(fea[k][i * max_sequence_length:(i + 1) *
                                              max_sequence_length])
                    if not fea_only:
                        lab_chunked.append(
                            lab[k][i * max_sequence_length:(i + 1) *
                                   max_sequence_length])
                    else:
                        # No alignments requested: dummy zero labels.
                        lab_chunked.append(
                            np.zeros((fea[k][i * max_sequence_length:(i + 1) *
                                             max_sequence_length].shape[0], )))
                else:
                    # Last remainder chunk (up to max + max/4 frames long).
                    fea_chunked.append(fea[k][i * max_sequence_length:])
                    if not fea_only:
                        lab_chunked.append(lab[k][i * max_sequence_length:])
                    else:
                        lab_chunked.append(
                            np.zeros(
                                (fea[k][i * max_sequence_length:].shape[0], )))
                    break
            for j in range(0, len(fea_chunked)):
                fea_conc.append(fea_chunked[j])
                lab_conc.append(lab_chunked[j])
                snt_name.append(k + '_split' + str(j))
        else:
            fea_conc.append(fea[k])
            if not fea_only:
                lab_conc.append(lab[k])
            else:
                lab_conc.append(np.zeros((fea[k].shape[0], )))
            snt_name.append(k)

    # Re-sort chunks by length so fea/lab stay paired.
    # NOTE(review): snt_name is NOT re-sorted here, so names can drift out
    # of order relative to fea_conc when splitting changes chunk ordering —
    # this matches the original behavior; confirm before changing.
    fea_zipped = zip(fea_conc, lab_conc)
    fea_sorted = sorted(fea_zipped, key=lambda x: x[0].shape[0])
    fea_conc, lab_conc = zip(*fea_sorted)

    # Cumulative frame counts mark where each sentence ends.
    for entry in fea_conc:
        end_snt = end_snt + entry.shape[0]
        end_index.append(end_snt)

    fea_conc = np.concatenate(fea_conc)
    lab_conc = np.concatenate(lab_conc)

    return [snt_name, fea_conc, lab_conc, np.asarray(end_index)]
exit() LCLDA_filename = sys.argv[1] LCPLDA_filename = sys.argv[2] spkmean_filename = sys.argv[3] tsne_filename = sys.argv[4] save_path = sys.argv[5] # read LCLDA mat into lda_vec col_indx = 0 lclda_vec = {'Description':' This vec set store the columns of the LDA matrix from all speakers.'} lclda_vec2 = {'Description':' This vec set store the lastcolumns of the LDA matrix from all speakers.'} dim = 0 for k,m in kaldi_io.read_mat_ark(LCLDA_filename): dim = m.shape[1] #numcol vec = m[col_indx,:] lclda_vec[k] = vec lclda_vec2[k] = m[-1,:] # read LCPDA mat into lda_vec lcplda_vec = {'Description':' This vec set store the columns of the LDA matrix from all speakers.'} lcplda_vec2 = {'Description':' This vec set store the lastcolumns of the LDA matrix from all speakers.'} dim = 0 for k,m in kaldi_io.read_mat_ark(LCPLDA_filename): dim = m.shape[1] #numcol vec = m[col_indx,:] lcplda_vec[k] = vec
def sample(data_path: str,
           encoder_path: str,
           vocab_path: str,
           sample_length: int = 30,
           output: str = None,
           ch: bool = True):
    """Generate captions for audio features with a trained encoder/decoder.

    Args:
        data_path: feature spec passed to parsecopyfeats (Kaldi pipeline).
        encoder_path: torch dump holding encoder/decoder models, the
            feature scaler and the training config.
        vocab_path: torch-saved vocabulary with an ``idx2word`` mapping.
        sample_length: max number of decoded tokens per utterance.
        output: destination file, or None to write a table to stdout.
        ch: if True join tokens without spaces (Chinese-style output),
            otherwise join with spaces.
    """
    dump = torch.load(encoder_path, map_location=lambda storage, loc: storage)
    encodermodel = dump['encodermodel']
    decodermodel = dump['decodermodel']
    # Some scaler (sklearn standardscaler)
    scaler = dump['scaler']
    # Also load previous training config
    config_parameters = dump['config']
    vocab = torch.load(vocab_path)
    # print(encodermodel)
    # print(decodermodel)
    # load images from previous
    encodermodel = encodermodel.to(DEVICE).eval()
    decodermodel = decodermodel.to(DEVICE).eval()
    kaldi_string = parsecopyfeats(data_path,
                                  **config_parameters['feature_args'])
    # Table column width scales with the sample length.
    width_length = sample_length * 4
    with stdout_or_file(output) as writer:
        writer.write(
            tp.header(["InputUtterance", "Output Sentence"],
                      style='grid',
                      width=width_length))
        writer.write('\n')
        sentences = set()
        for k, features in kaldi_io.read_mat_ark(kaldi_string):
            # Normalize with the scaler fitted at training time.
            features = scaler.transform(features)
            # Add single batch dimension
            features = torch.from_numpy(features).to(DEVICE).unsqueeze(0)
            # Generate an caption embedding
            encoded_feature, hiddens = encodermodel(features)
            sampled_ids = decodermodel.sample(encoded_feature,
                                              states=hiddens,
                                              maxlength=sample_length)
            # (1, max_seq_length) -> (max_seq_length)
            sampled_ids = sampled_ids[0].cpu().numpy()
            # Convert word_ids to words
            sampled_caption = []
            for word_id in sampled_ids:
                word = vocab.idx2word[word_id]
                sampled_caption.append(word)
                if word == '<end>':
                    break
            if ch:
                sentence = ''.join(sampled_caption)
            else:
                sentence = ' '.join(sampled_caption)
            sentences.add(sentence)
            # Print out the image and the generated caption
            writer.write(
                tp.row([k, sentence], style='grid', width=width_length))
            writer.write('\n')
            writer.flush()
        writer.write(tp.bottom(2, style='grid', width=width_length))
        writer.write('\n')
        writer.write('Number of unique sentences: ' + str(len(sentences)))
# Log the run configuration before extraction starts.
log.info("n_cores: " + str(n_cores))
log.info("variables to extract: " + extract_var_name)
log.info("side info: " + side_info)
log.info("store option: " + store_option)
log.info("store format: " + store_format)
log.info("context: " + str(context))
log.info("architecture: " + arch)
log.info("use_gpu: " + str(use_gpu))
log.info("instance_norm: " + str(instance_norm))

log.info("Extracting embeddings")

overlap = window - shift

# Choose the feature source: either apply sliding CMVN + VAD on the fly,
# or read the scp directly assuming VAD was applied upstream.
# (Note: vad_scp is compared against the literal string "None".)
if (vad_scp != "None"):
    log.info("Will apply VAD to the features.")
    feat_rsp = "ark:apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 scp:" + scp + " ark:- | select-voiced-frames ark:- scp:" + vad_scp + " ark:- |"
    feats_generator = kaldi_io.read_mat_ark(feat_rsp)
else:
    log.info("Assuming VAD has already been applied to the features.")
    feat_rsp = "scp:" + scp
    feats_generator = kaldi_io.read_mat_scp(feat_rsp)

if use_gpu:
    # Detect which GPU to use
    # Picks GPUs whose free/total memory ratio exceeds 0.98 (i.e. idle).
    command = 'nvidia-smi --query-gpu=memory.free,memory.total --format=csv |tail -n+2| awk \'BEGIN{FS=" "}{if ($1/$3 > 0.98) print NR-1}\''
    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = subprocess.check_output(
            command, shell=True).decode('utf-8').rsplit('\n')[0]
        log.info("CUDA_VISIBLE_DEVICES " + os.environ["CUDA_VISIBLE_DEVICES"])
    except subprocess.CalledProcessError:
        log.info("No GPU seems to be available")
} plotdata_bykey(feats, key, flag_decode) print(' '.join([ transid2info[it]['phone'] + transid2info[it]['hmmstate'] # + transid2info[it]['transtext'] for it in alignment[key] ])) if __name__ == '__main__': IS_DECODE = False # True # feats = { k: m for k, m in kaldi_io.read_mat_ark('../feats/feats_train.ark') } keey = list(feats.keys())[0] #keey = 'Click3_14717' plot_ali(keey, IS_DECODE) IS_DECODE = True plot_ali(keey, IS_DECODE) plt.show() # >>> ll = plt.plot_ali(x,y) # >>> xl = plt.xlabel('horizontal axis') # >>> yl = plt.ylabel('vertical axis') # >>> ttl = plt.title('sine function') # >>> ax = plt.axis([-2, 12, -1.5, 1.5]) # >>> grd = plt.grid(True) # >>> txt = plt.text(0,1.3,'here is some text')
def update(config):
    """Score utterances with pre-trained performance-monitoring (PM) RNN autoencoders.

    Loads a feed-forward acoustic model plus one seq2seq RNN-AE per entry in
    ``config.pms``, forwards every utterance from the Kaldi feature pipeline,
    computes a per-utterance PM loss for each AE, and pickles the score dict
    to ``config.out_file``.

    :param config: parsed arguments; uses model, use_gpu, log_file, loss,
        pm_index, scp, egs_config, override_trans_path, pms, decoder_input,
        cmvns, out_file.
    """
    # Load the feed-forward acoustic model checkpoint (CPU map, moved to GPU below if requested).
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'], nnet['num_layers'],
                            nnet['hidden_dim'], nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID ('id' renamed so the builtin is not shadowed)
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id
        model = model.cuda()

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename=config.log_file,
        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    # FIX: previously logged nnet['feature_dim'] under "Hidden Dimension".
    logging.info('Hidden Dimension: %d' % (nnet['hidden_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))

    if config.loss == "MSE":
        criterion = samplewise_mse
    elif config.loss == "L1":
        criterion = samplewise_abs
    else:
        logging.info('Loss function {:s} is not supported'.format(config.loss))
        sys.exit(1)

    # Layer indices at which each PM model taps the nnet (0 == softmax output).
    pi = [int(t) for t in config.pm_index.split(',')]

    # Figure out all feature stuff
    shuff_file = config.scp
    feats_config = pickle.load(open(config.egs_config, 'rb'))

    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
        if config.override_trans_path is not None:
            trans_path = config.override_trans_path
        if feat_type == "pca":
            cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, shuff_file)
        elif feat_type == "cmvn":
            cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, shuff_file)
        else:
            cmd = shuff_file

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])

    # Load performance monitoring models
    pm_paths = config.pms.split(',')
    pm_models = []
    feat_dims = []
    for path in pm_paths:
        pm_model = torch.load(path, map_location=lambda storage, loc: storage)
        ae_model = seq2seqRNNAE(pm_model['feature_dim'], pm_model['feature_dim'],
                                pm_model['encoder_num_layers'], pm_model['decoder_num_layers'],
                                pm_model['hidden_dim'], False, config.decoder_input)
        ae_model.load_state_dict(pm_model['model_state_dict'])
        feat_dims.append(pm_model['feature_dim'])
        if config.use_gpu:
            ae_model.cuda()
        for p in ae_model.parameters():  # Do not update performance monitoring block
            p.requires_grad = False
        pm_models.append(ae_model)

    # (removed a duplicated `pm_paths = config.pms.split(',')` here)
    if len(pi) != len(pm_paths):
        logging.error("Number of pm models {:d} and number indices {:d} do not match".format(len(pm_paths), len(pi)))
        # FIX: exit with a non-zero status on error (was sys.exit(0), i.e. success).
        sys.exit(1)

    cmvn_paths = config.cmvns.split(',')
    means = []
    for path in cmvn_paths:
        mean, _ = get_cmvn(path)
        means.append(mean)

    if len(cmvn_paths) != len(pm_paths):
        logging.error("Number of cmvn paths not equal to number of model paths, exiting training!")
        sys.exit(1)
    else:
        num_pm_models = len(pm_paths)

    # pm_scores[idx][utt_id] -> PM loss of utterance `utt_id` under model `idx`.
    pm_scores = {}
    for idx in range(num_pm_models):
        pm_scores[idx] = {}

    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        batches = []
        lens = mat.shape[0]
        if config.use_gpu:
            out = model(Variable(torch.FloatTensor(mat)).cuda())
        else:
            out = model(Variable(torch.FloatTensor(mat)))

        for idx in range(num_pm_models):
            # out[1] is the final (softmax) output; out[0][pi[idx]] a hidden layer.
            # Subtract the cmvn mean matching this PM model before scoring.
            if config.use_gpu:
                if pi[idx] == 0:
                    post = out[1] - Variable(torch.FloatTensor(means[idx])).cuda()
                else:
                    post = out[0][pi[idx]] - Variable(torch.FloatTensor(means[idx])).cuda()
            else:
                if pi[idx] == 0:
                    # FIX: was means[0]; use the per-model mean, matching the GPU path.
                    post = out[1] - Variable(torch.FloatTensor(means[idx]))
                else:
                    post = out[0][pi[idx]] - Variable(torch.FloatTensor(means[idx]))
            batches.append(post)

        ## Get the PM scores
        lens = torch.IntTensor([lens])
        for idx in range(num_pm_models):
            batch_x = batches[idx][None, :, :]  # add a batch dimension of 1
            ae_model = pm_models[idx]
            outputs = ae_model(batch_x, lens)
            loss = criterion(outputs, batch_x).mean()
            pm_scores[idx][utt_id] = loss.item()

    # Single-argument os.path.join was a no-op; write scores directly.
    pickle.dump(pm_scores, open(config.out_file, "wb"))
def create_dataloader_train_cv(feature_string, train_label_dict, transform=None,
                               target_label_name='label', outdomain_label=0, **kwargs):
    """Build train/CV dataloaders from a Kaldi feature pipeline.

    :param feature_string: kaldi feature pipeline string e.g., copy-feats ark:file.ark ark:- |
    :param train_label_dict: Mappings from each kaldi ark key to its label
    :param transform: Feature transformation, usually scaler.transform
    :param target_label_name: 'label' for plain training; anything else
        ('DAT'/'DADA') enables domain-adversarial mode, where each label's
        second column carries the domain id
    :param outdomain_label: domain-id value marking out-of-domain utterances
    :param **kwargs: Other parameters ('percent' train split size, 'batch_size')
    :returns: (train_dataloader, cv_dataloader) when target_label_name == 'label',
        otherwise (outdomain_train_dataloader, indomain_train_dataloader, cv_dataloader)
    """
    train_percentage = kwargs.get('percent', 90) / 100
    batch_size = kwargs.get('batch_size', 8)

    def valid_feat(item):
        """Checks if the utterance has a label.

        :param item: (key, matrix) pair from kaldi_io.read_mat_ark
        """
        # FIX: docstring was corrupted ("read_mat_arkoftmax(logits, dim=1)").
        return item[0] in train_label_dict

    features = []
    labels = []
    # Directly filter out all utterances without labels
    # (dropped the unused enumerate index from the original loop)
    for k, feat in filter(valid_feat, kaldi_io.read_mat_ark(feature_string)):
        if transform:
            feat = transform(feat)
        features.append(feat)
        labels.append(train_label_dict[k])

    # 90/10 split for training data
    if target_label_name == 'label':
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, train_size=train_percentage, stratify=labels, random_state=0)
        # Train dataset
        train_dataset = ListDataset(X_train, y_train)
        train_dataloader = torch.utils.data.DataLoader(
            train_dataset, shuffle=True, batch_size=batch_size, collate_fn=collate_fn)
        # CV dataset
        cv_dataset = ListDataset(X_test, y_test)
        cv_dataloader = torch.utils.data.DataLoader(
            cv_dataset, shuffle=False, batch_size=batch_size, collate_fn=collate_fn)
    else:  # 'DAT' or 'DADA'
        outdomain_utt_num = sum(np.array(labels)[:, 1] == outdomain_label)
        indomain_utt_num = sum(np.array(labels)[:, 1] != outdomain_label)
        outdomain_features, outdomain_labels = [], []
        indomain_features, indomain_labels = [], []
        # Partition utterances by domain id (column 1 of each label).
        for feature, label in zip(features, labels):
            if label[1] == outdomain_label:
                outdomain_features.append(feature)
                outdomain_labels.append(label)
            else:
                indomain_features.append(feature)
                indomain_labels.append(label)
        assert outdomain_utt_num + indomain_utt_num == len(labels), "Outdomain label error!"

        # Hold out CV only from the out-of-domain portion.
        X_train1, X_cv1, y_train1, y_cv1 = train_test_split(
            outdomain_features, outdomain_labels, train_size=train_percentage,
            stratify=outdomain_labels, random_state=0)

        # Oversample the train dataset that has fewer samples
        random_oversampler = RandomOverSampler(random_state=0)
        X = X_train1 + indomain_features
        y = y_train1 + indomain_labels
        # Only sample_indices_ is needed; the feature argument is a dummy tensor.
        random_oversampler.fit_resample(torch.empty(len(X), 1), np.array(y)[:, 1])
        indices = random_oversampler.sample_indices_  # FIX: 'indicies' typo

        len1 = len(y_train1)
        print("Outdomain num: {}".format(len1))
        len2 = len(indomain_labels)
        print("Indomain num: {}".format(len2))

        # Re-split the (now balanced) resampled indices back into the two domains.
        X_train_outdomain, y_train_outdomain = [], []
        X_train_indomain, y_train_indomain = [], []
        for index in indices:
            feature = X[index]
            label = y[index]
            if label[1] == outdomain_label:
                X_train_outdomain.append(feature)
                y_train_outdomain.append(label)
            else:
                X_train_indomain.append(feature)
                y_train_indomain.append(label)
        assert len(y_train_outdomain) == len(y_train_indomain), "indomain_num != outdomain_num"
        print("Outdomain/indomain num: {}".format(len(y_train_outdomain)))

        outdomain_train_dataset = ListDataset(X_train_outdomain, y_train_outdomain)
        outdomain_train_dataloader = torch.utils.data.DataLoader(
            outdomain_train_dataset, shuffle=True, batch_size=batch_size, collate_fn=collate_fn)
        indomain_train_dataset = ListDataset(X_train_indomain, y_train_indomain)
        indomain_train_dataloader = torch.utils.data.DataLoader(
            indomain_train_dataset, shuffle=True, batch_size=batch_size, collate_fn=collate_fn)

        # CV dataset
        cv_dataset = ListDataset(X_cv1, y_cv1)
        cv_dataloader = torch.utils.data.DataLoader(
            cv_dataset, shuffle=False, batch_size=batch_size, collate_fn=collate_fn)

    if target_label_name == 'label':
        return train_dataloader, cv_dataloader
    else:  # 'DAT' or 'DADA'
        return outdomain_train_dataloader, indomain_train_dataloader, cv_dataloader
def update(config):
    """Unsupervised adaptation of a feed-forward nnet with multi-layer RNN-AE
    performance-monitoring losses plus a supervised regularization term.

    Loads the nnet from ``config.model``, one autoencoderRNN PM model per path
    in ``config.pms``, accumulates padded per-layer nnet outputs into batches
    over a shuffled Kaldi feature pipeline, and updates the nnet by minimizing
    the product of the PM losses plus a cross-entropy regularizer on held-out
    labelled data. Checkpoints and dev metrics are written per epoch.
    """
    # Load model (checkpoint mapped to CPU; moved to GPU below if requested)
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'], nnet['num_layers'],
                            nnet['hidden_dim'], nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, config.experiment_name),
        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    # NOTE(review): logs feature_dim under "Hidden Dimension" — likely meant
    # nnet['hidden_dim']; confirm before relying on this log line.
    logging.info('Hidden Dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    criterion = nn.MSELoss()                  # unused here; stable_mse does the AE loss below
    dev_criterion = nn.CrossEntropyLoss()     # dev evaluation + regularization loss

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    lr = config.learning_rate

    # Figure out all feature stuff: shuffle the scp into a pid-named temp file.
    # NOTE(review): this temp .scp is never deleted — consider cleaning it up.
    shuff_file = str(os.getpid()) + '.scp'
    shell_cmd = "cat {:s} | shuf > {:s}".format(config.scp, shuff_file)
    r = subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)
    feats_config = pickle.load(open(config.egs_config, 'rb'))

    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
        if feat_type == "pca":
            cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, shuff_file)
        elif feat_type == "cmvn":
            cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, shuff_file)
        else:
            cmd = shuff_file

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])

    # Load performance monitoring models (one RNN autoencoder per path)
    pm_paths = config.pms.split(',')
    pm_models = []
    feat_dims = []
    for path in pm_paths:
        pm_model = torch.load(path, map_location=lambda storage, loc: storage)
        ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                                  pm_model['bn_dim'], pm_model['encoder_num_layers'],
                                  pm_model['decoder_num_layers'], pm_model['hidden_dim'])
        ae_model.load_state_dict(pm_model['model_state_dict'])
        feat_dims.append(pm_model['feature_dim'])
        if config.use_gpu:
            ae_model.cuda()
        for p in ae_model.parameters():  # Do not update performance monitoring block
            p.requires_grad = False
        pm_models.append(ae_model)

    cmvn_paths = config.cmvns.split(',')
    means = []
    for path in cmvn_paths:
        mean, _ = get_cmvn(path)
        means.append(mean)

    if len(cmvn_paths) != len(pm_paths):
        logging.error("Number of cmvn paths not equal to number of model paths, exiting training!")
        sys.exit(1)
    else:
        num_pm_models = len(pm_paths)

    # Load dev set data
    ep_loss_dev = []
    ep_fer_dev = []
    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=50000, shuffle=True)

    # Regularization data: labelled egs sampled batch-wise during adaptation.
    load_chunk = torch.load(config.regularized_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset_reg = nnetDataset(dev_data, dev_labels)
    sampler = torch.utils.data.RandomSampler(dataset_reg)
    reg_set = list(torch.utils.data.BatchSampler(sampler, batch_size=50000, drop_last=False))
    samp_range = len(reg_set)

    init_fer = True
    if init_fer:
        # Compute initial performance on dev set
        val_losses = []
        val_fer = []
        for batch_x, batch_l in data_loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)
            # model returns (hidden outputs, final output); only the final output is scored
            _, batch_x = model(batch_x)
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())
            if config.use_gpu:
                val_fer.append(compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))
        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))
        print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
            np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

    for epoch in range(config.epochs):
        # One empty accumulation batch per PM model, shaped (0, max_seq_len, dim).
        batches = []
        for idx in range(num_pm_models):
            if config.use_gpu:
                batch = torch.empty(0, config.max_seq_len, feat_dims[idx]).cuda()
            else:
                batch = torch.empty(0, config.max_seq_len, feat_dims[idx])
            batches.append(batch)

        lens = []
        utt_count = 0
        update_num = 0
        val_losses = []
        val_fer = []
        tr_losses = []
        reg_losses = []
        for idx in range(num_pm_models):
            tr_losses.append([])

        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            # Sequences longer than max_seq_len are truncated by the padding below.
            lens.append(min(mat.shape[0], config.max_seq_len))
            if config.use_gpu:
                out = model(Variable(torch.FloatTensor(mat)).cuda())
            else:
                out = model(Variable(torch.FloatTensor(mat)))

            # PM model 0 watches the final output (mean-normalized with means[0]).
            if config.use_gpu:
                post = out[1] - Variable(torch.FloatTensor(means[0])).cuda()
            else:
                post = out[1] - Variable(torch.FloatTensor(means[0]))
            post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
            batch = batches[0]
            batch = torch.cat([batch, post[None, :, :]], 0)
            batches[0] = batch

            # Remaining PM models watch hidden layers counted from the top (out[0][-idx]).
            for idx in range(1, num_pm_models):
                if config.use_gpu:
                    post = out[0][-idx] - Variable(torch.FloatTensor(means[idx])).cuda()
                else:
                    post = out[0][-idx] - Variable(torch.FloatTensor(means[idx]))
                post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
                batch = batches[idx]
                batch = torch.cat([batch, post[None, :, :]], 0)
                batches[idx] = batch

            utt_count += 1
            if utt_count == config.batch_size:
                update_num += 1
                ## DO THE ADAPTATION: sort batch by length (descending) as the AEs expect.
                lens = torch.IntTensor(lens)
                _, indices = torch.sort(lens, descending=True)
                if config.use_gpu:
                    loss_all = torch.FloatTensor([1]).cuda()
                else:
                    loss_all = torch.FloatTensor([1])
                for idx in range(num_pm_models):
                    batch_x = batches[idx][indices]
                    ae_model = pm_models[idx]
                    batch_l = lens[indices]
                    # time_shift > 0 turns reconstruction into short-horizon prediction.
                    if config.time_shift == 0:
                        outputs = ae_model(batch_x, batch_l)
                    else:
                        outputs = ae_model(batch_x[:, :-config.time_shift, :], batch_l - config.time_shift)
                    # NOTE(review): zero_grad inside this loop; gradients only flow from
                    # the single backward() on loss_total below.
                    optimizer.zero_grad()
                    if config.time_shift == 0:
                        loss = stable_mse(outputs, batch_x)
                    else:
                        loss = stable_mse(outputs, batch_x[:, config.time_shift:, :])
                    loss_all *= loss  # objective is the *product* of per-layer PM losses
                    tl = tr_losses[idx]
                    tl.append(loss.item())
                    tr_losses[idx] = tl

                # Regularization stuff: one random labelled mega-batch per update.
                ids = reg_set[random.randrange(samp_range)]
                if config.use_gpu:
                    reg_x = Variable(dataset_reg[ids][0]).cuda()
                    reg_l = Variable(dataset_reg[ids][1]).cuda()
                else:
                    reg_x = Variable(dataset_reg[ids][0])
                    reg_l = Variable(dataset_reg[ids][1])
                _, reg_x = model(reg_x)
                loss_reg = dev_criterion(reg_x, reg_l)
                reg_losses.append(loss_reg.item())

                loss_total = loss_all + config.reg_weight * loss_reg
                loss_total.backward()
                optimizer.step()

                # Reset accumulators for the next adaptation batch.
                batches = []
                for idx in range(num_pm_models):
                    if config.use_gpu:
                        batch = torch.empty(0, config.max_seq_len, feat_dims[idx]).cuda()
                    else:
                        batch = torch.empty(0, config.max_seq_len, feat_dims[idx])
                    batches.append(batch)
                lens = []
                utt_count = 0

        logging.info("Finished unsupervised adaptation for epoch {:d} with multi-layer RNN-AE Loss".format(epoch))

        # CHECK IF ADAPTATION IS WORKING AT ALL
        for batch_x, batch_l in data_loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)
            _, batch_x = model(batch_x)
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())
            if config.use_gpu:
                val_fer.append(compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))
        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} update ".format(epoch)
        for idx in range(num_pm_models):
            print_log = print_log + "Tr loss layer {:d} = {:.3f} | ".format(idx, np.mean(tr_losses[idx]))
        print_log = print_log + "Regularization Loss: {:.3f}".format(np.mean(reg_losses))
        print_log = print_log + "Dev loss: {:.3f} | Dev FER: {:.2f}".format(np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

        # Save a full checkpoint for this epoch.
        model_path = os.path.join(model_dir, config.experiment_name + '__epoch_%d' % (epoch + 1) + '.model')
        torch.save({
            'epoch': epoch + 1,
            'ep_loss_dev': ep_loss_dev,
            'ep_fer_dev': ep_fer_dev,
            'tr_losses': tr_losses,
            'reg_losses': reg_losses,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()}, (open(model_path, 'wb')))

        # Change learning rate to half
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
nargs='?', help='Prior probabilities in CSV format') parser.add_argument('--inv_log', help='output probabilities in range [0, 1]', action='store_true') args = parser.parse_args() if not args.model.endswith('.h5'): raise TypeError('Unsupported model type. Please use h5 format.') from pdf_model import PdfModel # Load model m = PdfModel.load(args.model) p = 1.0 if args.priors: p = np.genfromtxt(args.priors, delimiter=',') p[p == 0] = 1e-5 # Deal with zero priors # Read feature vectors from stdin and forward them through the nnet for utt_id, feat_mat in kio.read_mat_ark(sys.stdin.buffer): out_mat = m.predict(feat_mat) / p out_mat[out_mat == 0] = 1e-5 if not args.inv_log: out_mat = np.log(out_mat) out_mat[out_mat == -np.inf] = -100 kio.write_mat(sys.stdout.buffer, out_mat, key=utt_id)
def update(config):
    """Adapt a feed-forward nnet with an optional unsupervised RNN-AE pass and
    a supervised pass on self-labelled (pseudo-label) data.

    The unsupervised pass (disabled below via ``unsup_update = False``) trains
    the nnet so that a frozen RNN autoencoder reconstructs its outputs; the
    supervised pass labels the adaptation data with the current model's argmax
    and then updates on a labelled regularization set. Dev loss/FER are logged
    and saved every epoch.
    """
    # Load model (checkpoint mapped to CPU; moved to GPU below if requested)
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'], nnet['num_layers'],
                            nnet['hidden_dim'], nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir, config.experiment_name),
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    # NOTE(review): logs feature_dim under "Hidden Dimension" — likely meant
    # nnet['hidden_dim']; confirm before relying on this log line.
    logging.info('Hidden Dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    criterion = nn.CrossEntropyLoss()  # classification loss (sup pass + dev eval)
    criterion_ae = nn.MSELoss()        # reconstruction loss (unsup pass)

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    lr = config.learning_rate

    # Figure out all feature stuff
    # NOTE(review): fixed temp filename 'temp' in the CWD — collides if two
    # instances run in the same directory, and is never removed.
    shell_cmd = "cat {:s} | shuf > temp".format(config.scp)
    r = subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)
    feats_config = pickle.load(open(config.egs_config, 'rb'))

    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
        if feat_type == "pca":
            cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, 'temp')
        elif feat_type == "cmvn":
            cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, 'temp')
        else:
            cmd = 'temp'

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    ep_loss_dev = []
    ep_fer_dev = []

    # Load performance monitoring model (a single frozen RNN autoencoder)
    pm_model = torch.load(config.pm, map_location=lambda storage, loc: storage)
    ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                              pm_model['bn_dim'], pm_model['encoder_num_layers'],
                              pm_model['decoder_num_layers'], pm_model['hidden_dim'])
    ae_model.load_state_dict(pm_model['model_state_dict'])
    if config.use_gpu:
        ae_model.cuda()
    for p in ae_model.parameters():  # Do not update performance monitoring block
        p.requires_grad = False

    mean, _ = get_cmvn(config.cmvn)

    # Development data
    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader_check = torch.utils.data.DataLoader(
        dataset, batch_size=config.batch_size, shuffle=True)

    # Regularization data: labelled egs sampled batch-wise during adaptation.
    load_chunk = torch.load(config.regularized_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset_reg = nnetDataset(dev_data, dev_labels)
    sampler = torch.utils.data.RandomSampler(dataset_reg)
    reg_set = list(
        torch.utils.data.BatchSampler(sampler, batch_size=config.batch_size, drop_last=False))
    samp_range = len(reg_set)

    # Compute initial performance on dev set
    val_losses = []
    val_fer = []
    for batch_x, batch_l in data_loader_check:
        if config.use_gpu:
            batch_x = Variable(batch_x).cuda()
            batch_l = Variable(batch_l).cuda()
        else:
            batch_x = Variable(batch_x)
            batch_l = Variable(batch_l)
        batch_x = model(batch_x)
        val_loss = criterion(batch_x, batch_l)
        val_losses.append(val_loss.item())
        if config.use_gpu:
            val_fer.append(
                compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
        else:
            val_fer.append(
                compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))
    ep_loss_dev.append(np.mean(val_losses))
    ep_fer_dev.append(np.mean(val_fer))
    print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
        np.mean(val_losses), np.mean(val_fer))
    logging.info(print_log)

    ## BEGIN MAIN EPOCHS!
    # Hard-coded pass switches: unsupervised pass disabled, supervised enabled.
    unsup_update = False
    sup_update = True
    for epoch in range(config.epochs):
        if unsup_update:
            # First lets do an unsupervised update with RNN-AE
            if config.use_gpu:
                batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim']).cuda()
            else:
                batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim'])
            utt_count = 0
            update_num = 0
            ae_loss = []
            lens = []
            model.train()
            for utt_id, mat in kaldi_io.read_mat_ark(cmd):
                # Mean-normalized nnet output, padded/truncated to max_seq_len.
                if config.use_gpu:
                    post = model(Variable(
                        torch.FloatTensor(mat)).cuda()) - Variable(
                            torch.FloatTensor(mean)).cuda()
                else:
                    post = model(Variable(torch.FloatTensor(mat))) - Variable(
                        torch.FloatTensor(mean))
                lens.append(min(post.shape[0], config.max_seq_len))
                post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
                batch = torch.cat([batch, post[None, :, :]], 0)
                utt_count += 1
                if utt_count == 64:  # fixed accumulation batch of 64 utterances
                    update_num += 1
                    #### DO THE ADAPTATION: sort by length (descending) for the AE.
                    lens = torch.IntTensor(lens)
                    _, indices = torch.sort(lens, descending=True)
                    batch_x = batch[indices]
                    batch_l = lens[indices]
                    # time_shift > 0 turns reconstruction into short-horizon prediction.
                    if config.time_shift == 0:
                        outputs = ae_model(batch_x, batch_l)
                    else:
                        outputs = ae_model(batch_x[:, :-config.time_shift, :],
                                           batch_l - config.time_shift)
                    optimizer.zero_grad()
                    if config.time_shift == 0:
                        loss = criterion_ae(outputs, batch_x)
                    else:
                        loss = criterion_ae(outputs, batch_x[:, config.time_shift:, :])
                    ae_loss.append(loss.item() / (config.max_seq_len * 64))
                    loss.backward()
                    optimizer.step()
                    # Reset accumulators for the next 64-utterance batch.
                    if config.use_gpu:
                        batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim']).cuda()
                    else:
                        batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim'])
                    lens = []
                    utt_count = 0
            logging.info('Finished unsupervised update of nnet')
        else:
            logging.info('Skipped unsupervised update of nnet')
            ae_loss = 0

        if sup_update:
            # Add noisy labelled data into training set: pseudo-labels are the
            # argmax of the current model's output on each adaptation utterance.
            new_egs = torch.empty(0, nnet['feature_dim'] * nnet['num_frames'] + 1)
            new_utt_count = 0
            for utt_id, mat in kaldi_io.read_mat_ark(cmd):
                if config.use_gpu:
                    labs = np.argmax(
                        (model(Variable(torch.FloatTensor(mat)).cuda())
                         ).cpu().data.numpy(), axis=1)
                else:
                    labs = np.argmax(
                        (model(Variable(torch.FloatTensor(mat)))).data.numpy(), axis=1)
                add_egs = np.hstack((mat, labs[:, np.newaxis]))
                new_egs = torch.cat([new_egs, torch.FloatTensor(add_egs)])

            ## Update with these new utterances
            train_data = new_egs[:, 0:-1]
            train_labels = new_egs[:, -1].long()
            dataset = nnetDataset(train_data, train_labels)
            data_loader = torch.utils.data.DataLoader(
                dataset, batch_size=config.batch_size, shuffle=True)
            model.train()
            train_losses = []
            reg_losses = []
            tr_fer = []
            reg_fer = []
            for batch_x, batch_l in data_loader:
                # One random labelled regularization batch per training batch.
                ids = reg_set[random.randrange(samp_range)]
                if config.use_gpu:
                    batch_x = Variable(batch_x).cuda()
                    batch_l = Variable(batch_l).cuda()
                    reg_x = Variable(dataset_reg[ids][0]).cuda()
                    reg_l = Variable(dataset_reg[ids][1]).cuda()
                else:
                    batch_x = Variable(batch_x)
                    batch_l = Variable(batch_l)
                    reg_x = Variable(dataset_reg[ids][0])
                    reg_l = Variable(dataset_reg[ids][1])
                batch_x = model(batch_x)
                reg_x = model(reg_x)
                optimizer.zero_grad()
                loss_norm = criterion(batch_x, batch_l)
                loss_reg = criterion(reg_x, reg_l)
                train_losses.append(loss_norm.item())
                reg_losses.append(loss_reg.item())
                if config.use_gpu:
                    tr_fer.append(
                        compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
                    reg_fer.append(
                        compute_fer(reg_x.cpu().data.numpy(), reg_l.cpu().data.numpy()))
                else:
                    tr_fer.append(
                        compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))
                    reg_fer.append(
                        compute_fer(reg_x.data.numpy(), reg_l.data.numpy()))
                # NOTE(review): only the regularization loss is backpropagated;
                # the pseudo-label loss is logged but (deliberately?) not used.
                # loss = config.reg_weight * loss_reg + 0*loss_norm
                loss = loss_reg
                loss.backward()
                optimizer.step()
            logging.info('Finished Supervised update of nnet')
        else:
            logging.info('Skipped Supervised update of nnet')
            train_losses = 0
            tr_fer = 0
            reg_losses = 0
            reg_fer = 0

        ## CHECK IF ADAPTATION IS WORKING AT ALL
        model.eval()
        val_losses = []
        val_fer = []
        for batch_x, batch_l in data_loader_check:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)
            batch_x = model(batch_x)
            val_loss = criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())
            if config.use_gpu:
                val_fer.append(
                    compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
            else:
                val_fer.append(
                    compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))
        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} AE Loss: {:3f}, Train Loss: {:.3f}, Train fer: {:.3f}, Reg Loss: {:.3f}, Reg Fer: {:.3f}, Dev loss: {:.3f}, Dev FER: {:.2f}".format(
            epoch, np.mean(ae_loss), np.mean(train_losses), np.mean(tr_fer),
            np.mean(reg_losses), np.mean(reg_fer), np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

        # Persist per-epoch dev metrics.
        torch.save(
            ep_loss_dev,
            open(
                os.path.join(model_dir, "dev_epoch{:d}.loss".format(epoch + 1)), 'wb'))
        torch.save(
            ep_fer_dev,
            open(
                os.path.join(model_dir, "dev_epoch{:d}.fer".format(epoch + 1)), 'wb'))

        # Change learning rate to half
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
os.makedirs(args.res_dir, exist_ok=True) feats_fd = sys.stdin.buffer flags_fname = os.path.join(args.data_dir, 'text_ext_flags') ali_force_frame_fname = os.path.join(args.cbps_dir, 'force_ali_test/ali_frames.gz') ali_force_fname = os.path.join(args.cbps_dir, 'force_ali_test/ali_pdf.gz') score_fname = os.path.join(args.res_dir, 'score.ark') score_txt_fname = os.path.join(args.res_dir, 'score.txt') score_txt = open(score_txt_fname, 'w') cmp_fd = open('test/cmp_gop.txt', 'w') # Load kaldi files flags_it = kio.read_vec_int_ark(flags_fname) ali_force_it = kio.read_ali_ark(ali_force_fname) ali_force_frm_it = kio.read_ali_ark(ali_force_frame_fname) feats_it = kio.read_mat_ark(feats_fd) with open(score_fname, 'wb') as f: for flags_t, ali_force_t, ali_force_frm_t, feats_t in zip( flags_it, ali_force_it, ali_force_frm_it, feats_it): # Unpack each tuple utt, flags = flags_t _, ali_force = ali_force_t _, ali_force_frm = ali_force_frm_t _, feats = feats_t # Get only features for corresponding states in alignments probs_force = hlp.np_pick(feats, ali_force) # Calculate indexes of segments seg_lengths = hlp.get_seg_lengths(ali_force_frm)
def evaluate_threshold(
        model_path: str,
        features: str = "features/logmel_64/test.ark",
        result_filename: str = 'dev.txt',
        test_labels: str = "metadata/test/test.csv",
        threshold=0.5,
        window=1,
        hop_size: float = 0.02):
    """Run a saved sound-event-detection model over a Kaldi feature archive,
    binarize the frame-wise probabilities, median-filter each class track,
    decode contiguous active regions into events, and score them against a
    reference event list (event-based metric plus onset/offset P/R/F).

    Args:
        model_path: path to a torch checkpoint dict holding 'model',
            'config', 'scaler' and 'encoder' entries.
        features: Kaldi ark/scp of input features, passed through
            ``parsecopyfeats`` before reading.
        result_filename: basename for the text report, written next to the
            model checkpoint.
        test_labels: reference event list (CSV) for evaluation.
        threshold: float for a global threshold, or a path (str) to a
            torch-saved per-class threshold object.
        window: median-filter kernel size (odd int expected by medfilt), or a
            path (str) to a torch-saved per-class window dict.
        hop_size: frame hop in seconds; converts frame indices to times.
    """
    # Local imports keep the heavy dcase_util/scipy deps out of module load.
    from dcase_util.data import ProbabilityEncoder, DecisionEncoder, ManyHotEncoder
    from dcase_util.containers import MetaDataContainer
    from scipy.signal import medfilt
    # map_location forces CPU-side load regardless of where it was trained.
    modeldump = torch.load(
        model_path,
        map_location=lambda storage, loc: storage)
    model = modeldump['model']
    config_parameters = modeldump['config']
    scaler = modeldump['scaler']
    many_hot_encoder = modeldump['encoder']
    model_dirname = os.path.dirname(model_path)
    # Raw (un-windowed) frame predictions are dumped here before evaluation.
    meta_container_resultfile = os.path.join(
        model_dirname, "pred_nowindow.txt")
    metacontainer = MetaDataContainer(filename=meta_container_resultfile)
    kaldi_string = parsecopyfeats(
        features, **config_parameters['feature_args'])
    model = model.to(device).eval()
    probability_encoder = ProbabilityEncoder()
    decision_encoder = DecisionEncoder(
        label_list=many_hot_encoder.label_list
    )
    # A scalar float threshold applies globally; anything else (e.g. a loaded
    # per-class tensor) switches the encoder to class-wise thresholding.
    binarization_type = 'global_threshold' if isinstance(
        threshold, float) else 'class_threshold'
    # If class thresholds are given, then use those
    if isinstance(threshold, str):
        threshold = torch.load(threshold)
    # One median-filter window per label; a str `window` is a path to a
    # torch-saved per-class window mapping that overrides the uniform dict.
    windows = {k: window for k in many_hot_encoder.label_list}
    if isinstance(window, str):
        windows = torch.load(window)
    with torch.no_grad():
        for k, feat in kaldi_io.read_mat_ark(kaldi_string):
            # Add batch dim
            feat = torch.from_numpy(
                scaler.transform(feat)).to(device).unsqueeze(0)
            feat = model(feat)
            # assumes model output is (1, frames, classes) logits — squeeze
            # drops the batch dim; TODO confirm against the model definition.
            probabilities = torch.sigmoid(feat).cpu().numpy().squeeze(0)
            frame_decisions = probability_encoder.binarization(
                probabilities=probabilities,
                binarization_type=binarization_type,
                threshold=threshold,
                time_axis=0,
            )
            for i, label in enumerate(many_hot_encoder.label_list):
                # Median filtering removes spurious single-frame flips.
                label_frame_decisions = medfilt(
                    frame_decisions[:, i], kernel_size=windows[label])
                # Found only zeros, no activity, go on
                if (label_frame_decisions == 0).all():
                    continue
                estimated_events = decision_encoder.find_contiguous_regions(
                    activity_array=label_frame_decisions
                )
                # Convert frame indices to seconds via the hop size.
                for [onset, offset] in estimated_events:
                    metacontainer.append({'event_label': label,
                                          'onset': onset * hop_size,
                                          'offset': offset * hop_size,
                                          'filename': os.path.basename(k)
                                          })
    # Persist predictions, then reload both sides for evaluation.
    metacontainer.save()
    estimated_event_list = MetaDataContainer().load(
        filename=meta_container_resultfile)
    reference_event_list = MetaDataContainer().load(filename=test_labels)
    event_based_metric = event_based_evaluation(
        reference_event_list, estimated_event_list)
    # Onset-only, offset-only, and joint on/offset precision-recall-F1.
    onset_scores = precision_recall_fscore_on_offset(
        reference_event_list, estimated_event_list, offset=False)
    offset_scores = precision_recall_fscore_on_offset(
        reference_event_list, estimated_event_list, onset=False)
    onset_offset_scores = precision_recall_fscore_on_offset(
        reference_event_list, estimated_event_list)
    # Utt wise Accuracy
    precision_labels = precision_recall_fscore_on_offset(
        reference_event_list, estimated_event_list, onset=False,
        offset=False, label=True)
    print(event_based_metric.__str__())
    print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format("UttLabel", *precision_labels))
    print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format("Onset", *onset_scores))
    print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format("Offset", *offset_scores))
    print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format("On-Offset", *onset_offset_scores))
    # Mirror the console report into a text file beside the checkpoint.
    result_filename = os.path.join(model_dirname, result_filename)
    with open(result_filename, 'w') as wp:
        wp.write(event_based_metric.__str__())
        wp.write('\n')
        wp.write("{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format(
            "UttLabel", *precision_labels))
        wp.write(
            "{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format("Onset", *onset_scores))
        wp.write(
            "{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format("Offset", *offset_scores))
        wp.write("{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format(
            "On-Offset",
            *onset_offset_scores))