def get_output(config):
    """Decode utterances by fusing several domain-specific acoustic models.

    Loads one p(c|x) classifier (nnetRNN) and one p(x) density model (nnetVAE)
    per domain, then for every utterance combines the per-domain posteriors
    with task-level weights chosen by ``config.task_prior``:
    "mm" (m-measure based), "dp" (data likelihood based), "lowent" (pick the
    lower-entropy of the two), or an explicit comma-separated list of floats.

    Returns a dict mapping utterance id -> log-posterior matrix minus the
    weighted log class prior (pseudo log-likelihoods for HMM decoding).
    """
    # Load all P(x) and P(c|x) models
    model_pcx = config.models_pcx.split(',')
    model_px = config.models_px.split(',')
    if len(model_pcx) != len(model_px):
        # NOTE(review): only warns, does not abort — execution continues with
        # mismatched lists; confirm this is intentional.
        print("Number of p(x) models and p(c|x) models are not the same!")
    num_domains = len(model_px)
    all_pcx_models = []
    all_px_models = []
    for idx, m in enumerate(model_pcx):
        nnet = torch.load(model_pcx[idx], map_location=lambda storage, loc: storage)
        vae = torch.load(model_px[idx], map_location=lambda storage, loc: storage)
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'], nnet['num_layers'],
                        nnet['hidden_dim'], nnet['num_classes'], nnet['dropout'])
        model.load_state_dict(nnet['model_state_dict'])
        all_pcx_models.append(model)
        model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                        vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0, False)
        model.load_state_dict(vae['model_state_dict'])
        all_px_models.append(model)
    # Assumes all classifiers share the same output dimensionality; the value
    # from the last loaded checkpoint is used for all of them.
    num_classes = nnet['num_classes']
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    sm = torch.nn.Softmax(1)
    # Feature transform selection: command-line override wins over egs config.
    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # Build the Kaldi pipeline that feeds features to kaldi_io.read_mat_ark.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])
    # Load prior
    priors = config.priors.split(',')
    priors = [pickle.load(open(f, 'rb')) for f in priors]
    if config.task_prior == "mm":
        print("using mm-measure based task priors")
    elif config.task_prior == "dp":
        print("using data based task priors")
    elif config.task_prior == "lowent":
        print("Using low-entropy prior")
    else:
        # Fixed, user-supplied per-domain weights.
        task_prior = config.task_prior.split(',')
        task_prior = [float(tp) for tp in task_prior]
    post_dict = {}
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        # mat is a Kaldi feature matrix; presumably (num_frames, feat_dim) — TODO confirm.
        post = np.zeros((mat.shape[0], num_classes))
        prior_acc = np.zeros(num_classes)
        mat = Variable(torch.FloatTensor(mat))[None, :, :]
        batch_l = Variable(torch.IntTensor([mat.size(1)]))
        px_save = []
        all_pcx = []
        all_px = []
        all_tp = []
        all_tp_2 = []
        # One forward pass per domain to collect posteriors and likelihood scores.
        for idx, model in enumerate(all_pcx_models):
            model.eval()
            out = model(mat, batch_l)
            ae_out, latent_out = all_px_models[idx](mat, batch_l)
            latent_out = (latent_out[0][0, :, :], latent_out[1][0, :, :])
            px = (vae_loss(mat[0, :, :], ae_out[0, :, :], latent_out).data.numpy())
            px_save.append(np.mean(px))
            pcx = sm(out[0, :, :])
            px = np.tile(px, (pcx.shape[1], 1)).T
            all_pcx.append(pcx.data.numpy())
            # NOTE(review): frame-level p(x) weights are replaced with ones here,
            # i.e. only the utterance-level task priors modulate the fusion.
            all_px.append(np.ones(px.shape))
            if config.task_prior == "mm":
                mm = mmeasure_loss(pcx).item()
                all_tp.append(mm)
                print("task {:d} , mm={:.2f}".format(idx, mm))
            elif config.task_prior == "dp":
                all_tp.append(px_save[idx])
            elif config.task_prior == "lowent":
                # Keep both candidate priors; the lower-entropy one is chosen below.
                mm = mmeasure_loss(pcx).item()
                all_tp.append(mm)
                all_tp_2.append(px_save[idx])
            else:
                all_tp.append(task_prior[idx])
        # Normalize the task priors into a distribution over domains.
        if config.task_prior == "mm":
            all_tp = np.asarray(all_tp, dtype=np.float64)
            if config.stream_selection:
                # Hard selection: winner-take-all over domains.
                temp = np.zeros(all_tp.shape)
                temp[np.argmax(all_tp)] = 1
                all_tp = temp
            else:
                all_tp = np.exp(all_tp) / np.sum(np.exp(all_tp))
                # Softmax can overflow to NaN; fall back to uniform weights.
                if np.isnan(all_tp[0]):
                    print("Switching to uniform priors")
                    all_tp = np.ones(num_domains) / num_domains
        elif config.task_prior == "dp":
            all_tp = np.asarray(all_tp, dtype=np.float64)
            if config.stream_selection:
                temp = np.zeros(all_tp.shape)
                temp[np.argmax(all_tp)] = 1
                all_tp = temp
            else:
                # Temperature 300 sharpens the likelihood-based softmax.
                all_tp = np.exp(300 * all_tp) / np.sum(np.exp(300 * all_tp))
        elif config.task_prior == "lowent":
            all_tp = np.asarray(all_tp)
            all_tp = np.exp(all_tp) / np.sum(np.exp(all_tp))
            if np.isnan(all_tp[0]):
                print("Switching to uniform priors")
                all_tp = np.ones(num_domains) / num_domains
            all_tp_2 = np.asarray(all_tp_2)
            all_tp_2 = np.exp(300 * all_tp_2) / np.sum(np.exp(300 * all_tp_2))
            print('Entropy dp:{:.2f} and Entropy mm:{:.2f}'.format(entropy(all_tp_2), entropy(all_tp)))
            # Pick whichever prior distribution is more confident (lower entropy).
            if entropy(all_tp_2) < entropy(all_tp):
                all_tp = all_tp_2
        # Weighted sum of per-domain posteriors and class priors.
        for idx, pcx in enumerate(all_pcx):
            post += pcx * all_px[idx] * all_tp[idx]
            prior_acc += np.exp(priors[idx]) * all_tp[idx]
        print_log = ""
        for ii, x in enumerate(px_save):
            print_log += "p(x) for Task {:d} ={:.6f} with prior ={:.6f} ".format(ii, x, all_tp[ii])
        print(print_log)
        sys.stdout.flush()
        post_dict[utt_id] = np.log(post) - config.prior_weight * np.log(prior_acc)
    return post_dict
def get_output(config):
    """Decode utterances with a single trained classifier.

    Supports four architectures selected by ``config.ae_type``:
    "normal" (multitask AE + classifier), "vae" (VAE + classifier),
    "noae" (plain RNN classifier) and "vaeenc" (classifier on a frozen
    VAE encoder).  Exits the process for any other value.

    Returns a dict mapping utterance id -> per-frame output matrix:
    log-softmax minus weighted log prior when ``config.prior`` is set,
    otherwise raw logits or softmax posteriors (``config.add_softmax``).
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    if config.ae_type == "normal":
        model = nnetAEClassifierMultitask(
            nnet['feature_dim'] * nnet['num_frames'], nnet['num_classes'],
            nnet['encoder_num_layers'], nnet['classifier_num_layers'],
            nnet['ae_num_layers'], nnet['hidden_dim'], nnet['bn_dim'],
            nnet['enc_dropout'])
    elif config.ae_type == "vae":
        model = nnetVAEClassifier(nnet['feature_dim'] * nnet['num_frames'],
                                  nnet['num_classes'],
                                  nnet['encoder_num_layers'],
                                  nnet['classifier_num_layers'],
                                  nnet['ae_num_layers'], nnet['hidden_dim'],
                                  nnet['bn_dim'], nnet['enc_dropout'],
                                  use_gpu=False)
    elif config.ae_type == "noae":
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'],
                        nnet['num_layers'], nnet['hidden_dim'],
                        nnet['num_classes'], nnet['dropout'])
    elif config.ae_type == "vaeenc":
        # HACK: the checkpoint's VAE-encoder path is overwritten with a
        # hard-coded experiment path; parameterize this if reused elsewhere.
        nnet['vaeenc'] = "exp_hybrid/hybrid_lll/nnet_vae_enc2l_dec2l_300nodes/exp_1.dir/exp_1__epoch_160.model"
        vae = torch.load(nnet['vaeenc'], map_location=lambda storage, loc: storage)
        vae_model = nnetVAE(vae['feature_dim'] * vae['num_frames'],
                            vae['encoder_num_layers'], vae['decoder_num_layers'],
                            vae['hidden_dim'], vae['bn_dim'], 0, False)
        model = VAEEncodedClassifier(vae_model, vae['bn_dim'],
                                     nnet['num_layers'], nnet['hidden_dim'],
                                     nnet['num_classes'])
    else:
        print("Model type {} not supported!".format(config.ae_type))
        sys.exit(1)
    model.load_state_dict(nnet['model_state_dict'])
    # Use context managers so the pickle file handles are closed promptly
    # (the previous open() calls leaked descriptors).
    with open(config.egs_config, 'rb') as fid:
        feats_config = pickle.load(fid)
    lsm = torch.nn.LogSoftmax(1)
    sm = torch.nn.Softmax(1)
    # Feature transform: command-line override wins over the egs config.
    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # Build the Kaldi feature pipeline command.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])
    if config.prior:
        with open(config.prior, 'rb') as fid:
            prior = pickle.load(fid)
    post_dict = {}
    model.eval()
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        mat = Variable(torch.FloatTensor(mat))[None, :, :]
        batch_l = Variable(torch.IntTensor([mat.size(1)]))
        # Forward pass; AE/VAE variants also return reconstruction outputs
        # which are discarded at decode time.
        if config.ae_type == "normal":
            out, _ = model(mat, batch_l)
        elif config.ae_type == "vae":
            out, _, _ = model(mat, batch_l)
        elif config.ae_type in ("noae", "vaeenc"):
            # Both return only classifier outputs (branches merged).
            out = model(mat, batch_l)
        if config.prior:
            post_dict[utt_id] = lsm(out[0, :, :]).data.numpy() - config.prior_weight * prior
        else:
            if config.add_softmax:
                post_dict[utt_id] = sm(out[0, :, :]).data.numpy()
            else:
                post_dict[utt_id] = out[0, :, :].data.numpy()
    return post_dict
def run(config):
    """Train an RNN classifier on top of a frozen, pre-trained VAE encoder.

    Loads the VAE checkpoint given by ``config.vae_model`` (type selected by
    ``config.vae_type``: "modulation", "arvae", or default VAE), feeds each
    batch through the VAE encoder, per-utterance mean/variance normalizes the
    latent features, and trains an nnetRNN classifier with cross-entropy.
    Logs per-epoch loss/FER, reduces the learning rate and reverts to the
    best weights when validation loss degrades, and periodically checkpoints.
    """
    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir, config.experiment_name),
                        filemode='w')
    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)
    # Load VAE model and define classifier
    vae = torch.load(config.vae_model, map_location=lambda storage, loc: storage)
    if config.vae_type == "modulation":
        # Modulation VAE: bottleneck dimension is nfilters * nrepeats.
        vae_model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                            vae['decoder_num_layers'], vae['hidden_dim'],
                            vae['nfilters'] * vae['nrepeats'], 0, config.use_gpu)
        vae_model.load_state_dict(vae["model_state_dict"])
        model = nnetRNN(vae['nfilters'] * vae['nrepeats'], config.num_layers, config.hidden_dim,
                        config.num_classes, 0)
    elif config.vae_type == "arvae":
        # Autoregressive VAE: one output head per AR step plus the 0 step.
        ar_steps = vae['ar_steps'].split(',')
        ar_steps = [int(x) for x in ar_steps]
        ar_steps.append(0)
        vae_model = nnetARVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                              vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0,
                              len(ar_steps), config.use_gpu)
        vae_model.load_state_dict(vae["model_state_dict"])
        model = nnetRNN(vae['bn_dim'], config.num_layers, config.hidden_dim, config.num_classes, 0)
    else:
        # Plain VAE encoder with bottleneck dimension bn_dim.
        vae_model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                            vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0,
                            config.use_gpu)
        vae_model.load_state_dict(vae["model_state_dict"])
        model = nnetRNN(vae['bn_dim'], config.num_layers, config.hidden_dim, config.num_classes, 0)
    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (config.num_layers))
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Number of Classes: %d' % (config.num_classes))
    logging.info('Data dimension: %d' % (vae['feature_dim']))
    logging.info('Number of Frames: %d' % (vae['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Dropout: %f ' % (config.dropout))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))
    sys.stdout.flush()
    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()
        vae_model = vae_model.cuda()
    criterion = nn.CrossEntropyLoss()
    lr = config.learning_rate
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(), weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    # Load datasets
    dataset_train = nnetDatasetSeq(os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=config.batch_size, shuffle=True)
    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev, batch_size=config.batch_size, shuffle=True)
    # Save the untrained model as epoch 0 (NOTE: file handle from open() is
    # not closed explicitly here).
    model_path = os.path.join(model_dir, config.experiment_name + '__epoch_0.model')
    torch.save({
        'epoch': 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, (open(model_path, 'wb')))
    ep_loss_tr = []
    ep_fer_tr = []
    ep_loss_dev = []
    ep_fer_dev = []
    err_p = 0
    best_model_state = None
    for epoch_i in range(config.epochs):
        ####################
        ##### Training #####
        ####################
        model.train()
        train_losses = []
        tr_fer = []
        # Main training loop
        for batch_x, batch_l, lab in data_loader_train:
            # Sort by length (descending) — presumably required by packed-sequence
            # handling inside the models; TODO confirm.
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])
            optimizer.zero_grad()
            # Main forward pass
            _, batch_x = vae_model(batch_x, batch_l)
            batch_x = batch_x[0]
            # utt wise CMVN normalization
            batch_x = batch_x - torch.cat(batch_x.shape[1] * [torch.mean(batch_x, dim=1)[:, None, :]], dim=1)
            batch_x = batch_x / torch.sqrt(torch.cat(batch_x.shape[1] * [torch.var(batch_x, dim=1)[:, None, :]], dim=1))
            class_out = model(batch_x, batch_l)
            # Flatten padded sequences to (total_frames, num_classes) for the loss.
            class_out = pad2list(class_out, batch_l)
            lab = pad2list(lab, batch_l)
            loss = criterion(class_out, lab)
            train_losses.append(loss.item())
            if config.use_gpu:
                tr_fer.append(compute_fer(class_out.cpu().data.numpy(), lab.cpu().data.numpy()))
            else:
                tr_fer.append(compute_fer(class_out.data.numpy(), lab.data.numpy()))
            loss.backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip_thresh)
            optimizer.step()
        ep_loss_tr.append(np.mean(train_losses))
        ep_fer_tr.append(np.mean(tr_fer))
        ######################
        ##### Validation #####
        ######################
        model.eval()
        val_losses = []
        val_fer = []
        # Main training loop
        for batch_x, batch_l, lab in data_loader_dev:
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])
            optimizer.zero_grad()
            # Main forward pass
            _, batch_x = vae_model(batch_x, batch_l)
            batch_x = batch_x[0]
            # utt wise CMVN normalization
            batch_x = batch_x - torch.cat(batch_x.shape[1] * [torch.mean(batch_x, dim=1)[:, None, :]], dim=1)
            batch_x = batch_x / torch.sqrt(torch.cat(batch_x.shape[1] * [torch.var(batch_x, dim=1)[:, None, :]], dim=1))
            class_out = model(batch_x, batch_l)
            class_out = pad2list(class_out, batch_l)
            lab = pad2list(lab, batch_l)
            loss = criterion(class_out, lab)
            val_losses.append(loss.item())
            if config.use_gpu:
                val_fer.append(compute_fer(class_out.cpu().data.numpy(), lab.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(class_out.data.numpy(), lab.data.numpy()))
        # Manage learning rate and revert model
        if epoch_i == 0:
            err_p = np.mean(val_losses)
            best_model_state = model.state_dict()
        else:
            # If val loss did not improve by at least lr_tol percent, shrink
            # the learning rate and roll back to the best weights so far.
            if np.mean(val_losses) > (100 - config.lr_tol) * err_p / 100:
                logging.info("Val loss went up, Changing learning rate from {:.6f} to {:.6f}".format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = np.mean(val_losses)
                best_model_state = model.state_dict()
        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))
        print_log = "Epoch: {:d} ((lr={:.6f})) Tr loss: {:.3f} :: Tr FER: {:.2f}".format(epoch_i + 1, lr, ep_loss_tr[-1], ep_fer_tr[-1])
        print_log += " || Val: {:.3f} :: Val FER: {:.2f}".format(ep_loss_dev[-1], ep_fer_dev[-1])
        logging.info(print_log)
        # Periodic checkpoint with everything needed to rebuild the model.
        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(model_dir, config.experiment_name + '__epoch_%d' % (epoch_i + 1) + '.model')
            torch.save({
                'epoch': epoch_i + 1,
                'vaeenc': config.vae_model,
                'vae_type': config.vae_type,
                'feature_dim': vae['feature_dim'],
                'num_frames': vae['num_frames'],
                'num_classes': config.num_classes,
                'num_layers': config.num_layers,
                'hidden_dim': config.hidden_dim,
                'ep_loss_tr': ep_loss_tr,
                'ep_loss_dev': ep_loss_dev,
                'dropout': config.dropout,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, (open(model_path, 'wb')))
def get_output(config):
    """Compute per-utterance log-likelihoods from multi-stream model combination.

    Like the other multi-model ``get_output`` variants, but delegates the
    stream combination to ``compute_lhood`` over the powerset of domains and
    iterates a softmax temperature T.  The temperature-gradient update is
    commented out; T is currently just incremented by 100 each iteration.

    Returns a dict mapping utterance id -> log-likelihood matrix from the
    final iteration.
    """
    # Load all P(x) and P(c|x) models
    model_pcx = config.models_pcx.split(',')
    model_px = config.models_px.split(',')
    if len(model_pcx) != len(model_px):
        # NOTE(review): warning only; execution continues on mismatch.
        print("Number of p(x) models and p(c|x) models are not the same!")
    num_domains = len(model_px)
    # All non-empty domain subsets used for stream combination.
    streams = powerset(list(np.arange(num_domains)))
    all_pcx_models = []
    all_px_models = []
    for idx, m in enumerate(model_pcx):
        nnet = torch.load(model_pcx[idx], map_location=lambda storage, loc: storage)
        vae = torch.load(model_px[idx], map_location=lambda storage, loc: storage)
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'], nnet['num_layers'],
                        nnet['hidden_dim'], nnet['num_classes'], nnet['dropout'])
        model.load_state_dict(nnet['model_state_dict'])
        all_pcx_models.append(model)
        model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                        vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0, False)
        model.load_state_dict(vae['model_state_dict'])
        all_px_models.append(model)
    # Output dimensionality taken from the last loaded classifier checkpoint.
    num_classes = nnet['num_classes']
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # Build the Kaldi feature pipeline command.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])
    # Load prior
    priors = config.priors.split(',')
    priors = [pickle.load(open(f, 'rb')) for f in priors]
    # Priors are kept as double tensors so they compose with the double
    # precision likelihood computation in compute_lhood.
    if config.use_gpu:
        priors = [torch.from_numpy(f).cuda().double() for f in priors]
    else:
        priors = [torch.from_numpy(f).double() for f in priors]
    all_pcx_models = nn.ModuleList(all_pcx_models)
    all_px_models = nn.ModuleList(all_px_models)
    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        all_pcx_models.cuda()
        all_px_models.cuda()
    if config.task_prior == "dp":
        print("using data based task priors")
        task_prior = "dp"
    else:
        task_prior = config.task_prior.split(',')
        task_prior = [float(tp) for tp in task_prior]
    post_dict = {}
    for utt_id, batch_x in kaldi_io.read_mat_ark(cmd):
        print("COMPUTING LOG-LIKELIHOOD FOR UTTERANCE {:s}".format(utt_id))
        sys.stdout.flush()
        T = torch.DoubleTensor([300])  # Initial temperature
        # requires_grad was for the (now disabled) gradient-based T update below.
        T.requires_grad = True
        num_frames = batch_x.shape[0]
        batch_x = Variable(torch.FloatTensor(batch_x))[None, :, :]
        batch_l = Variable(torch.IntTensor([batch_x.size(1)]))
        # Do forward passes through different models
        sm = torch.nn.Softmax(1)
        px_save = []
        all_pcx = []
        all_tp = torch.zeros(len(all_pcx_models), dtype=torch.double)
        for idx, model in enumerate(all_pcx_models):
            model.eval()
            out = model(batch_x, batch_l)
            ae_out, latent_out = all_px_models[idx](batch_x, batch_l)
            latent_out = (latent_out[0][0, :, :], latent_out[1][0, :, :])
            px = torch.exp(vae_loss(batch_x[0, :, :], ae_out[0, :, :], latent_out)).double()
            px_save.append(torch.mean(px))
            pcx = sm(out[0, :, :])
            all_pcx.append(pcx.double())
            if task_prior == "dp":
                all_tp[idx] = px_save[idx]
            else:
                all_tp[idx] = task_prior[idx]
        for it_num in range(config.num_iter):
            llh = compute_lhood(num_frames, num_classes, all_pcx, all_tp, priors, task_prior, streams, T)
            loss = -torch.mean(llh)
            print_log = "p(x|c) ={:.6f} with softmax temperature ={:.6f} ".format(loss.item(), T.item())
            print(print_log)
            sys.stdout.flush()
            #loss.backward(retain_graph=True)
            # NOTE(review): T.grad is printed but never populated since
            # backward() is commented out — expect None here.
            print(T.grad)
            # with torch.no_grad():
            #     T = T + config.lr_rate * T.grad/torch.norm(T.grad,2)
            #T.requires_grad = True
            # Fixed temperature schedule replacing the gradient update above.
            T = T + 100
        if config.use_gpu:
            post_dict[utt_id] = llh.cpu().data.numpy()
        else:
            post_dict[utt_id] = llh.data.numpy()
    return post_dict
def get_output(config):
    """Decode utterances with a classifier trained on frozen VAE features.

    Rebuilds the VAE front-end referenced by the classifier checkpoint
    (``nnet['vaeenc']``), choosing CNN (nnetVAECNNNopool) or RNN (nnetVAE /
    nnetARVAE) architecture from ``config.vae_arch`` and ``nnet['vae_type']``,
    encodes each utterance, applies utterance-level mean/variance
    normalization, and runs the RNN classifier.

    Returns a dict mapping utterance id -> per-frame output matrix (log-softmax
    minus weighted log prior, softmax, or raw logits — see config flags).
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    vae = torch.load(nnet['vaeenc'], map_location=lambda storage, loc: storage)
    if nnet['vae_type'] == "modulation":
        if config.vae_arch == "cnn":
            # CNN VAE hyperparameters are stored as comma-separated strings.
            in_channels = [int(x) for x in vae['in_channels'].split(',')]
            out_channels = [int(x) for x in vae['out_channels'].split(',')]
            kernel = tuple([int(x) for x in vae['kernel'].split(',')])
            vae_model = nnetVAECNNNopool(vae['feature_dim'], vae['num_frames'], in_channels,
                                         out_channels, kernel, vae['nfilters'] * vae['nrepeats'], False)
        else:
            vae_model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                                vae['decoder_num_layers'], vae['hidden_dim'],
                                vae['nfilters'] * vae['nrepeats'], 0, False)
        model = nnetRNN(vae['nfilters'] * vae['nrepeats'], nnet['num_layers'], nnet['hidden_dim'],
                        nnet['num_classes'], 0)
        vae_model.load_state_dict(vae["model_state_dict"])
        model.load_state_dict(nnet["model_state_dict"])
    elif nnet['vae_type'] == "arvae":
        ar_steps = vae['ar_steps'].split(',')
        ar_steps = [int(x) for x in ar_steps]
        ar_steps.append(0)
        vae_model = nnetARVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                              vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0,
                              len(ar_steps), False)
        model = nnetRNN(vae['bn_dim'], nnet['num_layers'], nnet['hidden_dim'], nnet['num_classes'], 0)
        vae_model.load_state_dict(vae["model_state_dict"])
        model.load_state_dict(nnet['model_state_dict'])
    else:
        if config.vae_arch == "cnn":
            in_channels = [int(x) for x in vae['in_channels'].split(',')]
            out_channels = [int(x) for x in vae['out_channels'].split(',')]
            kernel = tuple([int(x) for x in vae['kernel'].split(',')])
            vae_model = nnetVAECNNNopool(vae['feature_dim'], vae['num_frames'], in_channels,
                                         out_channels, kernel, vae['bn_dim'], False)
        else:
            vae_model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                                vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0, False)
        model = nnetRNN(vae['bn_dim'], nnet['num_layers'], nnet['hidden_dim'], nnet['num_classes'], 0)
        vae_model.load_state_dict(vae["model_state_dict"])
        model.load_state_dict(nnet['model_state_dict'])
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    lsm = torch.nn.LogSoftmax(1)
    sm = torch.nn.Softmax(1)
    # Feature transform: command-line override wins over the egs config.
    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # Build the Kaldi feature pipeline command.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])
    if config.prior:
        prior = pickle.load(open(config.prior, 'rb'))
    post_dict = {}
    model.eval()
    for utt_id, batch_x in kaldi_io.read_mat_ark(cmd):
        #print(batch_x.shape)
        if config.vae_arch == "cnn":
            # CNN path: add batch and channel dims, swap time/feature axes
            # to the (N, C, feat, time)-style layout the CNN expects —
            # presumably; verify against nnetVAECNNNopool's forward().
            batch_l = Variable(torch.IntTensor([batch_x.shape[0]]))
            batch_x = Variable(torch.FloatTensor(batch_x))
            batch_x = batch_x[None, None, :, :]
            batch_x = torch.transpose(batch_x, 2, 3)
            _, batch_x = vae_model(batch_x)
            batch_x = torch.transpose(batch_x[0], 1, 2)
        else:
            batch_x = Variable(torch.FloatTensor(batch_x))[None, :, :]
            batch_l = Variable(torch.IntTensor([batch_x.shape[1]]))
            _, batch_x = vae_model(batch_x, batch_l)
            batch_x = batch_x[0]
        print(batch_x.shape)
        # Utterance-wise mean/variance normalization of the latent features.
        batch_x = batch_x - torch.cat(batch_x.shape[1] * [torch.mean(batch_x, dim=1)[:, None, :]], dim=1)
        batch_x = batch_x / torch.sqrt(torch.cat(batch_x.shape[1] * [torch.var(batch_x, dim=1)[:, None, :]], dim=1))
        batch_x = model(batch_x, batch_l)
        if config.prior:
            print(batch_x[0].shape)
            sys.stdout.flush()
            post_dict[utt_id] = lsm(batch_x[0, :, :]).data.numpy() - config.prior_weight * prior
        else:
            if config.add_softmax:
                post_dict[utt_id] = sm(batch_x[0, :, :]).data.numpy()
            else:
                post_dict[utt_id] = batch_x[0, :, :].data.numpy()
    return post_dict
def run(config):
    """Train a VAE (or plain autoencoder when ``config.only_AE``) on sequence data.

    Builds an nnetVAE (optionally transformer-based), trains with either the
    AE reconstruction loss or the VAE ELBO (maximized via the negated sum of
    reconstruction and KL terms), reduces the learning rate and reverts to the
    best weights when validation loss degrades, and periodically checkpoints.
    Supports an optional second "concat" egs directory for paired-input
    autoencoding (nnetDatasetSeqAEConcat).
    """
    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir, config.experiment_name),
                        filemode='w')
    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)
    # Load feature configuration
    egs_config = pkl.load(open(os.path.join(config.egs_dir, config.train_set, 'egs.config'), 'rb'))
    context = egs_config['concat_feats']
    if config.concat_egs_dir:
        assert config.concat_train_set is not None
        assert config.concat_dev_set is not None
        egs_config_concat = pkl.load(
            open(os.path.join(config.concat_egs_dir, config.concat_train_set, 'egs.config'), 'rb'))
        # NOTE(review): context_concat is loaded but not used below.
        context_concat = egs_config_concat['concat_feats']
    # Number of spliced frames = left + right context + the center frame.
    num_frames = 0
    if context is not None:
        context = context.split(',')
        num_frames += int(context[0]) + int(context[1]) + 1
    else:
        num_frames += 1
    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d' % (config.encoder_num_layers))
    logging.info('Decoder Number of Layers: %d' % (config.decoder_num_layers))
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Bottleneck dimension: %d' % (config.bn_dim))
    logging.info('Number of Frames: %d' % (num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))
    logging.info('Output distribution: %s ' % (config.out_dist))
    if config.only_AE:
        logging.info('Training only an Autoencoder')
    if config.use_transformer:
        logging.info('Training with Transformer layers instead of RNN')
    sys.stdout.flush()
    if config.only_AE:
        model = nnetVAE(config.feature_dim * num_frames, config.encoder_num_layers,
                        config.decoder_num_layers, config.hidden_dim, config.bn_dim, 0,
                        config.use_gpu, only_AE=True, use_transformer=config.use_transformer)
    else:
        model = nnetVAE(config.feature_dim * num_frames, config.encoder_num_layers,
                        config.decoder_num_layers, config.hidden_dim, config.bn_dim, 0,
                        config.use_gpu, use_transformer=config.use_transformer)
    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()
    lr = config.learning_rate
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(), weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    # Save the untrained model as epoch 0.
    model_path = os.path.join(model_dir, config.experiment_name + '__epoch_0.model')
    torch.save({
        'epoch': 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, (open(model_path, 'wb')))
    # Per-epoch history; AE tracks one loss, VAE tracks rec + KL separately.
    if config.only_AE:
        ep_ae_rec_tr = []
        ep_ae_rec_dev = []
    else:
        ep_vae_rec_tr = []
        ep_vae_kl_tr = []
        ep_vae_rec_dev = []
        ep_vae_kl_dev = []
    # Load Datasets
    if config.concat_egs_dir:
        dataset_train = nnetDatasetSeqAEConcat(os.path.join(config.egs_dir, config.train_set),
                                               os.path.join(config.concat_egs_dir, config.concat_train_set))
        data_loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=config.batch_size, shuffle=True)
        dataset_dev = nnetDatasetSeqAEConcat(os.path.join(config.egs_dir, config.dev_set),
                                             os.path.join(config.concat_egs_dir, config.concat_dev_set))
        data_loader_dev = torch.utils.data.DataLoader(dataset_dev, batch_size=config.batch_size, shuffle=True)
    else:
        dataset_train = nnetDatasetSeqAE(os.path.join(config.egs_dir, config.train_set))
        data_loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=config.batch_size, shuffle=True)
        dataset_dev = nnetDatasetSeqAE(os.path.join(config.egs_dir, config.dev_set))
        data_loader_dev = torch.utils.data.DataLoader(dataset_dev, batch_size=config.batch_size, shuffle=True)
    err_p = 0
    best_model_state = None
    for epoch_i in range(config.epochs):
        ####################
        ##### Training #####
        ####################
        model.train()
        if config.only_AE:
            train_ae_losses = []
        else:
            train_vae_rec_losses = []
            train_vae_kl_losses = []
        # Main training loop
        for batch_x, batch_l in data_loader_train:
            # Sort by length (descending) — presumably for packed sequences
            # inside the model; TODO confirm.
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
            optimizer.zero_grad()
            # Main forward pass
            ae_out, latent_out = model(batch_x, batch_l)
            # Convert all the weird tensors to frame-wise form
            batch_x = pad2list(batch_x, batch_l)
            ae_out = pad2list(ae_out, batch_l)
            if config.only_AE:
                loss = ae_loss(batch_x, ae_out, out_dist=config.out_dist)
                train_ae_losses.append(loss.item())
                loss.backward()
            else:
                latent_out = (pad2list(latent_out[0], batch_l), pad2list(latent_out[1], batch_l))
                loss = vae_loss(batch_x, ae_out, latent_out, out_dist=config.out_dist)
                train_vae_rec_losses.append(loss[0].item())
                train_vae_kl_losses.append(loss[1].item())
                # vae_loss returns (rec, kl) log-likelihood terms; negate their
                # sum to maximize the ELBO via gradient descent.
                (-loss[0] - loss[1]).backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip_thresh)
            optimizer.step()
        if config.only_AE:
            ep_ae_rec_tr.append(np.mean(train_ae_losses))
        else:
            ep_vae_rec_tr.append(np.mean(train_vae_rec_losses))
            ep_vae_kl_tr.append(np.mean(train_vae_kl_losses))
        ######################
        ##### Validation #####
        ######################
        model.eval()
        with torch.set_grad_enabled(False):
            if config.only_AE:
                val_ae_losses = []
            else:
                val_vae_rec_losses = []
                val_vae_kl_losses = []
            for batch_x, batch_l in data_loader_dev:
                _, indices = torch.sort(batch_l, descending=True)
                if config.use_gpu:
                    batch_x = Variable(batch_x[indices]).cuda()
                    batch_l = Variable(batch_l[indices]).cuda()
                else:
                    batch_x = Variable(batch_x[indices])
                    batch_l = Variable(batch_l[indices])
                # Main forward pass
                ae_out, latent_out = model(batch_x, batch_l)
                # Convert all the weird tensors to frame-wise form
                batch_x = pad2list(batch_x, batch_l)
                ae_out = pad2list(ae_out, batch_l)
                if config.only_AE:
                    loss = ae_loss(batch_x, ae_out, out_dist=config.out_dist)
                    val_ae_losses.append(loss.item())
                else:
                    latent_out = (pad2list(latent_out[0], batch_l), pad2list(latent_out[1], batch_l))
                    loss = vae_loss(batch_x, ae_out, latent_out, out_dist=config.out_dist)
                    val_vae_rec_losses.append(loss[0].item())
                    val_vae_kl_losses.append(loss[1].item())
            if config.only_AE:
                ep_ae_rec_dev.append(np.mean(val_ae_losses))
            else:
                ep_vae_rec_dev.append(np.mean(val_vae_rec_losses))
                ep_vae_kl_dev.append(np.mean(val_vae_kl_losses))
            # Manage learning rate
            if config.only_AE:
                if epoch_i == 0:
                    err_p = np.mean(val_ae_losses)
                    best_model_state = model.state_dict()
                else:
                    # Shrink LR and roll back weights if val loss did not
                    # improve by at least lr_tol percent.
                    if np.mean(val_ae_losses) > (100 - config.lr_tol) * err_p / 100:
                        logging.info("Val loss went up, Changing learning rate from {:.6f} to {:.6f}".format(lr, config.lrr * lr))
                        lr = config.lrr * lr
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr
                        model.load_state_dict(best_model_state)
                    else:
                        err_p = np.mean(val_ae_losses)
                        best_model_state = model.state_dict()
                print_log = "Epoch: {:d} ((lr={:.6f})) Tr AE Error: {:.3f} :: Val AE Error: {:.3f}".format(epoch_i + 1, lr, ep_ae_rec_tr[-1], ep_ae_rec_dev[-1])
                logging.info(print_log)
                if (epoch_i + 1) % config.model_save_interval == 0:
                    model_path = os.path.join(model_dir, config.experiment_name + '__epoch_%d' % (epoch_i + 1) + '.model')
                    torch.save({
                        'epoch': epoch_i + 1,
                        'feature_dim': config.feature_dim,
                        'num_frames': num_frames,
                        'encoder_num_layers': config.encoder_num_layers,
                        'decoder_num_layers': config.decoder_num_layers,
                        'hidden_dim': config.hidden_dim,
                        'bn_dim': config.bn_dim,
                        'ep_ae_rec_tr': ep_ae_rec_tr,
                        'ep_ae_rec_dev': ep_ae_rec_dev,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()
                    }, (open(model_path, 'wb')))
            else:
                # VAE case: track the negative ELBO (-rec - kl) as the error.
                if epoch_i == 0:
                    err_p = -np.mean(val_vae_rec_losses) - np.mean(val_vae_kl_losses)
                    best_model_state = model.state_dict()
                else:
                    if -np.mean(val_vae_rec_losses) - np.mean(val_vae_kl_losses) > (100 - config.lr_tol) * err_p / 100:
                        logging.info("Val loss went up, Changing learning rate from {:.6f} to {:.6f}".format(lr, config.lrr * lr))
                        lr = config.lrr * lr
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr
                        model.load_state_dict(best_model_state)
                    else:
                        err_p = -np.mean(val_vae_rec_losses) - np.mean(val_vae_kl_losses)
                        best_model_state = model.state_dict()
                print_log = "Epoch: {:d} ((lr={:.6f})) Tr VAE Log-likelihood: {:.3f} :: Val VAE Log-likelihood: {:.3f}".format(
                    epoch_i + 1, lr, ep_vae_kl_tr[-1] + ep_vae_rec_tr[-1], ep_vae_kl_dev[-1] + ep_vae_rec_dev[-1])
                logging.info(print_log)
                if (epoch_i + 1) % config.model_save_interval == 0:
                    model_path = os.path.join(model_dir, config.experiment_name + '__epoch_%d' % (epoch_i + 1) + '.model')
                    torch.save({
                        'epoch': epoch_i + 1,
                        'feature_dim': config.feature_dim,
                        'num_frames': num_frames,
                        'encoder_num_layers': config.encoder_num_layers,
                        'decoder_num_layers': config.decoder_num_layers,
                        'hidden_dim': config.hidden_dim,
                        'bn_dim': config.bn_dim,
                        'ep_vae_kl_tr': ep_vae_kl_tr,
                        'ep_vae_rec_tr': ep_vae_rec_tr,
                        'ep_vae_kl_dev': ep_vae_kl_dev,
                        'ep_vae_rec_dev': ep_vae_rec_dev,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()
                    }, (open(model_path, 'wb')))
def get_output(config):
    """Compute combined log-posteriors for every utterance in config.scp.

    Loads one p(c|x) acoustic model (nnetRNN) and one p(x) VAE model
    (nnetVAE) per domain, then for each utterance combines the per-stream
    posteriors over every subset ("stream") of domains, weighted by a task
    prior that is either data-driven ("dp") or a fixed comma-separated list.

    Args:
        config: argparse-style namespace; fields read here:
            models_pcx, models_px : comma-separated checkpoint paths
            egs_config            : pickled feature-config path
            override_trans        : optional "feat_type,trans_path" override
            scp                   : Kaldi scp of input features
            priors                : comma-separated pickled class-prior paths
            task_prior            : "dp" or comma-separated floats
            prior_weight          : scaling on the class-prior term

    Returns:
        dict mapping utt_id -> (T, num_classes) ndarray of
        log(combined posterior) - prior_weight * log(accumulated prior).
    """
    # Load all P(x) and P(c|x) models
    model_pcx = config.models_pcx.split(',')
    model_px = config.models_px.split(',')
    if len(model_pcx) != len(model_px):
        # NOTE(review): mismatch is only printed, execution continues and
        # will likely fail later — consider raising instead; confirm intent.
        print("Number of p(x) models and p(c|x) models are not the same!")
    num_domains = len(model_px)
    # All subsets of domain indices; each subset is one combination "stream".
    # Assumes powerset() is a project helper — TODO confirm whether it
    # includes the empty set (an empty subset would leave perf_mon == 1
    # and add a uniform num_prod below).
    streams = powerset(list(np.arange(num_domains)))
    all_pcx_models = []
    all_px_models = []
    for idx, m in enumerate(model_pcx):
        # map_location forces CPU loading regardless of where the
        # checkpoint was saved.
        nnet = torch.load(model_pcx[idx],
                          map_location=lambda storage, loc: storage)
        vae = torch.load(model_px[idx],
                         map_location=lambda storage, loc: storage)
        # p(c|x) acoustic model, rebuilt from hyperparameters stored in the
        # checkpoint dict.
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'],
                        nnet['num_layers'], nnet['hidden_dim'],
                        nnet['num_classes'], nnet['dropout'])
        model.load_state_dict(nnet['model_state_dict'])
        all_pcx_models.append(model)
        # p(x) VAE; last two args (0, False) presumably disable dropout /
        # an optional mode — verify against nnetVAE's signature.
        model = nnetVAE(vae['feature_dim'] * vae['num_frames'],
                        vae['encoder_num_layers'], vae['decoder_num_layers'],
                        vae['hidden_dim'], vae['bn_dim'], 0, False)
        model.load_state_dict(vae['model_state_dict'])
        all_px_models.append(model)
    # Uses the last loaded checkpoint's class count; assumes all p(c|x)
    # models share num_classes — TODO confirm.
    num_classes = nnet['num_classes']
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    # Softmax over dim 1 (the class dimension of a (T, C) frame matrix).
    sm = torch.nn.Softmax(1)

    # Resolve the feature transform: explicit override wins, otherwise use
    # the "feat_type,trans_path" string stored with the egs config.
    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    # Build the Kaldi pipeline that streams features for config.scp.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(
            trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)

    if feats_config['concat_feats']:
        # Frame splicing: "left,right" context string from the egs config.
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Load prior
    # One pickled class-prior vector per domain (log-priors, judging by the
    # np.exp(priors[b]) below — TODO confirm the pickled format).
    priors = config.priors.split(',')
    priors = [pickle.load(open(f,
                               'rb')) for f in priors]

    if config.task_prior == "dp":
        print("using data based task priors")
    else:
        # Fixed task priors given directly as comma-separated floats.
        task_prior = config.task_prior.split(',')
        task_prior = [float(tp) for tp in task_prior]

    post_dict = {}
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        # num accumulates weighted per-frame posteriors over all streams;
        # denom accumulates the corresponding class priors.
        num = np.zeros((mat.shape[0], num_classes))
        denom = np.zeros(num_classes)
        # Add a leading batch dimension of 1 for the models.
        mat = Variable(torch.FloatTensor(mat))[None, :, :]
        batch_l = Variable(torch.IntTensor([mat.size(1)]))
        px_save = []
        all_pcx = []
        all_px = []
        all_tp = []
        for idx, model in enumerate(all_pcx_models):
            model.eval()
            out = model(mat, batch_l)
            ae_out, latent_out = all_px_models[idx](mat, batch_l)
            # Strip the batch dim from the (mu, logvar)-style latent pair —
            # TODO confirm latent_out's exact contents against nnetVAE.
            latent_out = (latent_out[0][0, :, :], latent_out[1][0, :, :])
            # Per-frame p(x) proxy: exp of the VAE loss. NOTE(review):
            # exp(loss) grows with the loss, so a *worse* reconstruction
            # yields a *larger* "p(x)" — confirm vae_loss's sign convention.
            px = np.exp(
                vae_loss(mat[0, :, :], ae_out[0, :, :],
                         latent_out).data.numpy())
            px_save.append(np.mean(px))
            # Per-frame class posteriors p(c|x), shape (T, num_classes).
            pcx = sm(out[0, :, :])
            px = np.tile(px, (pcx.shape[1], 1)).T
            all_pcx.append(pcx.data.numpy())
            # NOTE(review): the tiled px is discarded and replaced by ones,
            # so per-frame p(x) weighting is effectively disabled here.
            all_px.append(np.ones(px.shape))
            if config.task_prior == "dp":
                # Data-driven prior: mean p(x) of this utterance for task idx.
                all_tp.append(px_save[idx])
            else:
                all_tp.append(task_prior[idx])
        if config.task_prior == "dp":
            all_tp = np.asarray(all_tp, dtype=np.float64)
            # Sharpened softmax over task scores; the factor 300 makes the
            # weighting nearly winner-take-all. NOTE(review): exp(300*x) can
            # overflow for scores much above ~2 — confirm expected range.
            all_tp = np.exp(300 * all_tp) / np.sum(np.exp(300 * all_tp))
        # Combine over every subset of domains ("stream").
        for idx, st in enumerate(streams):
            num_prod = np.ones((num.shape[0], num_classes))
            denom_prod = np.ones(num_classes)
            perf_mon = 1
            for b in st:
                # Product-of-experts over the stream's member posteriors.
                num_prod *= all_pcx[b]  # np.power(all_pcx[b], all_tp[b])
                # Stream weight = product of its members' task priors.
                perf_mon *= all_tp[b]
                # Class priors are combined the same way (stored as logs).
                denom_prod *= np.exp(priors[b])
            denom_prod /= np.sum(denom_prod)
            # Renormalize the posterior product per frame.
            num_prod = num_prod / np.tile(
                np.sum(num_prod, axis=1)[:, None], (1, num_prod.shape[1]))
            num += num_prod * perf_mon
            denom += denom_prod
        print_log = ""
        for ii, x in enumerate(px_save):
            print_log += "p(x) for Task {:d} ={:.6f} with prior ={:.6f} ".format(
                ii, x, all_tp[ii])
        print(print_log)
        sys.stdout.flush()
        # Final scaled log-posterior; denom (num_classes,) broadcasts over
        # the (T, num_classes) numerator.
        post_dict[utt_id] = np.log(num) - config.prior_weight * np.log(denom)
    return post_dict
def get_output(config):
    """Compute combined log-posteriors using per-frame p(x) softmax weights.

    Variant of the stream-combination decoder: instead of enumerating
    domain subsets, each domain's per-frame posterior is weighted by a
    softmax over the domains' per-frame p(x) scores (``pxx``), and the
    class priors are mixed with the same weights.

    NOTE(review): this function has the same name as the other
    ``get_output`` in this file — if both live in one module, the later
    definition silently shadows the earlier one. Confirm these belong to
    separate scripts or rename one.

    Args:
        config: argparse-style namespace; fields read here:
            models_pcx, models_px : comma-separated checkpoint paths
            egs_config            : pickled feature-config path
            override_trans        : optional "feat_type,trans_path" override
            scp                   : Kaldi scp of input features
            priors                : comma-separated pickled class-prior paths
            prior_weight          : scaling on the class-prior term

    Returns:
        dict mapping utt_id -> (T, num_classes) ndarray of
        log(combined posterior) - prior_weight * log(mixed prior).
    """
    # Load all P(x) and P(c|x) models
    model_pcx = config.models_pcx.split(',')
    model_px = config.models_px.split(',')
    if len(model_pcx) != len(model_px):
        # NOTE(review): mismatch is only printed, not raised — same caveat
        # as the sibling variant; confirm intent.
        print("Number of p(x) models and p(c|x) models are not the same!")
    num_domains = len(model_px)
    all_pcx_models = []
    all_px_models = []
    for idx, m in enumerate(model_pcx):
        # map_location forces CPU loading regardless of save device.
        nnet = torch.load(model_pcx[idx],
                          map_location=lambda storage, loc: storage)
        vae = torch.load(model_px[idx],
                         map_location=lambda storage, loc: storage)
        # p(c|x) acoustic model, rebuilt from checkpoint hyperparameters.
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'],
                        nnet['num_layers'], nnet['hidden_dim'],
                        nnet['num_classes'], nnet['dropout'])
        model.load_state_dict(nnet['model_state_dict'])
        all_pcx_models.append(model)
        # p(x) VAE; trailing (0, False) args presumably disable dropout /
        # an optional mode — verify against nnetVAE's signature.
        model = nnetVAE(vae['feature_dim'] * vae['num_frames'],
                        vae['encoder_num_layers'], vae['decoder_num_layers'],
                        vae['hidden_dim'], vae['bn_dim'], 0, False)
        model.load_state_dict(vae['model_state_dict'])
        all_px_models.append(model)
    # Class count taken from the last checkpoint; assumes all models agree.
    num_classes = nnet['num_classes']
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    # Softmax over dim 1 (classes) of a (T, C) frame matrix.
    sm = torch.nn.Softmax(1)

    # Feature transform: explicit override wins over the stored egs config.
    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    # Build the Kaldi feature-streaming pipeline.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path,
                                                         config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(
            trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)

    if feats_config['concat_feats']:
        # Frame splicing, "left,right" context string.
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Load prior
    # One pickled class-prior vector per domain (exponentiated below, so
    # presumably stored as log-priors — TODO confirm).
    priors = config.priors.split(',')
    priors = [pickle.load(open(f, 'rb')) for f in priors]

    post_dict = {}
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        # post: weighted sum of per-domain posteriors, shape (T, C).
        post = np.zeros((mat.shape[0], num_classes))
        # pxx: per-domain per-frame p(x) scores, later softmaxed over
        # domains to become mixing weights.
        pxx = np.zeros((num_domains, mat.shape[0], num_classes))
        prior_acc = np.zeros((mat.shape[0], num_classes))
        # Add a leading batch dimension of 1 for the models.
        mat = Variable(torch.FloatTensor(mat))[None, :, :]
        batch_l = Variable(torch.IntTensor([mat.size(1)]))
        all_pcx = []
        all_px = []
        for idx, model in enumerate(all_pcx_models):
            model.eval()
            out = model(mat, batch_l)
            ae_out, latent_out = all_px_models[idx](mat, batch_l)
            # Strip batch dim from the latent pair — TODO confirm contents.
            latent_out = (latent_out[0][0, :, :], latent_out[1][0, :, :])
            # Per-frame p(x) proxy: exp of the VAE loss. NOTE(review):
            # confirm vae_loss's sign convention — exp(loss) rises as the
            # reconstruction gets worse.
            px = np.exp(
                vae_loss(mat[0, :, :], ae_out[0, :, :],
                         latent_out).data.numpy())
            # Per-frame class posteriors p(c|x), shape (T, num_classes).
            pcx = sm(out[0, :, :])
            # Broadcast the per-frame scalar px across the class dimension.
            px = np.tile(px, (pcx.shape[1], 1)).T
            all_pcx.append(pcx.data.numpy())
            all_px.append(px)
            pxx[idx] = px
        # Normalize p(x) scores into per-frame domain weights. Assumes
        # softmax() (scipy.special.softmax or a project helper) operates
        # over axis 0 here — TODO confirm, the default scipy behavior is
        # over the flattened array unless axis is given.
        pxx = softmax(pxx)
        for idx, pcx in enumerate(all_pcx):
            # Posterior mixed by both the raw px and its softmax weight.
            post += pcx * all_px[idx] * pxx[idx]
            # Class priors mixed with the same per-frame weights.
            prior_acc += np.exp(np.tile(priors[idx],
                                        (pcx.shape[0], 1))) * pxx[idx]
        # Final scaled log-posterior for this utterance.
        post_dict[utt_id] = np.log(post) - config.prior_weight * np.log(
            prior_acc)
    return post_dict