def run(config):
    """Compute the per-utterance frame error rate (FER) of a saved nnet model.

    Loads the feed-forward acoustic model from ``config.model``, reads Kaldi
    alignments from ``config.ali_dir`` (pdf or per-frame phone, per
    ``config.ali_type``), builds the feature pipeline described by
    ``config.egs_config``/``config.scp``, and scores every utterance.

    Returns:
        dict mapping utterance id -> FER percentage (0..100).
    """
    # Load the nnet model from checkpoint (CPU mapping so it works anywhere).
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])
    # Fix: put the model in inference mode (consistent with load_model);
    # otherwise dropout/batch-norm layers would run in training mode.
    model.eval()

    # Load alignments: utterance id -> label sequence.
    ali_files = [
        os.path.join(config.ali_dir, f) for f in listdir(config.ali_dir)
        if f.startswith('ali.')
    ]
    pdf_ali_dict = {}
    for ali_file in ali_files:  # renamed from `file` (shadowed a builtin)
        if config.ali_type == "pdf":
            pdf_ali_file = "ark:ali-to-pdf {} ark:'gunzip -c {} |' ark:- |".format(
                os.path.join(config.ali_dir, "final.mdl"), ali_file)
        else:
            pdf_ali_file = "ark:ali-to-phones --per-frame {} ark:'gunzip -c {} |' ark:- |".format(
                os.path.join(config.ali_dir, "final.mdl"), ali_file)
        # Kaldi labels are 1-based; shift to 0-based to match network outputs.
        pdf_ali_dict.update(
            {u: d - 1 for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)})

    # Build the Kaldi feature-pipeline command.
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    # Fix: initialize so the comparisons below cannot raise NameError when
    # feats_config['feat_type'] is empty/None.
    feat_type = None
    trans_path = None
    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    if config.override_trans_path is not None:
        trans_path = config.override_trans_path
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = config.scp
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Get the posteriors and accumulate per-utterance FER.
    fer_dict = {}
    with torch.no_grad():  # scoring only — no need to build autograd graphs
        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            out = model(Variable(torch.FloatTensor(mat)))
            post = softmax(out[1].data.numpy())
            als = pdf_ali_dict[utt_id]
            preds = np.argmax(post, axis=1)
            n_frames = float(preds.shape[0])
            # FER = percentage of frames whose argmax disagrees with alignment.
            err = (n_frames - float(np.sum(np.equal(preds, als)))) * 100 / n_frames
            fer_dict[utt_id] = err
    return fer_dict
def load_model(config):
    """Restore the feed-forward nnet stored at ``config.model``.

    The checkpoint dict supplies both the architecture hyper-parameters and
    the trained weights. The returned model is switched to eval mode.
    """
    checkpoint = torch.load(config.model,
                            map_location=lambda storage, loc: storage)
    input_dim = checkpoint['feature_dim'] * checkpoint['num_frames']
    net = nnetFeedforward(input_dim,
                          checkpoint['num_layers'],
                          checkpoint['hidden_dim'],
                          checkpoint['num_classes'])
    net.load_state_dict(checkpoint['model_state_dict'])
    net.eval()
    return net
def update(config):
    """Unsupervised adaptation of the acoustic model with a frozen RNN-AE.

    Streams utterances through the acoustic model, buffers mean-normalized
    posteriors into padded batches, feeds them to a frozen performance-
    monitoring autoencoder, and back-propagates the reconstruction ("positive")
    MSE into the acoustic model. A labeled dev set tracks loss/FER per epoch.
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()  # NOTE(review): shadows the builtin `id`
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log to a file inside model_dir ...
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir, config.experiment_name),
                        filemode='w')
    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    # NOTE(review): logs feature_dim under "Hidden Dimension" — probably meant
    # nnet['hidden_dim']; confirm before relying on these logs.
    logging.info('Hidden Dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Time shifts: %s' % (config.time_shifts))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    criterion = nn.MSELoss()  # NOTE(review): not referenced below; samplewise_mse is used instead
    dev_criterion = nn.CrossEntropyLoss()

    # Optimizer over the acoustic-model parameters only — the AE below is frozen.
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    lr = config.learning_rate

    # Frame offsets used to build the time-shifted "negative" loss.
    ts_list = [int(t) for t in config.time_shifts.split(',')]
    max_ts = max(ts_list)

    # Figure out all feature stuff: shuffle the scp into ./temp, then wrap it
    # with the Kaldi transform chain recorded in the egs config.
    shell_cmd = "cat {:s} | shuf > temp".format(config.scp)
    r = subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # NOTE(review): if feats_config['feat_type'] is empty, feat_type is never
    # bound and the comparison below raises NameError.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, 'temp')
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, 'temp')
    else:
        cmd = 'temp'
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Load performance monitoring model (RNN autoencoder), kept frozen.
    pm_model = torch.load(config.pm, map_location=lambda storage, loc: storage)
    ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                              pm_model['bn_dim'], pm_model['encoder_num_layers'],
                              pm_model['decoder_num_layers'], pm_model['hidden_dim'])
    ae_model.load_state_dict(pm_model['model_state_dict'])
    if config.use_gpu:
        ae_model.cuda()
    for p in ae_model.parameters():  # Do not update performance monitoring block
        p.requires_grad = False

    mean, _ = get_cmvn(config.cmvn)  # mean used to center posteriors below

    ep_loss_dev = []
    ep_fer_dev = []

    # Labeled dev set used only for monitoring (loss + FER).
    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=config.batch_size,
                                              shuffle=True)

    init_fer = True
    if init_fer:
        # Compute initial performance on dev set
        val_losses = []
        val_fer = []
        for batch_x, batch_l in data_loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)
            _, batch_x = model(batch_x)  # model returns a tuple; second item are logits
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())
            if config.use_gpu:
                val_fer.append(compute_fer(batch_x.cpu().data.numpy(),
                                           batch_l.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(batch_x.data.numpy(),
                                           batch_l.data.numpy()))
        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))
        print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
            np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

    for epoch in range(config.epochs):
        # Buffer of padded posterior sequences, one row per utterance.
        if config.use_gpu:
            batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim']).cuda()
        else:
            batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim'])
        lens = []
        utt_count = 0
        update_num = 0
        val_losses = []
        val_fer = []
        train_losses_pos = []
        train_losses_neg = []

        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            model.eval()
            if config.use_gpu:
                out = model(Variable(torch.FloatTensor(mat)).cuda())
            else:
                out = model(Variable(torch.FloatTensor(mat)))
            # Center the posterior stream (out[1]) with the stored CMVN mean.
            if config.use_gpu:
                post = out[1] - torch.FloatTensor(mean).cuda()
            else:
                post = out[1] - torch.FloatTensor(mean)
            lens.append(min(post.shape[0], config.max_seq_len))
            # Zero-pad (or implicitly truncate later) to max_seq_len frames.
            post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
            batch = torch.cat([batch, post[None, :, :]], 0)
            utt_count += 1
            sys.stdout.flush()

            if utt_count == config.batch_size:
                update_num += 1
                #### DO THE ADAPTATION
                lens = torch.IntTensor(lens)
                # Sort by length, descending — presumably for packed RNN input; confirm in autoencoderRNN.
                _, indices = torch.sort(lens, descending=True)
                batch_x = batch[indices]
                batch_l = lens[indices]
                outputs = ae_model(batch_x, batch_l)
                # First positive loss: reconstruction vs. the aligned frames
                # (edges trimmed by max_ts so the shifted slices below line up).
                mse_pos = samplewise_mse(outputs[:, max_ts:-max_ts - 1, :],
                                         batch_x[:, max_ts:-max_ts - 1, :])
                # Now find negative loss: reconstruction vs. time-shifted frames.
                s = outputs[:, max_ts:-max_ts - 1, :].size()
                if config.use_gpu:
                    mse_neg = torch.zeros(s[0], s[1]).cuda()
                else:
                    mse_neg = torch.zeros(s[0], s[1])
                count = 0
                for t in ts_list:
                    count += 1
                    mse_neg += samplewise_mse(outputs[:, max_ts:-max_ts - 1, :],
                                              batch_x[:, max_ts + t:-max_ts - 1 + t, :])
                    mse_neg += samplewise_mse(outputs[:, max_ts:-max_ts - 1, :],
                                              batch_x[:, max_ts - t:-max_ts - 1 - t, :])
                mse_neg = mse_neg / (2 * count)
                # NOTE(review): only the positive term is optimized; the
                # contrastive ratio remains commented out.
                loss = mse_pos.mean()  # (mse_pos / mse_neg).mean()
                train_losses_pos.append(mse_pos.mean().item())
                train_losses_neg.append(mse_neg.mean().item())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # Reset the utterance buffer for the next mini-batch.
                if config.use_gpu:
                    batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim']).cuda()
                else:
                    batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim'])
                lens = []
                utt_count = 0

        # CHECK IF ADAPTATION IS WORKING AT ALL
        for batch_x, batch_l in data_loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)
            _, batch_x = model(batch_x)
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())
            if config.use_gpu:
                val_fer.append(compute_fer(batch_x.cpu().data.numpy(),
                                           batch_l.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(batch_x.data.numpy(),
                                           batch_l.data.numpy()))
        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))
        print_log = "Epoch: {:d} update, Tr +ve MSE Loss: {:.3f} :: Tr -ve MSE Loss: {:.3f} :: Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
            epoch, np.mean(train_losses_pos), np.mean(train_losses_neg),
            np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

        # NOTE(review): file handles passed to torch.save are never closed.
        torch.save(
            ep_loss_dev,
            open(os.path.join(model_dir, "dev_epoch{:d}.loss".format(epoch + 1)), 'wb'))
        torch.save(
            ep_fer_dev,
            open(os.path.join(model_dir, "dev_epoch{:d}.fer".format(epoch + 1)), 'wb'))

        # Change learning rate to half
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
def update(config):
    """Unsupervised adaptation driven by several frozen RNN autoencoders.

    One AE per monitored representation (posteriors plus hidden layers); each
    mini-batch of buffered, padded sequences produces one reconstruction loss
    per AE, all back-propagated into the acoustic model. A labeled dev set
    tracks loss/FER per epoch.
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()  # NOTE(review): shadows the builtin `id`
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, config.experiment_name),
        filemode='w')
    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    # NOTE(review): logs feature_dim under "Hidden Dimension" — probably meant
    # nnet['hidden_dim'].
    logging.info('Hidden Dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    criterion = nn.MSELoss()  # per-AE reconstruction loss used in the adaptation loop
    dev_criterion = nn.CrossEntropyLoss()

    # Optimizer over the acoustic-model parameters only — the AEs are frozen.
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    lr = config.learning_rate

    # Figure out all feature stuff: shuffled scp into ./temp + Kaldi transforms.
    shell_cmd = "cat {:s} | shuf > temp".format(config.scp)
    r = subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # NOTE(review): feat_type is unbound here if feats_config['feat_type'] is empty.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, 'temp')
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, 'temp')
    else:
        cmd = 'temp'
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])

    # Load performance monitoring models (one frozen RNN-AE per representation).
    pm_paths = config.pms.split(',')
    pm_models = []
    feat_dims = []
    for path in pm_paths:
        pm_model = torch.load(path, map_location=lambda storage, loc: storage)
        ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                                  pm_model['bn_dim'], pm_model['encoder_num_layers'],
                                  pm_model['decoder_num_layers'], pm_model['hidden_dim'])
        ae_model.load_state_dict(pm_model['model_state_dict'])
        feat_dims.append(pm_model['feature_dim'])
        if config.use_gpu:
            ae_model.cuda()
        for p in ae_model.parameters():  # Do not update performance monitoring block
            p.requires_grad = False
        pm_models.append(ae_model)

    # One CMVN mean per PM model; counts must match.
    cmvn_paths = config.cmvns.split(',')
    means = []
    for path in cmvn_paths:
        mean, _ = get_cmvn(path)
        means.append(mean)
    if len(cmvn_paths) != len(pm_paths):
        logging.error("Number of cmvn paths not equal to number of model paths, exiting training!")
        sys.exit(1)
    else:
        num_pm_models = len(pm_paths)

    ep_loss_dev = []
    ep_fer_dev = []

    # Labeled dev set used only for monitoring.
    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=50000, shuffle=True)

    # Compute initial performance on dev set
    val_losses = []
    val_fer = []
    for batch_x, batch_l in data_loader:
        if config.use_gpu:
            batch_x = Variable(batch_x).cuda()
            batch_l = Variable(batch_l).cuda()
        else:
            batch_x = Variable(batch_x)
            batch_l = Variable(batch_l)
        _, batch_x = model(batch_x)  # tuple output; second item are logits
        val_loss = dev_criterion(batch_x, batch_l)
        val_losses.append(val_loss.item())
        if config.use_gpu:
            val_fer.append(compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
        else:
            val_fer.append(compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))
    ep_loss_dev.append(np.mean(val_losses))
    ep_fer_dev.append(np.mean(val_fer))
    print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
        np.mean(val_losses), np.mean(val_fer))
    logging.info(print_log)

    for epoch in range(config.epochs):
        # One padded sequence buffer per PM model.
        batches = []
        for idx in range(num_pm_models):
            if config.use_gpu:
                batch = torch.empty(0, config.max_seq_len, feat_dims[idx]).cuda()
            else:
                batch = torch.empty(0, config.max_seq_len, feat_dims[idx])
            batches.append(batch)
        lens = []
        utt_count = 0
        update_num = 0
        val_losses = []
        val_fer = []
        tr_losses = []
        for idx in range(num_pm_models):
            tr_losses.append([])

        # I want to dump all the posteriors first
        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            if config.use_gpu:
                out = model(Variable(torch.FloatTensor(mat)).cuda())
            else:
                out = model(Variable(torch.FloatTensor(mat)))
            # Buffer 0 holds the centered posterior stream (out[1]).
            if config.use_gpu:
                post = out[1] - Variable(torch.FloatTensor(means[0])).cuda()
            else:
                post = out[1] - Variable(torch.FloatTensor(means[0]))
            post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
            batch = batches[0]
            batch = torch.cat([batch, post[None, :, :]], 0)
            batches[0] = batch
            # Buffers 1..n-1 hold centered hidden-layer activations (out[0][idx]).
            for idx in range(1, num_pm_models):
                if config.use_gpu:
                    post = out[0][idx] - Variable(torch.FloatTensor(means[idx])).cuda()
                else:
                    post = out[0][idx] - Variable(torch.FloatTensor(means[idx]))
                post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
                batch = batches[idx]
                batch = torch.cat([batch, post[None, :, :]], 0)
                batches[idx] = batch
            lens.append(min(post.size(0), config.max_seq_len))
            utt_count += 1
            sys.stdout.flush()

            if utt_count == config.batch_size:
                update_num += 1
                ## DO THE ADAPTATION
                lens = torch.IntTensor(lens)
                _, indices = torch.sort(lens, descending=True)  # length-sort for the RNN-AE
                for idx in range(num_pm_models):
                    batch_x = batches[idx][indices]
                    ae_model = pm_models[idx]
                    batch_l = lens[indices]
                    # NOTE(review): leftover debug prints — consider removing.
                    print(batch_x.size())
                    print(batch_l.size())
                    sys.stdout.flush()
                    # Optional one-step prediction: shift targets by time_shift frames.
                    if config.time_shift == 0:
                        outputs = ae_model(batch_x, batch_l)
                    else:
                        outputs = ae_model(batch_x[:, :-config.time_shift, :],
                                           batch_l - config.time_shift)
                    optimizer.zero_grad()
                    if config.time_shift == 0:
                        loss = criterion(outputs, batch_x)
                    else:
                        loss = criterion(outputs, batch_x[:, config.time_shift:, :])
                    tl = tr_losses[idx]
                    tl.append(loss.item() / (config.max_seq_len * config.batch_size))
                    tr_losses[idx] = tl
                    # Keep the graph alive until the last AE's backward pass.
                    if idx < num_pm_models - 1:
                        loss.backward(retain_graph=True)
                    else:
                        loss.backward()
                    optimizer.step()
                # Reset all buffers for the next mini-batch.
                batches = []
                for idx in range(num_pm_models):
                    if config.use_gpu:
                        batch = torch.empty(0, config.max_seq_len, feat_dims[idx]).cuda()
                    else:
                        batch = torch.empty(0, config.max_seq_len, feat_dims[idx])
                    batches.append(batch)
                lens = []
                utt_count = 0

        logging.info("Finished unsupervised adaptation for epoch {:d} with multi-layer RNN-AE Loss".format(epoch))

        # CHECK IF ADAPTATION IS WORKING AT ALL
        for batch_x, batch_l in data_loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)
            _, batch_x = model(batch_x)
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())
            if config.use_gpu:
                val_fer.append(compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))
        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} update ".format(epoch)
        for idx in range(num_pm_models):
            print_log = print_log + "Tr loss layer {:d} = {:.3f} | ".format(idx, np.mean(tr_losses[idx]))
        print_log = print_log + "Dev loss: {:.3f} | Dev FER: {:.2f}".format(np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

        # NOTE(review): file handles passed to torch.save are never closed.
        torch.save(ep_loss_dev, open(os.path.join(model_dir, "dev_epoch{:d}.loss".format(epoch + 1)), 'wb'))
        torch.save(ep_fer_dev, open(os.path.join(model_dir, "dev_epoch{:d}.fer".format(epoch + 1)), 'wb'))

        # Change learning rate to half
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
def update(config):
    """Self-training adaptation gated by an RNN-AE quality score.

    Each epoch optionally performs an unsupervised RNN-AE update of the model,
    then scores every utterance with the frozen AE; utterances whose
    reconstruction loss is below ``config.score_threshold`` are pseudo-labeled
    with the model's own argmax outputs and used for a supervised update.
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()  # NOTE(review): shadows the builtin `id`
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir, config.experiment_name),
                        filemode='w')
    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    # NOTE(review): logs feature_dim under "Hidden Dimension" — probably meant
    # nnet['hidden_dim'].
    logging.info('Hidden Dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    criterion = nn.MSELoss()  # AE reconstruction loss (unsupervised phase + scoring)
    dev_criterion = nn.CrossEntropyLoss()  # supervised phase + dev monitoring

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    lr = config.learning_rate

    # Figure out all feature stuff: shuffled scp into ./temp + Kaldi transforms.
    shell_cmd = "cat {:s} | shuf > temp".format(config.scp)
    r = subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    # NOTE(review): feat_type is unbound here if feats_config['feat_type'] is empty.
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, 'temp')
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, 'temp')
    else:
        cmd = 'temp'
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Load performance monitoring model (frozen RNN autoencoder).
    pm_model = torch.load(config.pm, map_location=lambda storage, loc: storage)
    ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                              pm_model['bn_dim'], pm_model['encoder_num_layers'],
                              pm_model['decoder_num_layers'], pm_model['hidden_dim'])
    ae_model.load_state_dict(pm_model['model_state_dict'])
    if config.use_gpu:
        ae_model.cuda()
    for p in ae_model.parameters():  # Do not update performance monitoring block
        p.requires_grad = False

    mean, _ = get_cmvn(config.cmvn)

    ep_loss_dev = []
    ep_fer_dev = []

    # Labeled dev set used only for monitoring.
    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader_check = torch.utils.data.DataLoader(dataset, batch_size=5000, shuffle=True)

    # Compute initial performance on dev set
    val_losses = []
    val_fer = []
    for batch_x, batch_l in data_loader_check:
        if config.use_gpu:
            batch_x = Variable(batch_x).cuda()
            batch_l = Variable(batch_l).cuda()
        else:
            batch_x = Variable(batch_x)
            batch_l = Variable(batch_l)
        # NOTE(review): the model output is used directly here (not
        # tuple-unpacked as `_, x = model(x)` in the sibling update functions)
        # — confirm this model variant returns a single tensor.
        batch_x = model(batch_x)
        val_loss = dev_criterion(batch_x, batch_l)
        val_losses.append(val_loss.item())
        if config.use_gpu:
            val_fer.append(compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
        else:
            val_fer.append(compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))
    ep_loss_dev.append(np.mean(val_losses))
    ep_fer_dev.append(np.mean(val_fer))
    print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
        np.mean(val_losses), np.mean(val_fer))
    logging.info(print_log)

    unsup_up = True  # perform unsupervised updates until the first supervised round
    cc = 0           # counts consecutive supervised epochs; threshold is relaxed every 20
    for epoch in range(config.epochs):
        if unsup_up:
            # First lets do an unsupervised update with RNN-AE
            if config.use_gpu:
                batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim']).cuda()
            else:
                batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim'])
            utt_count = 0
            update_num = 0
            ae_loss = []
            lens = []
            model.train()
            for utt_id, mat in kaldi_io.read_mat_ark(cmd):
                # Centered model output for this utterance.
                if config.use_gpu:
                    post = model(Variable(torch.FloatTensor(mat)).cuda()) - Variable(
                        torch.FloatTensor(mean)).cuda()
                else:
                    post = model(Variable(torch.FloatTensor(mat))) - Variable(
                        torch.FloatTensor(mean))
                lens.append(min(post.shape[0], config.max_seq_len))
                post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
                batch = torch.cat([batch, post[None, :, :]], 0)
                utt_count += 1
                sys.stdout.flush()
                if utt_count == config.batch_size:
                    update_num += 1
                    #### DO THE ADAPTATION
                    lens = torch.IntTensor(lens)
                    _, indices = torch.sort(lens, descending=True)  # length-sort for the RNN-AE
                    batch_x = batch[indices]
                    batch_l = lens[indices]
                    if config.time_shift == 0:
                        outputs = ae_model(batch_x, batch_l)
                    else:
                        outputs = ae_model(batch_x[:, :-config.time_shift, :],
                                           batch_l - config.time_shift)
                    optimizer.zero_grad()
                    if config.time_shift == 0:
                        loss = criterion(outputs, batch_x)
                    else:
                        loss = criterion(outputs, batch_x[:, config.time_shift:, :])
                    ae_loss.append(loss.item() / (config.max_seq_len * config.batch_size))
                    loss.backward()
                    optimizer.step()
                    # Reset the utterance buffer for the next mini-batch.
                    if config.use_gpu:
                        batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim']).cuda()
                    else:
                        batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim'])
                    lens = []
                    utt_count = 0
            logging.info('Finished unsupervised update of nnet')
        else:
            # NOTE(review): when this branch runs, ae_loss below is stale from
            # the last unsupervised epoch (or unbound if none ever ran).
            logging.info('Skipping unsupervised update of nnet')

        # Check if any utterance has a good RNN-AE score
        new_egs = torch.empty(0, nnet['feature_dim'] * nnet['num_frames'] + 1)
        new_utt_count = 0
        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            if config.use_gpu:
                post = model(Variable(torch.FloatTensor(mat)).cuda()) - Variable(
                    torch.FloatTensor(mean)).cuda()
            else:
                post = model(Variable(torch.FloatTensor(mat))) - Variable(
                    torch.FloatTensor(mean))
            lens = []
            lens.append(post.shape[0])
            post = post[None, :, :]  # add batch dimension for the AE
            if config.time_shift == 0:
                outputs = ae_model(post, lens)
            else:
                # NOTE(review): `lens` is a plain list here, so
                # `lens - config.time_shift` would raise TypeError — this path
                # looks untested; confirm.
                outputs = ae_model(post[:, :-config.time_shift, :],
                                   lens - config.time_shift)
            if config.time_shift == 0:
                loss = criterion(outputs, post).item() / config.max_seq_len
            else:
                loss = criterion(outputs, post[:, config.time_shift:, :]).item() / config.max_seq_len
            # Add the utterance for supervised update
            if loss < config.score_threshold:
                new_utt_count += 1
                # Pseudo-label with the model's own argmax predictions.
                if config.use_gpu:
                    labs = np.argmax(
                        (model(Variable(torch.FloatTensor(mat)).cuda())).cpu().data.numpy(),
                        axis=1)
                else:
                    labs = np.argmax(
                        (model(Variable(torch.FloatTensor(mat)))).data.numpy(),
                        axis=1)
                add_egs = np.hstack((mat, labs[:, np.newaxis]))
                new_egs = torch.cat([new_egs, torch.FloatTensor(add_egs)])
        logging.info('Added {:d} utterances from new domain to training set'.format(new_utt_count))

        ## Update with these new utterances
        if new_utt_count == 0:
            logging.info('No supervised updates with zero utterances, skipping to next epoch... ')
        else:
            cc += 1
            # Every 20 supervised epochs, relax the acceptance threshold by 10%.
            if cc == 20:
                config.score_threshold = config.score_threshold * 1.1
                cc = 0
            unsup_up = False  # once self-training starts, stop unsupervised updates

            train_data = new_egs[:, 0:-1]
            train_labels = new_egs[:, -1].long()
            dataset = nnetDataset(train_data, train_labels)
            data_loader = torch.utils.data.DataLoader(dataset, batch_size=5000, shuffle=True)
            model.train()
            train_losses = []
            tr_fer = []
            for batch_x, batch_l in data_loader:
                if config.use_gpu:
                    batch_x = Variable(batch_x).cuda()
                    batch_l = Variable(batch_l).cuda()
                else:
                    batch_x = Variable(batch_x)
                    batch_l = Variable(batch_l)
                batch_x = model(batch_x)
                optimizer.zero_grad()
                loss = dev_criterion(batch_x, batch_l)
                train_losses.append(loss.item())
                if config.use_gpu:
                    tr_fer.append(compute_fer(batch_x.cpu().data.numpy(),
                                              batch_l.cpu().data.numpy()))
                else:
                    tr_fer.append(compute_fer(batch_x.data.numpy(),
                                              batch_l.data.numpy()))
                loss.backward()
                optimizer.step()

            ## CHECK IF ADAPTATION IS WORKING AT ALL
            model.eval()
            val_losses = []
            val_fer = []
            for batch_x, batch_l in data_loader_check:
                if config.use_gpu:
                    batch_x = Variable(batch_x).cuda()
                    batch_l = Variable(batch_l).cuda()
                else:
                    batch_x = Variable(batch_x)
                    batch_l = Variable(batch_l)
                batch_x = model(batch_x)
                val_loss = dev_criterion(batch_x, batch_l)
                val_losses.append(val_loss.item())
                if config.use_gpu:
                    val_fer.append(compute_fer(batch_x.cpu().data.numpy(),
                                               batch_l.cpu().data.numpy()))
                else:
                    val_fer.append(compute_fer(batch_x.data.numpy(),
                                               batch_l.data.numpy()))
            ep_loss_dev.append(np.mean(val_losses))
            ep_fer_dev.append(np.mean(val_fer))
            print_log = "Epoch: {:d} AE Loss: {:.3f} update, Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
                epoch, np.mean(ae_loss), np.mean(val_losses), np.mean(val_fer))
            logging.info(print_log)

            # NOTE(review): file handles passed to torch.save are never closed.
            torch.save(
                ep_loss_dev,
                open(os.path.join(model_dir, "dev_epoch{:d}.loss".format(epoch + 1)), 'wb'))
            torch.save(
                ep_fer_dev,
                open(os.path.join(model_dir, "dev_epoch{:d}.fer".format(epoch + 1)), 'wb'))

            # Change learning rate to half
            optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
            logging.info('Learning rate changed to {:f}'.format(lr))
def update(config):
    """Score every utterance with a set of frozen seq2seq RNN-AE monitors.

    For each utterance, runs the acoustic model, centers the representation
    selected by ``config.pm_index`` (0 = posterior stream ``out[1]``,
    otherwise hidden layer ``out[0][pi[idx]]``) with the matching CMVN mean,
    reconstructs it with the corresponding frozen AE, and records the mean
    per-sample loss. Dumps ``{pm_index: {utt_id: score}}`` to
    ``config.out_file`` via pickle.
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])
    model.eval()  # fix: scoring only — disable any train-mode behavior

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename=config.log_file,
        filemode='w')
    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    # fix: was logging nnet['feature_dim'] under "Hidden Dimension"
    logging.info('Hidden Dimension: %d' % (nnet['hidden_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))

    # Per-sample scoring criterion selected by config.
    if config.loss == "MSE":
        criterion = samplewise_mse
    elif config.loss == "L1":
        criterion = samplewise_abs
    else:
        logging.info('Loss function {:s} is not supported'.format(config.loss))
        sys.exit(1)

    # Which model representation feeds each PM model (0 = posteriors).
    pi = [int(t) for t in config.pm_index.split(',')]

    # Figure out all feature stuff.
    shuff_file = config.scp
    feats_config = pickle.load(open(config.egs_config, 'rb'))
    # fix: initialize so the comparisons below cannot raise NameError when
    # feats_config['feat_type'] is empty/None.
    feat_type = None
    trans_path = None
    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
    if config.override_trans_path is not None:
        trans_path = config.override_trans_path
    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, shuff_file)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, shuff_file)
    else:
        cmd = shuff_file
    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])

    # Load performance monitoring models (frozen seq2seq RNN-AEs).
    pm_paths = config.pms.split(',')
    pm_models = []
    feat_dims = []
    for path in pm_paths:
        pm_model = torch.load(path, map_location=lambda storage, loc: storage)
        ae_model = seq2seqRNNAE(pm_model['feature_dim'], pm_model['feature_dim'],
                                pm_model['encoder_num_layers'],
                                pm_model['decoder_num_layers'],
                                pm_model['hidden_dim'], False,
                                config.decoder_input)
        ae_model.load_state_dict(pm_model['model_state_dict'])
        feat_dims.append(pm_model['feature_dim'])
        if config.use_gpu:
            ae_model.cuda()
        for p in ae_model.parameters():  # Do not update performance monitoring block
            p.requires_grad = False
        pm_models.append(ae_model)

    if len(pi) != len(pm_paths):
        logging.error("Number of pm models {:d} and number indices {:d} do not match".format(len(pm_paths), len(pi)))
        sys.exit(1)  # fix: was exit(0), which signals success on a fatal error

    # One CMVN mean per PM model; counts must match.
    cmvn_paths = config.cmvns.split(',')
    means = []
    for path in cmvn_paths:
        mean, _ = get_cmvn(path)
        means.append(mean)
    if len(cmvn_paths) != len(pm_paths):
        logging.error("Number of cmvn paths not equal to number of model paths, exiting training!")
        sys.exit(1)
    else:
        num_pm_models = len(pm_paths)

    # pm_scores[idx][utt_id] = reconstruction score from PM model idx.
    pm_scores = {}
    for idx in range(num_pm_models):
        pm_scores[idx] = {}

    with torch.no_grad():  # scoring only — no autograd graphs needed
        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            batches = []
            lens = mat.shape[0]
            if config.use_gpu:
                out = model(Variable(torch.FloatTensor(mat)).cuda())
            else:
                out = model(Variable(torch.FloatTensor(mat)))
            for idx in range(num_pm_models):
                # Select the monitored representation and center it with the
                # matching CMVN mean. fix: the CPU pi[idx]==0 branch used
                # means[0] instead of means[idx], diverging from the GPU path.
                if pi[idx] == 0:
                    post = out[1] - Variable(torch.FloatTensor(means[idx]))
                else:
                    post = out[0][pi[idx]] - Variable(torch.FloatTensor(means[idx]))
                if config.use_gpu:
                    post = post.cuda()
                batches.append(post)

            ## Get the PM scores
            lens = torch.IntTensor([lens])
            for idx in range(num_pm_models):
                batch_x = batches[idx]
                batch_x = batch_x[None, :, :]  # add batch dimension
                ae_model = pm_models[idx]
                batch_l = lens
                outputs = ae_model(batch_x, batch_l)
                loss = criterion(outputs, batch_x).mean()
                pm_scores[idx][utt_id] = loss.item()

    # fix: close the output file deterministically (was an unclosed
    # open(os.path.join(config.out_file), "wb") — single-arg join is a no-op).
    with open(config.out_file, "wb") as f:
        pickle.dump(pm_scores, f)
def _setup_logging(model_dir, experiment_name):
    """Configure logging to a per-experiment file plus the console."""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, experiment_name),
        filemode='w')
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logging.getLogger('').addHandler(console)


def _build_optimizer(config, model):
    """Map config.optimizer to a torch optimizer over model.parameters().

    Raises:
        NotImplementedError: if config.optimizer names an unsupported method.
    """
    if config.optimizer == 'adam':
        return optim.Adam(model.parameters(), lr=config.learning_rate)
    if config.optimizer == 'adadelta':
        return optim.Adadelta(model.parameters())
    if config.optimizer == 'sgd':
        return optim.SGD(model.parameters(), lr=config.learning_rate)
    if config.optimizer == 'adagrad':
        return optim.Adagrad(model.parameters(), lr=config.learning_rate)
    if config.optimizer == 'rmsprop':
        return optim.RMSprop(model.parameters(), lr=config.learning_rate)
    raise NotImplementedError("Learning method not supported for the task")


def _run_chunks(model, criterion, chunks, config, optimizer=None):
    """Run one pass over a list of .pt chunk files.

    Each chunk is a tensor whose last column holds the integer targets and
    the remaining columns hold the input features. If ``optimizer`` is
    given, a training step (backward + update) is taken per batch;
    otherwise the pass is evaluation-only.

    Returns:
        (mean cross-entropy loss, mean frame error rate) over all batches.
    """
    training = optimizer is not None
    losses = []
    fers = []
    for chunk in chunks:
        load_chunk = torch.load(chunk)
        feats = load_chunk[:, 0:-1]
        labels = load_chunk[:, -1].long()
        loader = torch.utils.data.DataLoader(
            nnetDataset(feats, labels),
            batch_size=config.batch_size,
            # Shuffle only while training; evaluation order is irrelevant to
            # the reported means and a fixed order keeps it deterministic.
            shuffle=training)
        for batch_x, batch_l in loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)
            out = model(batch_x)
            loss = criterion(out, batch_l)
            losses.append(loss.item())
            if config.use_gpu:
                fers.append(compute_fer(out.cpu().data.numpy(),
                                        batch_l.cpu().data.numpy()))
            else:
                fers.append(compute_fer(out.data.numpy(),
                                        batch_l.data.numpy()))
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
    return np.mean(losses), np.mean(fers)


def run(config):
    """Train the feed-forward nnet acoustic model described by ``config``.

    Creates the experiment directory, trains for ``config.epochs`` epochs
    over the .pt chunks in the egs directory, evaluates on each dev set
    every epoch, logs loss/FER, and periodically checkpoints the model.
    """
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    _setup_logging(model_dir, config.experiment_name)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (config.num_layers))
    # BUGFIX: previously logged config.feature_dim under "Hidden Dimension".
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Number of Classes: %d' % (config.num_classes))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Number of Frames: %d' % (config.num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    sys.stdout.flush()

    model = nnetFeedforward(config.feature_dim * config.num_frames,
                            config.num_layers, config.hidden_dim,
                            config.num_classes)

    if config.use_gpu:
        # Pin this job to one GPU (renamed from `id` to avoid shadowing the
        # builtin).
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id
        model = model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = _build_optimizer(config, model)

    train_chunks = [
        os.path.join(config.egs_dir, config.train_set, x)
        for x in os.listdir(os.path.join(config.egs_dir, config.train_set))
        if x.endswith('.pt')
    ]

    dev_sets = config.dev_sets.split(",")
    for x in dev_sets:
        logging.info('Using Dev set: %s' % (x))
    sys.stdout.flush()

    all_val_chunks = {}
    for d in dev_sets:
        all_val_chunks[d] = [
            os.path.join(config.egs_dir, d, x)
            for x in os.listdir(os.path.join(config.egs_dir, d))
            if x.endswith('.pt')
        ]

    # Snapshot the untrained model as "epoch_0".
    # NOTE(review): the stored 'epoch' field is 1 while the filename says
    # epoch_0 — kept as-is for compatibility with existing consumers.
    # torch.save is given the path directly so no file handle is leaked.
    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    torch.save(
        {
            'epoch': 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, model_path)

    ep_loss_tr = []
    ep_fer_tr = []
    ep_loss_dev = {x: [] for x in dev_sets}
    ep_fer_dev = {x: [] for x in dev_sets}

    for epoch_i in range(config.epochs):
        ####################
        ##### Training #####
        ####################
        model.train()
        tr_loss, tr_fer = _run_chunks(model, criterion, train_chunks, config,
                                      optimizer)
        ep_loss_tr.append(tr_loss)
        ep_fer_tr.append(tr_fer)

        ######################
        ##### Validation #####
        ######################
        model.eval()
        with torch.set_grad_enabled(False):
            for x in dev_sets:
                val_loss, val_fer = _run_chunks(model, criterion,
                                                all_val_chunks[x], config)
                ep_loss_dev[x].append(val_loss)
                ep_fer_dev[x].append(val_fer)

        print_log = "Epoch: {:d} Tr loss: {:.3f} :: Tr FER: {:.2f}".format(
            epoch_i + 1, ep_loss_tr[-1], ep_fer_tr[-1])
        for x in dev_sets:
            # BUGFIX: dev FER was previously mislabeled "Tr FER".
            print_log += " || Val ({}): {:.3f} :: Val FER: {:.2f}".format(
                x, ep_loss_dev[x][-1], ep_fer_dev[x][-1])
        logging.info(print_log)

        # Persist the running loss histories (paths, not open handles).
        torch.save(
            ep_loss_tr,
            os.path.join(model_dir,
                         "tr_epoch{:d}.loss".format(epoch_i + 1)))
        torch.save(
            ep_loss_dev,
            os.path.join(model_dir,
                         "dev_epoch{:d}.loss".format(epoch_i + 1)))

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            torch.save(
                {
                    'epoch': epoch_i + 1,
                    'feature_dim': config.feature_dim,
                    'num_frames': config.num_frames,
                    'num_classes': config.num_classes,
                    'num_layers': config.num_layers,
                    'hidden_dim': config.hidden_dim,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, model_path)