Example #1
def test_optim2(self):
    elbo_fn = beer.EvidenceLowerBound(len(self.data))
    for i, model in enumerate([self.ppca, self.plda]):
        with self.subTest(i=i):
            optim = beer.BayesianModelCoordinateAscentOptimizer(
                *model.grouped_parameters, lrate=1.)
            previous = -float('inf')
            for _ in range(100):
                # One coordinate-ascent update.
                optim.zero_grad()
                elbo = elbo_fn(model, self.data)
                elbo.natural_backward()
                optim.step()

                # The normalized ELBO should never decrease.
                elbo = round(float(elbo) / (len(self.data) * self.dim), 3)
                self.assertGreaterEqual(elbo - previous, -TOLERANCE)
                previous = elbo
Example #2
def test_optim(self):
    for i, model in enumerate(self.models):
        with self.subTest(model=self.conf_files[i]):
            optim = beer.BayesianModelCoordinateAscentOptimizer(
                model.mean_field_groups, lrate=1.)
            previous = -float('inf')
            for _ in range(N_ITER):
                self.seed(1)
                optim.zero_grad()
                elbo = beer.evidence_lower_bound(model, self.data)
                elbo.natural_backward()
                optim.step()
                elbo = round(float(elbo) / (len(self.data) * self.dim), 3)
                self.assertGreaterEqual(elbo - previous, -TOLERANCE)
                previous = elbo
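
Both tests above run the same coordinate-ascent loop. The code below is only a rough sketch of that loop outside the unit-test harness: it assumes a beer model exposing mean_field_groups (as in Example #2; Example #1 uses grouped_parameters instead, so the exact attribute name depends on the library version) and an observation tensor named data.

import beer

# Rough sketch of the update loop used in the tests above.
# `model` (a beer Bayesian model) and `data` (a 2-D float tensor) are
# assumed to be defined elsewhere.
TOLERANCE = 1e-3  # hypothetical tolerance, mirroring the tests' intent

optim = beer.BayesianModelCoordinateAscentOptimizer(model.mean_field_groups,
                                                    lrate=1.)
previous = -float('inf')
for _ in range(100):
    optim.zero_grad()
    elbo = beer.evidence_lower_bound(model, data)
    elbo.natural_backward()  # compute the natural gradient of the ELBO
    optim.step()

    # The normalized ELBO should be (almost) monotonically increasing.
    value = float(elbo) / len(data)
    assert value - previous >= -TOLERANCE
    previous = value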
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('lm', help='unigram language model to train')
    parser.add_argument('data', help='data')
    parser.add_argument('outlm', help='output model')
    args = parser.parse_args()

    # Load the model.
    with open(args.lm, 'rb') as fh:
        model = pickle.load(fh)

    # Load the data for the training.
    data = np.load(args.data)

    # Count the total number of data points in the training data.
    tot_counts = 0
    for utt in data:
        tot_counts += len(data[utt])

    # Prepare the optimizer for the training.
    params = model.mean_field_factorization()
    optimizer = beer.BayesianModelCoordinateAscentOptimizer(params, lrate=1.)
    optimizer.zero_grad()

    # Initialize the objective function.
    elbo = beer.evidence_lower_bound(datasize=tot_counts)

    # Re-estimate the LM.
    for utt in data:
        ft = torch.from_numpy(data[utt])
        elbo += beer.evidence_lower_bound(model, ft, datasize=tot_counts)
    elbo.backward()
    optimizer.step()

    # Save the model.
    with open(args.outlm, 'wb') as fh:
        pickle.dump(model, fh)
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size',
                        type=int,
                        default=-1,
                        help='number of utterances in each batch')
    parser.add_argument('--epochs',
                        type=int,
                        default=1,
                        help='number of epochs to train')
    parser.add_argument('--fast-eval', action='store_true')
    parser.add_argument('--kl-weight',
                        type=float,
                        default=1.,
                        help='weighting of KL div. of the ELBO')
    parser.add_argument('--lrate-nnet',
                        type=float,
                        default=1e-3,
                        help='learning rate for the nnet components')
    parser.add_argument('--lrate',
                        type=float,
                        default=1.,
                        help='learning rate')
    parser.add_argument('--nnet-optim-state',
                        help='file where to load/save state of the nnet '
                        'optimizer')
    parser.add_argument('--use-gpu', action='store_true')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('model', help='model to train')
    parser.add_argument('alis', help='alignments')
    parser.add_argument('feats', help='feature file')
    parser.add_argument('feat_stats', help='data statistics')
    parser.add_argument('out', help='output model')
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Load the data.
    alis = np.load(args.alis)
    feats = np.load(args.feats)
    stats = np.load(args.feat_stats)

    # Load the model and move it to the chosen device (CPU/GPU)
    with open(args.model, 'rb') as fh:
        model = pickle.load(fh)
    if args.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)

    # NNET optimizer.
    nnet_optim = torch.optim.Adam(model.modules_parameters(),
                                  lr=args.lrate_nnet,
                                  eps=1e-3,
                                  amsgrad=False,
                                  weight_decay=1e-2)

    if args.nnet_optim_state and os.path.isfile(args.nnet_optim_state):
        logging.debug('load nnet optimizer state: {}'.format(
            args.nnet_optim_state))
        optim_state = torch.load(args.nnet_optim_state)
        nnet_optim.load_state_dict(optim_state)

    # Prepare the optimizer for the training.
    params = model.mean_field_factorization()
    optimizer = beer.BayesianModelCoordinateAscentOptimizer(
        params, lrate=args.lrate, std_optim=nnet_optim)

    # If no batch size is specified, use the whole dataset.
    batch_size = len(feats.files)
    if args.batch_size > 0:
        batch_size = args.batch_size

    tot_counts = int(stats['nframes'])
    for epoch in range(1, args.epochs + 1):
        # Shuffle the order of the utterances.
        keys = list(feats.keys())
        random.shuffle(keys)
        batches = [
            keys[i:i + batch_size] for i in range(0, len(keys), batch_size)
        ]
        logging.debug('Data shuffled into {} batches'.format(len(batches)))

        for batch_no, batch_keys in enumerate(batches, start=1):
            # Reset the gradients.
            optimizer.zero_grad()

            # Load the batch data.
            ft, labels = load_batch(feats, alis, batch_keys)
            ft, labels = ft.to(device), labels.to(device)

            # Compute the objective function.
            elbo = beer.evidence_lower_bound(model,
                                             ft,
                                             state_path=labels,
                                             kl_weight=args.kl_weight,
                                             datasize=tot_counts,
                                             fast_eval=args.fast_eval)

            # Compute the gradient of the model (standard backward, needed
            # to get gradients for the nnet parameters as well).
            elbo.backward()

            # Clip the gradient to avoid gradient explosion.
            torch.nn.utils.clip_grad_norm_(model.modules_parameters(), 100.0)

            # Update the parameters.
            optimizer.step()

            elbo_value = float(elbo) / tot_counts
            log_msg = 'epoch={}/{} batch={}/{} elbo={}'
            logging.info(
                log_msg.format(epoch, args.epochs, batch_no, len(batches),
                               round(elbo_value, 3)))

    if args.nnet_optim_state:
        torch.save(nnet_optim.state_dict(), args.nnet_optim_state)

    with open(args.out, 'wb') as fh:
        pickle.dump(model.to(torch.device('cpu')), fh)
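
Example #4 differs from the others in that the model also has neural-network components: a torch Adam optimizer is built over modules_parameters() and handed to the Bayesian optimizer through the std_optim argument, and the gradients come from elbo.backward() rather than natural_backward(). A minimal sketch of just that wiring, assuming a hybrid model plus a feature tensor features and a frame count tot_counts defined elsewhere:

import beer
import torch

# Sketch of the two-optimizer setup used in Example #4 (`model`, `features`
# and `tot_counts` are assumed to exist).
nnet_optim = torch.optim.Adam(model.modules_parameters(), lr=1e-3,
                              weight_decay=1e-2)
optimizer = beer.BayesianModelCoordinateAscentOptimizer(
    model.mean_field_factorization(), lrate=1., std_optim=nnet_optim)

optimizer.zero_grad()
elbo = beer.evidence_lower_bound(model, features, datasize=tot_counts)
elbo.backward()  # autograd backward: populates .grad on the nnet parameters
torch.nn.utils.clip_grad_norm_(model.modules_parameters(), 100.0)
optimizer.step()  # updates the conjugate parameters and, via std_optim, the nnet ones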
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', type=int, default=-1,
                        help='number of utterances in each batch')
    parser.add_argument('--epochs', type=int, default=1,
                        help='number of epochs')
    parser.add_argument('--fast-eval', action='store_true')
    parser.add_argument('--lrate', type=float, default=1.,
                        help='learning rate')
    parser.add_argument('--use-gpu', action='store_true')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('hmm', help='hmm model to train')
    parser.add_argument('alis', help='alignments')
    parser.add_argument('feats', help='feature file')
    parser.add_argument('feat_stats', help='data statistics')
    parser.add_argument('out', help='output model')
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Load the data.
    alis = np.load(args.alis)
    feats = np.load(args.feats)
    stats = np.load(args.feat_stats)

    # Load the model and move it to the chosen device (CPU/GPU)
    with open(args.hmm, 'rb') as fh:
        model = pickle.load(fh)
    if args.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)

    # Prepare the optimizer for the training.
    params = model.mean_field_groups
    optimizer = beer.BayesianModelCoordinateAscentOptimizer(params,
                                                            lrate=args.lrate)

    # If no batch size is specified, use the whole dataset.
    batch_size = len(feats.files)
    if args.batch_size > 0:
        batch_size = args.batch_size


    tot_counts = int(stats['nframes'])
    for epoch in range(1, args.epochs + 1):
        # Shuffle the order of the utterances.
        keys = list(feats.keys())
        random.shuffle(keys)
        batches = [keys[i: i + batch_size]
                   for i in range(0, len(keys), batch_size)]
        logging.debug('Data shuffled into {} batches'.format(len(batches)))

        for batch_no, batch_keys in enumerate(batches, start=1):
            # Reset the gradients.
            optimizer.zero_grad()
            elbo = beer.evidence_lower_bound(datasize=tot_counts)
            for uttid in batch_keys:
                # Load the batch data.
                ft = torch.from_numpy(feats[uttid]).float()
                ali = torch.from_numpy(alis[uttid]).long()
                ft, ali = ft.to(device), ali.to(device)

                # Compute the objective function.
                elbo += beer.evidence_lower_bound(model, ft, state_path=ali,
                                                  datasize=tot_counts,
                                                  fast_eval=args.fast_eval)

            # Compute the gradient of the model.
            elbo.natural_backward()

            # Update the parameters.
            optimizer.step()

            elbo_value = float(elbo) / tot_counts
            log_msg = 'epoch={}/{} batch={}/{} elbo={}'
            logging.info(log_msg.format(
                epoch, args.epochs,
                batch_no, len(batches),
                round(elbo_value, 3))
            )

            del ft, ali


    with open(args.out, 'wb') as fh:
        pickle.dump(model.to(torch.device('cpu')), fh)
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--alignments', help='utterance alignments')
    parser.add_argument('--batch-size',
                        type=int,
                        help='number of utterances in each batch')
    parser.add_argument('--epochs', type=int)
    parser.add_argument('--fast-eval', action='store_true')
    parser.add_argument('--infer-type',
                        default='viterbi',
                        choices=['baum_welch', 'viterbi'],
                        help='how to compute the state posteriors')
    parser.add_argument('--lrate', type=float, help='learning rate')
    parser.add_argument('--tmpdir',
                        help='directory to store intermediate models')
    parser.add_argument('--use-gpu', action='store_true')
    parser.add_argument('hmm', help='hmm model to train')
    parser.add_argument('feats', help='feature file')
    parser.add_argument('feat_stats', help='data statistics')
    parser.add_argument('out', help='output model')
    args = parser.parse_args()

    # Load the data for the training.
    feats = np.load(args.feats)

    ali = None
    if args.alignments:
        ali = np.load(args.alignments)

    stats = np.load(args.feat_stats)

    with open(args.hmm, 'rb') as fh:
        model = pickle.load(fh)

    if args.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)

    # Prepare the optimizer for the training.
    params = model.mean_field_groups
    optimizer = beer.BayesianModelCoordinateAscentOptimizer(params,
                                                            lrate=args.lrate)

    tot_counts = int(stats['nframes'])
    for epoch in range(1, args.epochs + 1):

        # Shuffle the order of the utterances.
        keys = list(feats.keys())
        random.shuffle(keys)
        batches = [
            keys[i:i + args.batch_size]
            for i in range(0, len(keys), args.batch_size)
        ]
        logging.debug('Data shuffled into {} batches'.format(len(batches)))

        # One mini-batch update.
        for batch_no, batch_keys in enumerate(batches, start=1):
            # Reset the gradients.
            optimizer.zero_grad()

            # Initialize the ELBO.
            elbo = beer.evidence_lower_bound(datasize=tot_counts)

            for utt in batch_keys:
                ft = torch.from_numpy(feats[utt]).float().to(device)

                # Get the alignment graph if provided.
                graph = None
                if ali is not None:
                    graph = ali[utt][0].to(device)

                elbo += beer.evidence_lower_bound(
                    model,
                    ft,
                    datasize=tot_counts,
                    fast_eval=args.fast_eval,
                    inference_graph=graph,
                    inference_type=args.infer_type)

            # Compute the gradient of the model.
            elbo.natural_backward()

            # Update the parameters.
            optimizer.step()

            elbo_value = float(elbo) / (tot_counts * len(batch_keys))
            log_msg = 'epoch={}/{}  batch={}/{}  ELBO={}'
            logging.info(
                log_msg.format(epoch, args.epochs, batch_no, len(batches),
                               round(elbo_value, 3)))

        if args.tmpdir:
            path = os.path.join(args.tmpdir, str(epoch) + '.mdl')
            with open(path, 'wb') as fh:
                pickle.dump(model.to(torch.device('cpu')), fh)

    with open(args.out, 'wb') as fh:
        pickle.dump(model.to(torch.device('cpu')), fh)
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs',
                        type=int,
                        default=1,
                        help='number of epochs to train')
    parser.add_argument('--fast-eval', action='store_true')
    parser.add_argument('--lrate',
                        type=float,
                        default=1.,
                        help='learning rate')
    parser.add_argument('--use-gpu', action='store_true')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('model', help='model to train')
    parser.add_argument('batches', help='file listing the batch files')
    parser.add_argument('feat_stats', help='data statistics')
    parser.add_argument('out', help='output model')
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Load the data.
    stats = np.load(args.feat_stats)

    # Load the list of batch files.
    batches_list = []
    with open(args.batches, 'r') as f:
        for line in f:
            batches_list.append(line.strip())

    # Load the model and move it to the chosen device (CPU/GPU)
    with open(args.model, 'rb') as fh:
        model = pickle.load(fh)
    if args.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)

    # Prepare the optimizer for the training.
    params = model.mean_field_groups
    optimizer = beer.BayesianModelCoordinateAscentOptimizer(params,
                                                            lrate=args.lrate)

    tot_counts = int(stats['nframes'])
    for epoch in range(1, args.epochs + 1):
        # Shuffle the order of the batches.
        random.shuffle(batches_list)
        for batch_no, path in enumerate(batches_list, start=1):
            # Reset the gradients.
            optimizer.zero_grad()

            # Load the batch data.
            batch = np.load(path)
            ft = torch.from_numpy(batch['features']).float()
            ft = ft.to(device)

            # Compute the objective function.
            elbo = beer.evidence_lower_bound(model,
                                             ft,
                                             datasize=tot_counts,
                                             fast_eval=args.fast_eval)

            # Compute the gradient of the model.
            elbo.natural_backward()

            # Update the parameters.
            optimizer.step()

            elbo_value = float(elbo) / tot_counts
            log_msg = 'epoch={}/{} batch={}/{} elbo={}'
            logging.info(
                log_msg.format(epoch, args.epochs, batch_no, len(batches_list),
                               round(elbo_value, 3)))

    with open(args.out, 'wb') as fh:
        pickle.dump(model.to(torch.device('cpu')), fh)
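
All of the training scripts above move the model back to the CPU before pickling it. Reloading such a model later is symmetric; a minimal sketch ('final.mdl' is a placeholder path):

import pickle

import torch

# Reload a model written by one of the scripts above and move it to the
# GPU if one is available.
with open('final.mdl', 'rb') as fh:
    model = pickle.load(fh)
if torch.cuda.is_available():
    model = model.to(torch.device('cuda'))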