Exemple #1
0
def run_sampler(model, corpus, n_iter):
    """Run the sampler over ``corpus`` and return one alignment per pair.

    Every iteration sweeps over all ``(f, e)`` pairs of ``corpus``. From the
    second iteration on, the pair's previous alignment is first removed with
    ``model.decrement``; a new alignment is then taken from
    ``model.increment``.

    Periodic work, keyed on the 0-based iteration index ``it``:

    * ``it % 10 == 0``: log the model, its log-likelihood and the
      perplexity (log-likelihood normalised by the total length of the
      ``e`` sides of the corpus).
    * ``it % 30 == 29``: resample the hyperparameters and log the
      acceptance rate.
    * ``it > n_iter / 10 and it % 10 == 0``: store ``model.map_estimate()``.

    After the last iteration the stored samples are merged with
    ``AlignmentModel.combine`` and the resulting callable is applied to every
    pair to produce the returned alignments.

    Args:
        model: object providing ``increment``, ``decrement``,
            ``log_likelihood``, ``resample_hyperparemeters`` and
            ``map_estimate``.
        corpus: sized, re-iterable collection of ``(f, e)`` pairs (it is
            passed to ``len()`` and iterated once per iteration).
        n_iter: number of sampling iterations.

    Returns:
        A list holding, for each corpus entry, ``list(align(f, e))`` where
        ``align`` is the combined sample.

    Note:
        ``mh_iter`` and ``AlignmentModel`` are not defined here; they must be
        available at module level.
    """
    n_words = sum(len(e) for _, e in corpus)
    alignments = [None] * len(corpus)
    samples = []
    for it in range(n_iter):
        logging.info('Iteration %d/%d', it + 1, n_iter)
        for i, (f, e) in enumerate(corpus):
            # Nothing has been added to the model for this pair on the
            # first sweep, so there is nothing to remove yet.
            if it > 0:
                model.decrement(f, e, alignments[i])
            alignments[i] = list(model.increment(f, e))
        if it % 10 == 0:
            logging.info('Model: %s', model)
            ll = model.log_likelihood()
            # Perplexity is undefined without words; report NaN instead of
            # letting a logging-only statistic raise ZeroDivisionError.
            ppl = math.exp(-ll / n_words) if n_words else float('nan')
            logging.info('LL=%.0f ppl=%.3f', ll, ppl)
        if it % 30 == 29:
            logging.info('Resampling hyperparameters...')
            acceptance, rejection = model.resample_hyperparemeters(mh_iter)
            proposals = acceptance + rejection
            # No proposals means no acceptances: report 0 rather than
            # dividing by zero.
            arate = acceptance / float(proposals) if proposals else 0.0
            logging.info('Metropolis-Hastings acceptance rate: %.4f', arate)
            logging.info('Model: %s', model)
        # Skip the first tenth of the run before collecting samples.
        if it > n_iter / 10 and it % 10 == 0:
            logging.info('Estimating sample')
            samples.append(model.map_estimate())

    logging.info('Combining %d samples', len(samples))
    align = AlignmentModel.combine(samples)
    for i, (f, e) in enumerate(corpus):
        alignments[i] = list(align(f, e))
    return alignments
Exemple #2
0
def run_sampler(model, corpus, n_iter):
    """Run the sampler over ``corpus`` and return one alignment per pair.

    Every iteration sweeps over all ``(f, e)`` pairs of ``corpus``. From the
    second iteration on, the pair's previous alignment is first removed with
    ``model.decrement``; a new alignment is then taken from
    ``model.increment``.

    Periodic work, keyed on the 0-based iteration index ``it``:

    * ``it % 10 == 0``: log the model, its log-likelihood and the
      perplexity (log-likelihood normalised by the total length of the
      ``e`` sides of the corpus).
    * ``it % 30 == 29``: resample the hyperparameters and log the
      acceptance rate.
    * ``it > n_iter / 10 and it % 10 == 0``: store ``model.map_estimate()``.

    After the last iteration the stored samples are merged with
    ``AlignmentModel.combine`` and the resulting callable is applied to every
    pair to produce the returned alignments.

    Args:
        model: object providing ``increment``, ``decrement``,
            ``log_likelihood``, ``resample_hyperparemeters`` and
            ``map_estimate``.
        corpus: sized, re-iterable collection of ``(f, e)`` pairs (it is
            passed to ``len()`` and iterated once per iteration).
        n_iter: number of sampling iterations.

    Returns:
        A list holding, for each corpus entry, ``list(align(f, e))`` where
        ``align`` is the combined sample.

    Note:
        ``mh_iter`` and ``AlignmentModel`` are not defined here; they must be
        available at module level.
    """
    n_words = sum(len(e) for _, e in corpus)
    alignments = [None] * len(corpus)
    samples = []
    for it in range(n_iter):
        logging.info('Iteration %d/%d', it+1, n_iter)
        for i, (f, e) in enumerate(corpus):
            # Nothing has been added to the model for this pair on the
            # first sweep, so there is nothing to remove yet.
            if it > 0:
                model.decrement(f, e, alignments[i])
            alignments[i] = list(model.increment(f, e))
        if it % 10 == 0:
            logging.info('Model: %s', model)
            ll = model.log_likelihood()
            # Perplexity is undefined without words; report NaN instead of
            # letting a logging-only statistic raise ZeroDivisionError.
            ppl = math.exp(-ll / n_words) if n_words else float('nan')
            logging.info('LL=%.0f ppl=%.3f', ll, ppl)
        if it % 30 == 29:
            logging.info('Resampling hyperparameters...')
            acceptance, rejection = model.resample_hyperparemeters(mh_iter)
            proposals = acceptance + rejection
            # No proposals means no acceptances: report 0 rather than
            # dividing by zero.
            arate = acceptance / float(proposals) if proposals else 0.0
            logging.info('Metropolis-Hastings acceptance rate: %.4f', arate)
            logging.info('Model: %s', model)
        # Skip the first tenth of the run before collecting samples.
        if it > n_iter/10 and it % 10 == 0:
            logging.info('Estimating sample')
            samples.append(model.map_estimate())

    logging.info('Combining %d samples', len(samples))
    align = AlignmentModel.combine(samples)
    for i, (f, e) in enumerate(corpus):
        alignments[i] = list(align(f, e))
    return alignments