Example 1
def expectationMaximization(distPrev,
                            accumulate,
                            createAcc=d.getDefaultCreateAcc(),
                            estimateTotAux=d.getDefaultEstimateTotAux(),
                            afterAcc=None,
                            monotoneAux=True,
                            verbosity=0):
    """Performs one step of expectation maximization.

    See the note in the docstring for this module for information on how the
    log likelihood is scaled. This scaling has no effect on the dist returned
    by this function.
    """
    acc = createAcc(distPrev)
    accumulate(acc)
    if afterAcc is not None:
        afterAcc(acc)
    logLikePrev = acc.logLike()
    # guard against a zero count so the per-frame values below are well-defined
    count = max(acc.count(), 1.0)
    dist, (aux, auxRat) = estimateTotAux(acc)
    if monotoneAux and aux < logLikePrev:
        raise RuntimeError(
            're-estimated auxiliary value (%s) less than previous log likelihood (%s) during expectation-maximization (count = %s)'
            % (aux / count, logLikePrev / count, count))
    if verbosity >= 2:
        print('trainEM:    logLikePrev = %s -> aux = %s (%s) (%s count)' % (
            logLikePrev / count, aux / count, d.Rat.toString(auxRat), count))
    return dist, logLikePrev, (aux, auxRat), count
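
expectationMaximization assumes a small accumulator protocol: createAcc(distPrev) returns a fresh accumulator, the user-supplied accumulate(acc) feeds it data, the accumulator exposes logLike() and count(), and estimateTotAux(acc) returns the re-estimated dist together with an (aux, auxRat) pair, where aux is the auxiliary value that EM guarantees is at least the previous log likelihood. The sketch below exercises one EM step on a toy fixed-variance 1-D Gaussian. All names in it (GaussianAcc, estimateTotAuxGaussian, data) are illustrative and not part of the library, and it assumes the module aliased as d above is importable so the keyword defaults can be evaluated.

import math

class GaussianAcc(object):
    """Toy accumulator for a 1-D Gaussian with fixed variance (illustrative)."""
    def __init__(self, distPrev):
        self.distPrev = distPrev
        self.sum = 0.0
        self.sumSqr = 0.0
        self.n = 0.0
    def add(self, x):
        self.sum += x
        self.sumSqr += x * x
        self.n += 1.0
    def logLikeUnder(self, mean, var):
        # sum over frames of log N(x; mean, var), from sufficient statistics
        sqrErr = self.sumSqr - 2.0 * mean * self.sum + self.n * mean * mean
        return -0.5 * (self.n * math.log(2.0 * math.pi * var) + sqrErr / var)
    def logLike(self):
        mean, var = self.distPrev
        return self.logLikeUnder(mean, var)
    def count(self):
        return self.n

def estimateTotAuxGaussian(acc):
    # with no latent variables the auxiliary function is the log likelihood
    # itself, so aux is exact; auxRat is a placeholder here since it is only
    # inspected (via d.Rat.toString) at verbosity >= 2
    _, var = acc.distPrev
    newMean = acc.sum / max(acc.n, 1.0)
    return (newMean, var), (acc.logLikeUnder(newMean, var), None)

data = [0.5, 1.5, 2.0, 1.0]

def accumulate(acc):
    for x in data:
        acc.add(x)

dist, logLikePrev, (aux, auxRat), count = expectationMaximization(
    (0.0, 1.0), accumulate,
    createAcc=GaussianAcc, estimateTotAux=estimateTotAuxGaussian)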
Example 2
def trainCGandEM(distInit,
                 accumulate,
                 ps=d.getDefaultParamSpec(),
                 createAccEM=d.getDefaultCreateAcc(),
                 estimateTotAux=d.getDefaultEstimateTotAux(),
                 iterations=5,
                 length=-50,
                 afterEst=None,
                 verbosity=0):
    """Re-estimates a distribution using conjugate gradients and EM.

    See the note in the docstring for this module for information on how the
    log likelihood is scaled. This scaling is presumed to have only a small
    impact on the dist returned by this function (via its impact on trainCG).
    """
    assert iterations >= 1

    dist = distInit
    for it in range(1, iterations + 1):
        if verbosity >= 1:
            print('trainCGandEM: starting it =', it, 'of CG and EM')

        dist = (timed(trainCG) if verbosity >= 2 else trainCG)(
            dist, accumulate, ps=ps, length=length, verbosity=verbosity)

        dist, _, _, _ = expectationMaximization(dist,
                                                accumulate,
                                                createAcc=createAccEM,
                                                estimateTotAux=estimateTotAux,
                                                verbosity=verbosity)

        if afterEst is not None:
            afterEst(dist=dist, it=it)

        if verbosity >= 1:
            print('trainCGandEM: finished it =', it, 'of CG and EM')
            print('trainCGandEM:')

    return dist
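
One natural use of the afterEst hook in trainCGandEM is to checkpoint the model after each outer iteration; trainCGandEM calls it as afterEst(dist=dist, it=it) once the EM step has finished. A minimal sketch, assuming distInit and accumulate are set up as in the other examples (the checkpoint function and filename pattern are illustrative, not part of the library):

import pickle

def saveCheckpoint(dist=None, it=None):
    # called by trainCGandEM after the EM step of outer iteration `it`
    with open('dist.it%d.pkl' % it, 'wb') as f:
        pickle.dump(dist, f)

dist = trainCGandEM(distInit, accumulate,
                    iterations=3, length=-50,
                    afterEst=saveCheckpoint, verbosity=1)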
Example 3
def trainEM(distInit,
            accumulate,
            createAcc=d.getDefaultCreateAcc(),
            estimateTotAux=d.getDefaultEstimateTotAux(),
            logLikePrevInit=float('-inf'),
            deltaThresh=1e-8,
            minIterations=1,
            maxIterations=None,
            beforeAcc=None,
            afterAcc=None,
            afterEst=None,
            monotone=False,
            monotoneAux=True,
            verbosity=0):
    """Re-estimates a distribution using expectation maximization.

    See the note in the docstring for this module for information on how the
    log likelihood is scaled. This scaling only affects the dist returned by
    this function to the extent that it effectively scales the deltaThresh
    threshold used to assess convergence, and so may sometimes affect the number
    of iterations of expectation maximization performed.
    """
    assert minIterations >= 1
    assert maxIterations is None or maxIterations >= minIterations

    dist = distInit
    logLikePrev = logLikePrevInit
    it = 0
    converged = False
    # `and` binds tighter than `or`; the parentheses make the grouping explicit
    while it < minIterations or ((not converged)
                                 and (maxIterations is None
                                      or it < maxIterations)):
        if beforeAcc is not None:
            beforeAcc(dist)
        logLikePrevPrev = logLikePrev
        if verbosity >= 2:
            print('trainEM: it %s:' % (it + 1))
        dist, logLikePrev, (aux, auxRat), count = expectationMaximization(
            dist,
            accumulate,
            createAcc=createAcc,
            estimateTotAux=estimateTotAux,
            afterAcc=afterAcc,
            monotoneAux=monotoneAux,
            verbosity=verbosity)
        deltaLogLikePrev = logLikePrev - logLikePrevPrev
        if monotone and deltaLogLikePrev < 0.0:
            raise RuntimeError(
                'log likelihood decreased during expectation-maximization')
        if verbosity >= 2:
            print('trainEM:    deltaLogLikePrev = %s' % (deltaLogLikePrev / count))
        if afterEst is not None:
            afterEst(dist=dist, it=it)
        converged = (abs(deltaLogLikePrev) <= deltaThresh * count)
        it += 1

    if verbosity >= 1:
        if converged:
            print('trainEM: converged at thresh', deltaThresh, 'in', it, 'iterations')
        else:
            print('trainEM: did NOT converge at thresh', deltaThresh, 'in', it, 'iterations')

    return dist
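
Since convergence is judged by abs(deltaLogLikePrev) <= deltaThresh * count, deltaThresh is effectively a per-frame threshold. Reusing the toy Gaussian protocol sketched after Example 1 (again, illustrative names only), a full trainEM run looks like the sketch below; because the fixed-variance mean estimate is exact, the mean stops changing after the first re-estimation, the log-likelihood delta reaches zero, and the run converges in three iterations.

dist = trainEM((0.0, 1.0), accumulate,
               createAcc=GaussianAcc,
               estimateTotAux=estimateTotAuxGaussian,
               deltaThresh=1e-9,
               maxIterations=20,
               verbosity=1)
mean, var = dist  # mean is the sample mean of data; var is unchanged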