Example #1
def L_top(rho=None, omega=None, alpha=None,
          gamma=None, kappa=0, startAlpha=0, **kwargs):
    ''' Evaluate the top-level term of the surrogate objective
    '''
    if startAlpha == 0:
        startAlpha = alpha

    K = rho.size
    eta1 = rho * omega
    eta0 = (1 - rho) * omega
    digamma_omega = digamma(omega)
    ElogU = digamma(eta1) - digamma_omega
    Elog1mU = digamma(eta0) - digamma_omega
    diff_cBeta = K * c_Beta(1.0, gamma) - c_Beta(eta1, eta0)

    tAlpha = K * K * np.log(alpha) + K * np.log(startAlpha)
    if kappa > 0:
        coefU = K + 1.0 - eta1
        coef1mU = K * OptimizerRhoOmega.kvec(K) + 1.0 + gamma - eta0
        sumEBeta = np.sum(rho2beta(rho, returnSize='K'))
        tBeta = sumEBeta * (np.log(alpha + kappa) - np.log(kappa))
        tKappa = K * (np.log(kappa) - np.log(alpha + kappa))
    else:
        coefU = (K + 1) + 1.0 - eta1
        coef1mU = (K + 1) * OptimizerRhoOmega.kvec(K) + gamma - eta0
        tBeta = 0
        tKappa = 0

    diff_logU = np.inner(coefU, ElogU) \
        + np.inner(coef1mU, Elog1mU)
    return tAlpha + tKappa + tBeta + diff_cBeta + diff_logU
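Several of the examples below rely on the stick-breaking transform behind rho2beta and rho2beta_active. For reference, here is a minimal sketch of that transform under the usual stick-breaking construction; the function name and the returnSize convention are chosen to mirror the calls above, but this is an illustrative stand-in, not the library's implementation.

import numpy as np

def rho2beta_sketch(rho, returnSize='K+1'):
    ''' Stick-breaking transform: beta_k = rho_k * prod_{j<k} (1 - rho_j).
    '''
    rho = np.asarray(rho, dtype=np.float64)
    remaining = np.hstack([1.0, np.cumprod(1.0 - rho)])
    beta_active = rho * remaining[:-1]
    if returnSize == 'K':
        # Active components only; these sum to less than one.
        return beta_active
    # Append the leftover stick mass so the result is a full probability vector.
    return np.hstack([beta_active, remaining[-1]])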
Example #2
    def testHasSaneOutput__objFunc_constrained(self):
        ''' Verify objective and gradient vector have correct type and size

        f should be a finite scalar
        g should be a vector of size 2K (first half is drho, second is domega)
        '''
        for K in [1, 2, 10, 101]:
            for seed in [33, 77, 888]:
                for oScale in [1, 10]:
                    for approx_grad in [0, 1]:
                        PRNG = np.random.RandomState(seed)
                        rho = PRNG.rand(K)
                        omega = oScale * PRNG.rand(K)
                        rhoomega = np.hstack([rho, omega])
                        kwargs = dict(alpha=1,
                                      gamma=1,
                                      nDoc=0,
                                      sumLogPi=np.zeros(K + 1))
                        if approx_grad:
                            f = OptimizerRhoOmega.objFunc_constrained(
                                rhoomega, approx_grad=1, **kwargs)
                            # Placeholder gradient; with approx_grad=1 the
                            # gradient is approximated automatically.
                            g = np.ones(2 * K)
                        else:
                            f, g = OptimizerRhoOmega.objFunc_constrained(
                                rhoomega, approx_grad=0, **kwargs)
                        assert isinstance(f, np.float64)
                        assert g.ndim == 1
                        assert g.size == 2 * K
                        assert np.isfinite(f)
                        assert np.all(np.isfinite(g))
Example #3
def learn_rhoomega_fromFixedCounts(DocTopicCount_d=None,
                                   nDoc=0,
                                   alpha=None,
                                   gamma=None,
                                   initrho=None,
                                   initomega=None):
    ''' Alternately optimize rho, omega and update doc-level Dirichlet
        parameters for one fixed count vector, until beta converges.
    '''
    Nd = np.sum(DocTopicCount_d)
    K = DocTopicCount_d.size
    if initrho is None:
        rho = OptimizerRhoOmega.create_initrho(K)
    else:
        rho = initrho
    if initomega is None:
        omega = OptimizerRhoOmega.create_initomega(K, nDoc, gamma)
    else:
        omega = initomega

    evalELBOandPrint(
        rho=rho,
        omega=omega,
        DocTopicCount=np.tile(DocTopicCount_d, (nDoc, 1)),
        alpha=alpha,
        gamma=gamma,
        msg='init',
    )
    betaK = rho2beta(rho, returnSize="K")
    prevbetaK = np.zeros_like(betaK)
    iterid = 0
    while np.sum(np.abs(betaK - prevbetaK)) > 0.000001:
        iterid += 1
        theta_d = DocTopicCount_d + alpha * betaK
        thetaRem = alpha * (1 - np.sum(betaK))
        assert np.allclose(theta_d.sum() + thetaRem, alpha + Nd)
        digammaSum = digamma(theta_d.sum() + thetaRem)
        Elogpi_d = digamma(theta_d) - digammaSum
        ElogpiRem = digamma(thetaRem) - digammaSum
        sumLogPi = nDoc * np.hstack([Elogpi_d, ElogpiRem])

        rho, omega, f, Info = OptimizerRhoOmega.find_optimum_multiple_tries(
                alpha=alpha,
                gamma=gamma,
                sumLogPi=sumLogPi,
                nDoc=nDoc,
                initrho=rho,
                initomega=omega,
                approx_grad=1,
            )
        prevbetaK = betaK.copy()
        betaK = rho2beta(rho, returnSize="K")
        if iterid < 5 or iterid % 10 == 0:
            evalELBOandPrint(
                rho=rho,
                omega=omega,
                DocTopicCount=np.tile(DocTopicCount_d, (nDoc, 1)),
                alpha=alpha,
                gamma=gamma,
                msg=str(iterid),
            )
    return rho, omega
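A hypothetical usage sketch for the routine above, assuming it sits in a module where numpy, digamma, rho2beta, OptimizerRhoOmega, and evalELBOandPrint are already available; the count vector and hyperparameters are invented for illustration.

import numpy as np

# Toy call: treat one fixed count vector as if repeated across 500 documents.
DocTopicCount_d = np.asarray([120.0, 60.0, 30.0, 10.0, 5.0])
rho, omega = learn_rhoomega_fromFixedCounts(
    DocTopicCount_d=DocTopicCount_d,
    nDoc=500,
    alpha=0.5,
    gamma=10.0)
print('learned beta:', rho2beta(rho, returnSize='K'))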
Example #4
    def testHasSaneOutput__objFunc_unconstrained(self):
        ''' Verify objective and gradient vector have correct type and size
        '''
        for K in [1, 2, 10, 100]:
            for seed in [33, 77, 444]:
                for oScale in [1, 45]:
                    for approx_grad in [0, 1]:
                        PRNG = np.random.RandomState(seed)
                        rho = PRNG.rand(K)
                        omega = oScale * PRNG.rand(K)
                        rhoomega = np.hstack([rho, omega])
                        kwargs = dict(alpha=1,
                                      gamma=1,
                                      nDoc=0,
                                      sumLogPi=np.zeros(K + 1))
                        c = OptimizerRhoOmega.rhoomega2c(rhoomega)
                        if approx_grad:
                            f = OptimizerRhoOmega.objFunc_unconstrained(
                                c, approx_grad=1, **kwargs)
                            # Placeholder gradient when approximating.
                            g = np.ones(2 * K)
                        else:
                            f, g = OptimizerRhoOmega.objFunc_unconstrained(
                                c, approx_grad=0, **kwargs)
                        assert isinstance(f, np.float64)
                        assert g.ndim == 1
                        assert g.size == 2 * K
                        assert np.isfinite(f)
                        assert np.all(np.isfinite(g))
Example #5
    def update_global_params_VB(self, SS,
                                mergeCompA=None, mergeCompB=None,
                                **kwargs):
        ''' Update global parameters.
        '''
        self.K = SS.K
        if not hasattr(self, 'rho') or self.rho.size != SS.K:
            # Big change from previous model is being proposed.
            # We'll init rho from scratch, and need more iters to improve.
            nGlobalIters = self.nGlobalItersBigChange
        else:
            # Small change required. Current rho is good initialization.
            nGlobalIters = self.nGlobalIters

        # Special update case for merges:
        # Fast, heuristic update for new rho given original value
        if mergeCompA is not None:
            beta = OptimizerRhoOmega.rho2beta_active(self.rho)
            beta[mergeCompA] += beta[mergeCompB]
            beta = np.delete(beta, mergeCompB, axis=0)
            self.rho = OptimizerRhoOmega.beta2rho(beta, SS.K)
            omega = self.omega
            omega[mergeCompA] += omega[mergeCompB]
            self.omega = np.delete(omega, mergeCompB, axis=0)
        # TODO think about smarter init for rho/omega??

        # Update theta with recently updated info from suff stats
        self.transTheta, self.startTheta = self._calcTheta(SS)

        for giter in range(nGlobalIters):
            # Update rho, omega through numerical optimization
            self.rho, self.omega = self.find_optimum_rhoOmega(**kwargs)
            # Update theta again to reflect the new rho, omega
            self.transTheta, self.startTheta = self._calcTheta(SS)
Example #6
    def testRecoverAnalyticOptimum__find_optimum(self):
        ''' Verify find_optimum's result is indistinguishable from analytic opt
        '''
        for K in [1, 10, 23, 61, 68, 100]:
            for alpha in [0.1, 0.95]:
                for gamma in [1.1, 3.141, 9.45, 21.1337]:
                    for kappa in [0, 100]:
                        print('============== K %d | gamma %.2f' % (K, gamma))
                        for seed in [111, 222, 333]:
                            PRNG = np.random.RandomState(seed)
                            initrho = PRNG.rand(K)
                            initomega = 100 * PRNG.rand(K)
                            scaleVec = np.hstack(
                                [np.ones(K), gamma * np.ones(K)])
                            kwargs = dict(alpha=alpha,
                                          gamma=gamma,
                                          scaleVector=scaleVec,
                                          nDoc=0,
                                          sumLogPi=np.zeros(K + 1))
                            ro, f, Info = OptimizerRhoOmega.find_optimum(
                                initrho=initrho, initomega=initomega, **kwargs)
                            rho_est, omega_est, KK = OptimizerRhoOmega._unpack(
                                ro)
                            assert np.all(np.isfinite(rho_est))
                            assert np.all(np.isfinite(omega_est))
                            assert np.isfinite(f)
                            print(Info['task'])

                            rho_opt = 1.0 / (1. + gamma) * np.ones(K)
                            omega_opt = (1. + gamma) * np.ones(K)

                            print('  rho_est', np2flatstr(rho_est,
                                                          fmt='%9.6f'))
                            print('  rho_opt', np2flatstr(rho_opt,
                                                          fmt='%9.6f'))

                            print('  omega_est',
                                  np2flatstr(omega_est, fmt='%9.6f'))
                            print('  omega_opt',
                                  np2flatstr(omega_opt, fmt='%9.6f'))

                            beta_est = OptimizerRhoOmega.rho2beta_active(
                                rho_est)
                            beta_opt = OptimizerRhoOmega.rho2beta_active(
                                rho_opt)
                            print('  beta_est',
                                  np2flatstr(beta_est, fmt='%9.6f'))
                            print('  beta_opt',
                                  np2flatstr(beta_opt, fmt='%9.6f'))

                            assert np.allclose(beta_est, beta_opt, atol=1e-4)

                            assert np.allclose(omega_est,
                                               omega_opt,
                                               atol=1e-5,
                                               rtol=0.01)
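A brief note on why these targets make sense (my own reasoning, not taken from the library): with nDoc=0 the objective only has to match each variational factor q(u_k) = Beta(rho_k * omega_k, (1 - rho_k) * omega_k) to its prior Beta(1, gamma), which happens exactly when rho_k * omega_k = 1 and (1 - rho_k) * omega_k = gamma, i.e. rho_k = 1 / (1 + gamma) and omega_k = 1 + gamma.

# Quick arithmetic check of the target values used in the test above.
gamma = 3.141
rho_opt = 1.0 / (1.0 + gamma)
omega_opt = 1.0 + gamma
assert abs(rho_opt * omega_opt - 1.0) < 1e-12            # matches prior shape a = 1
assert abs((1.0 - rho_opt) * omega_opt - gamma) < 1e-12  # matches prior shape b = gamma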
Example #7
    def _calcTheta(self, SS):
        ''' Update parameters theta to maximize objective given suff stats.

        Returns
        ---------
        transTheta : 2D array, size K x K+1
        startTheta : 1D array, size K+1
        '''
        K = SS.K
        if not hasattr(self, 'rho') or self.rho.size != K:
            self.rho = OptimizerRhoOmega.create_initrho(K)

        # Calculate E_q[alpha * Beta_l] for l = 1, ..., K+1
        Ebeta = StickBreakUtil.rho2beta(self.rho)
        alphaEBeta = self.transAlpha * Ebeta

        # transTheta_kl = M_kl + E_q[alpha * Beta_l] + kappa * 1_{k==l}
        transTheta = np.zeros((K, K + 1))
        transTheta += alphaEBeta[np.newaxis, :]
        transTheta[:K, :K] += SS.TransStateCount + self.kappa * np.eye(self.K)

        # startTheta_k = r_1k + startAlpha * E_q[Beta_k] (with r_1,l = 0 for l > K)
        startTheta = self.startAlpha * Ebeta
        startTheta[:K] += SS.StartStateCount
        return transTheta, startTheta
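Because StickBreakUtil.rho2beta returns K+1 entries here (the K active weights plus the leftover mass), transTheta comes out K x K+1 and startTheta has length K+1. A stand-alone shape check, using plain numpy and invented counts and hyperparameters, might look like this:

import numpy as np

K, transAlpha, startAlpha, kappa = 4, 0.5, 10.0, 100.0
rho = np.full(K, 0.2)
remaining = np.hstack([1.0, np.cumprod(1.0 - rho)])
Ebeta = np.hstack([rho * remaining[:-1], remaining[-1]])  # length K+1

TransStateCount = np.ones((K, K))   # stand-in for SS.TransStateCount
StartStateCount = np.ones(K)        # stand-in for SS.StartStateCount

transTheta = np.tile(transAlpha * Ebeta, (K, 1))
transTheta[:, :K] += TransStateCount + kappa * np.eye(K)
startTheta = startAlpha * Ebeta
startTheta[:K] += StartStateCount

assert transTheta.shape == (K, K + 1)
assert startTheta.shape == (K + 1,)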
Example #8
    def testHasSaneOutput__objFunc_constrained(self, hmmKappa=10.0):
        ''' Verify objective and gradient vector have correct type and size
        '''
        for K in [1, 2, 10]:
            for alpha in [0.1, 0.9]:
                for seed in [333, 777, 888]:

                    PRNG = np.random.RandomState(seed)
                    u = np.linspace(0.1, 0.9, K)
                    Vd = sampleVd(u, K + 1, alpha, PRNG=PRNG)
                    sumLogPi = summarizeVdToPi(Vd)

                    # Randomly initialize rho and omega
                    rho = PRNG.rand(K)
                    omega = K * PRNG.rand(K)
                    rhoomega = np.hstack([rho, omega])

                    kwargs = dict(alpha=alpha,
                                  gamma=1,
                                  nDoc=K + 1,
                                  kappa=hmmKappa,
                                  sumLogPi=sumLogPi)

                    # Compute objective function
                    # and its gradient (if approximation not occurring)
                    for approx_grad in [0, 1]:
                        if approx_grad:
                            f = OptimizerRhoOmega.objFunc_constrained(
                                rhoomega, approx_grad=1, **kwargs)
                            fapprox = f

                        else:
                            f, g = OptimizerRhoOmega.objFunc_constrained(
                                rhoomega, approx_grad=0, **kwargs)
                            fexact = f
                        assert isinstance(f, np.float64)
                        assert np.isfinite(f)

                        if not approx_grad:
                            assert g.ndim == 1
                            assert g.size == 2 * K
                            assert np.all(np.isfinite(g))

                    print(fexact)
                    print(fapprox)
                    print('')
                    assert np.allclose(fexact, fapprox)
Example #9
    def _convert_beta2rhoomega(self, beta, nDoc=10):
        ''' Find vectors rho, omega that are probable given beta

        Returns
        --------
        rho : 1D array, size K
        omega : 1D array, size K
        '''
        assert abs(np.sum(beta) - 1.0) < 0.001
        rho = OptimizerRhoOmega.beta2rho(beta, self.K)
        omega = (nDoc + self.gamma) * np.ones(rho.size)
        return rho, omega
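For completeness, a minimal sketch of the inverse mapping that beta2rho is assumed to perform (again an illustrative stand-in, not the library code): each rho_k is the fraction of the still-unallocated stick that beta_k claims. Round-tripping through the rho2beta sketch after Example #1 recovers rho up to floating-point error.

import numpy as np

def beta2rho_sketch(beta, K):
    ''' Inverse stick-breaking: rho_k = beta_k / (1 - sum_{j<k} beta_j). '''
    beta = np.asarray(beta, dtype=np.float64)[:K]
    leftover = 1.0 - np.hstack([0.0, np.cumsum(beta)[:-1]])
    return beta / leftover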
Example #10
    def testGradientExactAndApproxAgree__objFunc_constrained(self):
        ''' Verify computed gradient similar for exact and approx methods
        '''
        print('')
        for K in [1, 10, 107]:
            for alpha in [0.1, 0.95]:
                for gamma in [1., 3.14, 9.45]:
                    for seed in [111, 222, 333]:
                        PRNG = np.random.RandomState(seed)
                        rho = PRNG.rand(K)
                        omega = 100 * PRNG.rand(K)
                        rhoomega = np.hstack([rho, omega])
                        kwargs = dict(alpha=alpha,
                                      gamma=gamma,
                                      nDoc=0,
                                      sumLogPi=np.zeros(K + 1))

                        # Exact gradient
                        _, g = OptimizerRhoOmega.objFunc_constrained(
                            rhoomega, approx_grad=0, **kwargs)
                        # Numerical gradient
                        objFunc_cons = OptimizerRhoOmega.objFunc_constrained
                        objFunc = lambda x: objFunc_cons(
                            x, approx_grad=1, **kwargs)
                        epsvec = np.hstack(
                            [1e-8 * np.ones(K), 1e-8 * np.ones(K)])
                        gapprox = approx_fprime(rhoomega, objFunc, epsvec)

                        print('      rho 1:10 ', np2flatstr(rho))
                        print('     grad 1:10 ', np2flatstr(g[:K],
                                                            fmt='% .6e'))
                        print('  gapprox 1:10 ', np2flatstr(gapprox[:K],
                                                            fmt='% .6e'))
                        if K > 10:
                            print('    rho K-10:K ', np2flatstr(rho[-10:]))
                            print('   grad K-10:K ', np2flatstr(g[K - 10:K],
                                                                fmt='% .6e'))
                            print('gapprox K-10:K ', np2flatstr(
                                gapprox[K - 10:K], fmt='% .6e'))
                        assert np.allclose(g[:K],
                                           gapprox[:K],
                                           atol=1e-6,
                                           rtol=0.01)

                        print(np2flatstr(g[K:]))
                        print(np2flatstr(gapprox[K:]))
                        assert np.allclose(g[K:],
                                           gapprox[K:],
                                           atol=1e-4,
                                           rtol=0.05)
Example #11
    def testGradientZeroAtOptimum__objFunc_unconstrained(self):
        ''' Verify computed gradient at optimum is indistinguishable from zero
        '''
        print('')
        for K in [1, 10, 107]:
            for alpha in [0.1, 0.95]:
                for gamma in [1., 3.14, 9.45]:
                    rho = 1.0 / (1. + gamma) * np.ones(K)
                    omega = (1 + gamma) * np.ones(K)
                    rhoomega = np.hstack([rho, omega])
                    kwargs = dict(alpha=alpha,
                                  gamma=gamma,
                                  scaleVector=np.hstack(
                                      [np.ones(K), gamma * np.ones(K)]),
                                  nDoc=0,
                                  sumLogPi=np.zeros(K + 1))
                    c = OptimizerRhoOmega.rhoomega2c(
                        rhoomega, scaleVector=kwargs['scaleVector'])
                    f, g = OptimizerRhoOmega.objFunc_unconstrained(
                        c, approx_grad=0, **kwargs)
                    print('       rho  ', np2flatstr(rho[:K]))
                    print('  grad rho  ', np2flatstr(g[:K]))
                    assert np.allclose(g, np.zeros(2 * K))
Example #12
    def testHasSaneOutput__objFunc_constrained(self):
        ''' Verify objective and gradient vector have correct type and size
        '''
        for K in [1, 2, 10, 101]:
            for seed in [33, 77, 888]:
                for alpha in [0.1, 0.9]:
                    for nDoc in [1, 50, 5000]:
                        PRNG = np.random.RandomState(seed)
                        u = np.linspace(0.1, 0.9, K)
                        Vd = sampleVd(u, nDoc, alpha, PRNG=PRNG)
                        sumLogPi = summarizeVdToPi(Vd)
                        rho = PRNG.rand(K)
                        omega = nDoc * PRNG.rand(K)
                        for approx_grad in [0, 1]:
                            rhoomega = np.hstack([rho, omega])
                            kwargs = dict(alpha=0.5,
                                          gamma=1,
                                          nDoc=nDoc,
                                          sumLogPi=sumLogPi)
                            if approx_grad:
                                f = OptimizerRhoOmega.objFunc_constrained(
                                    rhoomega, approx_grad=1, **kwargs)
                                g = np.ones(2 * K)
                                fapprox = f

                            else:
                                f, g = OptimizerRhoOmega.objFunc_constrained(
                                    rhoomega, approx_grad=0, **kwargs)
                                fexact = f
                            assert isinstance(f, np.float64)
                            assert g.ndim == 1
                            assert g.size == 2 * K
                            assert np.isfinite(f)
                            assert np.all(np.isfinite(g))
                        print(fexact)
                        print(fapprox)
                        print('')
Example #13
    def init_global_params(self, Data, K=0, **initArgs):
        ''' Initialize rho, omega, and theta to reasonable values.

        This is only called by "from scratch" init routines.
        '''
        self.K = K
        self.rho = OptimizerRhoOmega.create_initrho(K)
        self.omega = (1.0 + self.gamma) * np.ones(K)

        # To initialize theta, perform standard update given rho, omega
        # but with "empty" sufficient statistics.
        SS = SuffStatBag(K=self.K, D=Data.dim)
        SS.setField('StartStateCount', np.ones(K), dims=('K'))
        SS.setField('TransStateCount', np.ones((K, K)), dims=('K', 'K'))
        self.transTheta, self.startTheta = self._calcTheta(SS)
Example #14
    def testGradientExactAndApproxAgree__objFunc_constrained(
            self, hmmKappa=100):
        ''' Verify computed gradient similar for exact and approx methods
        '''
        print('')
        for K in [1, 2, 10]:
            for gamma in [1.0, 2.0, 6.28]:
                for alpha in [0.1, 0.9, 1.5]:
                    for seed in [333, 777, 888]:

                        PRNG = np.random.RandomState(seed)
                        u = np.linspace(0.1, 0.9, K)
                        Vd = sampleVd(u, K + 1, alpha, PRNG=PRNG)
                        sumLogPi = summarizeVdToPi(Vd)

                        # Randomly initialize rho and omega
                        rho = PRNG.rand(K)
                        omega = K * PRNG.rand(K)
                        rhoomega = np.hstack([rho, omega])

                        kwargs = dict(alpha=alpha,
                                      gamma=gamma,
                                      nDoc=K + 1,
                                      kappa=hmmKappa,
                                      sumLogPi=sumLogPi)

                        # Exact gradient
                        f, g = OptimizerRhoOmega.objFunc_constrained(
                            rhoomega, approx_grad=0, **kwargs)

                        # Approx gradient
                        oFunc_cons = OptimizerRhoOmega.objFunc_constrained

                        def objFunc(x):
                            return oFunc_cons(x, approx_grad=1, **kwargs)

                        epsvec = np.hstack(
                            [1e-8 * np.ones(K), 1e-8 * np.ones(K)])
                        gapprox = approx_fprime(rhoomega, objFunc, epsvec)

                        print(np2flatstr(g))
                        print(np2flatstr(gapprox))
                        print('')
                        assert np.allclose(g, gapprox, atol=0, rtol=0.001)
Example #15
    def testGradientZeroAtOptimum__objFunc_constrained(self):
        ''' Verify gradient at optimum is indistinguishable from zero
        '''
        for K in [1, 10, 107]:
            for alpha in [0.1, 0.95]:
                for gamma in [1., 3.14, 9.45]:
                    for kappa in [0, 100]:
                        rho = 1. / (1. + gamma) * np.ones(K)
                        omega = (1 + gamma) * np.ones(K)
                        rhoomega = np.hstack([rho, omega])

                        kwargs = dict(alpha=alpha,
                                      gamma=gamma,
                                      kappa=kappa,
                                      nDoc=0,
                                      sumLogPi=np.zeros(K + 1))
                        f, g = OptimizerRhoOmega.objFunc_constrained(
                            rhoomega, approx_grad=0, **kwargs)
                        print('       rho  ', np2flatstr(rho[:K]))
                        print('  grad rho  ', np2flatstr(g[:K]))
                        assert np.allclose(g, np.zeros(2 * K))
Example #16
    def testRecoverRhoThatGeneratedData__find_optimum(self):
        ''' Verify find_optimum recovers the rho that generated the data
        '''
        print('')
        gamma = 1.0
        for K in [93, 107, 85]:
            for alpha in [0.9999]:
                for nDoc in [10000]:
                    print('============== K %d | alpha %.2f | nDoc %d'
                          % (K, alpha, nDoc))

                    for seed in [111, 222, 333]:

                        PRNG = np.random.RandomState(seed)
                        u_true = np.linspace(0.01, 0.99, K)
                        Vd = sampleVd(u_true, nDoc, alpha, PRNG=PRNG)
                        sumLogPi = summarizeVdToPi(Vd)

                        initrho = PRNG.rand(K)
                        initomega = 100 * PRNG.rand(K)
                        scale = 1.0  # float(1+nDoc)/K
                        kwargs = dict(alpha=alpha,
                                      gamma=gamma,
                                      nDoc=nDoc,
                                      scaleVector=np.hstack([
                                          np.ones(K),
                                          float(scale) * np.ones(K)
                                      ]),
                                      sumLogPi=sumLogPi)
                        rho_est, omega_est, f_est, Info = \
                            OptimizerRhoOmega.find_optimum_multiple_tries(
                                initrho=initrho,
                                initomega=initomega,
                                **kwargs)
                        assert np.all(np.isfinite(rho_est))
                        assert np.all(np.isfinite(omega_est))
                        assert np.isfinite(f_est)
                        print(Info['msg'])

                        rho_orig = u_true
                        omega_orig = (1 + gamma) * np.ones(K)
                        ro_orig = np.hstack([rho_orig, omega_orig])
                        rho_hot, omega_hot, f_hot, Ihot = \
                            OptimizerRhoOmega.find_optimum_multiple_tries(
                                initrho=rho_orig,
                                initomega=omega_orig,
                                **kwargs)

                        f_orig, _ = OptimizerRhoOmega.objFunc_constrained(
                            ro_orig, **kwargs)
                        print('  f_orig %.7f' % (f_orig))
                        print('  f_hot  %.7f' % (f_hot))
                        print('  f_est  %.7f' % (f_est))

                        print('  rho_orig', np2flatstr(rho_orig, fmt='%9.6f'))
                        print('  rho_hot ', np2flatstr(rho_hot, fmt='%9.6f'))
                        print('  rho_est ', np2flatstr(rho_est, fmt='%9.6f'))

                        assert f_hot <= f_orig
                        assert np.allclose(f_est, f_hot, rtol=0.01)
                        assert np.allclose(rho_est,
                                           rho_hot,
                                           atol=0.02,
                                           rtol=1e-5)
Example #17
    def testGradientExactAndApproxAgree__sumLogPiRemVec(self):
        ''' Verify computed gradient similar for exact and approx methods
        '''
        print('')
        for K in [1, 2, 10, 54]:
            for alpha in [0.1, 0.95]:
                for gamma in [1., 9.45]:
                    for nDoc in [1, 100, 1000]:

                        print('============= K %d | nDoc %d | alpha %.2f'
                              % (K, nDoc, alpha))

                        for seed in [111, 222, 333]:
                            PRNG = np.random.RandomState(seed)
                            u = np.linspace(0.01, 0.99, K)
                            Vd = sampleVd(u, nDoc, alpha, PRNG=PRNG)
                            sumLogPi = summarizeVdToPi(Vd)
                            sumLogPiRemVec = np.zeros(K)
                            sumLogPiActiveVec = np.zeros(K)
                            sumLogPiActiveVec[:] = sumLogPi[:-1]
                            sumLogPiRemVec[-1] = sumLogPi[-1]

                            rho = PRNG.rand(K)
                            omega = 100 * PRNG.rand(K)
                            rhoomega = np.hstack([rho, omega])
                            kwargs = dict(alpha=alpha,
                                          gamma=gamma,
                                          nDoc=nDoc,
                                          sumLogPiActiveVec=sumLogPiActiveVec,
                                          sumLogPiRemVec=sumLogPiRemVec)

                            # Exact gradient
                            f, g = OptimizerRhoOmega.objFunc_constrained(
                                rhoomega, approx_grad=0, **kwargs)

                            # Approx gradient
                            oFunc_cons = OptimizerRhoOmega.objFunc_constrained
                            objFunc = lambda x: oFunc_cons(
                                x, approx_grad=1, **kwargs)
                            epsvec = np.hstack(
                                [1e-8 * np.ones(K), 1e-8 * np.ones(K)])
                            gapprox = approx_fprime(rhoomega, objFunc, epsvec)

                            print('      rho 1:10 ', np2flatstr(rho))
                            print('     grad 1:10 ', np2flatstr(g[:K],
                                                                fmt='% .6e'))
                            print(' autograd 1:10 ', np2flatstr(gapprox[:K],
                                                                fmt='% .6e'))
                            if K > 10:
                                print('     rho K-10:K ',
                                      np2flatstr(rho[-10:]))
                                print('    grad K-10:K ', np2flatstr(
                                    g[K - 10:K], fmt='% .6e'))
                                print('autograd K-10:K ', np2flatstr(
                                    gapprox[K - 10:K], fmt='% .6e'))
                            rtol_rho = 0.01
                            atol_rho = 1e-6
                            rtol_omega = 0.05
                            atol_omega = 0.01
                            # Note: small omega derivatives cause numerical
                            # trouble, so use a relatively high atol here.
                            assert np.allclose(g[:K],
                                               gapprox[:K],
                                               atol=atol_rho,
                                               rtol=rtol_rho)
                            oGradOK = np.allclose(g[K:],
                                                  gapprox[K:],
                                                  atol=atol_omega,
                                                  rtol=rtol_omega)
                            if not oGradOK:
                                print('VIOLATION DETECTED!')
                                print('grad_approx DOES NOT EQUAL grad_exact')

                                absDiff = np.abs(g[K:] - gapprox[K:])
                                tolDiff = (atol_omega +
                                           rtol_omega * np.abs(gapprox[K:]) -
                                           absDiff)
                                worstIDs = np.argsort(tolDiff)
                                print('Top 5 worst mismatches')
                                print(np2flatstr(g[K + worstIDs[:5]],
                                                 fmt='% .6f'))
                                print(np2flatstr(gapprox[K + worstIDs[:5]],
                                                 fmt='% .6f'))
                            assert oGradOK
Example #18
def f_objFunc(rho, omega, **kwargs):
    # Wrapper: evaluate the constrained objective at (rho, omega),
    # then flip the sign and rescale by nDoc.
    f, grad = OptimizerRhoOmega.objFunc_constrained(
        np.hstack([rho, omega]), **kwargs)
    return -1 * kwargs['nDoc'] * f
Example #19
    def find_optimum_rhoOmega(self, **kwargs):
        ''' Performs numerical optimization of rho and omega for M-step update.

        Note that the optimizer forces rho to lie in [EPS, 1-EPS] for
        the sake of numerical stability.

        Returns
        -------
        rho : 1D array, size K
        omega : 1D array, size K
        Info : dict of information about optimization.
        '''

        # Calculate expected log transition probability
        # using theta vectors for all K states plus initial state
        ELogPi = digamma(self.transTheta) \
            - digamma(np.sum(self.transTheta, axis=1))[:, np.newaxis]
        sumELogPi = np.sum(ELogPi, axis=0)
        startELogPi = digamma(self.startTheta) \
            - digamma(np.sum(self.startTheta))

        # Select initial rho, omega values for gradient descent
        if hasattr(self, 'rho') and self.rho.size == self.K:
            initRho = self.rho
        else:
            initRho = None

        if hasattr(self, 'omega') and self.omega.size == self.K:
            initOmega = self.omega
        else:
            initOmega = None

        # Do the optimization
        try:
            rho, omega, fofu, Info = \
                OptimizerRhoOmega.find_optimum_multiple_tries(
                    sumLogPi=sumELogPi,
                    sumLogPiActiveVec=None,
                    sumLogPiRemVec=None,
                    startAlphaLogPi=self.startAlpha * startELogPi,
                    nDoc=self.K + 1,
                    gamma=self.gamma,
                    alpha=self.transAlpha,
                    kappa=self.kappa,
                    initrho=initRho,
                    initomega=initOmega)
            self.OptimizerInfo = Info
            self.OptimizerInfo['fval'] = fofu

        except ValueError as error:
            if hasattr(self, 'rho') and self.rho.size == self.K:
                Log.error(
                    '***** Optim failed. Remain at cur val. ' +
                    str(error))
                rho = self.rho
                omega = self.omega
            else:
                Log.error('***** Optim failed. Set to prior. ' + str(error))
                omega = (self.gamma + 1) * np.ones(self.K)
                rho = 1 / float(1 + self.gamma) * np.ones(self.K)

        return rho, omega
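The docstring's note about keeping rho inside [EPS, 1-EPS] can be illustrated with a tiny guard like the one below; the constant and helper are assumptions for illustration, not the optimizer's actual code.

import numpy as np

EPS = 1e-8  # illustrative value; the optimizer's real constant may differ

def clamp_rho(rho, eps=EPS):
    # Keep every stick-breaking fraction strictly inside (0, 1) so that
    # log(rho) and log(1 - rho) stay finite during optimization.
    return np.minimum(np.maximum(rho, eps), 1.0 - eps)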