def learn(self, obseqs, tol=1e-6, miniter=25, maxiter=100, method='baumwelch'):
    """Iteratively re-estimate the model's parameters until convergence.

    Parameters
    ----------
    obseqs : sequence
        Observation sequences to learn from (passed straight to the
        per-method estimator; exact structure defined by do_baumwelch /
        do_brand).
    tol : float, optional
        Tolerance on the relative change in model likelihood used to
        declare convergence (default 1e-6).
    miniter : int, optional
        Minimum number of iterations before convergence may be declared
        (default 25).
    maxiter : int, optional
        Iteration budget; learning stops here even without convergence
        (default 100).
    method : {'baumwelch', 'brand'}, optional
        Parameter-estimation method to use (default 'baumwelch').

    Raises
    ------
    RuntimeError
        If `method` is not one of the supported estimators.
    """
    likelihood = -inf
    iteration = 0
    while True:
        # NOTE: print() with a single %-formatted argument behaves
        # identically under Python 2 and 3 (unlike print statements).
        print("Iteration: %s" % iteration)
        print("Current model: %s" % str(self))

        # budget exhausted ?
        if maxiter <= iteration:
            print("Model did not converge after %d iterations (tolerance was set to %s)." % (iteration, tol))
            break

        # learn new model
        if method == 'baumwelch':
            result = self.do_baumwelch(obseqs)
        elif method == 'brand':
            result = self.do_brand(obseqs)
        else:
            raise RuntimeError("Unsupported parameter-estimation method: %s" % method)
        print("Model likelihood: %s" % result.get('likelihood'))

        # converged (to global optimum) ?  A likelihood of exactly 0 can't
        # be improved upon, but we still honor the miniter floor.
        if result.get('likelihood') == 0 and miniter <= iteration:
            print("Model converged (to global optimum) after %d iterations." % iteration)
            break

        # update model likelihood
        converged, increased, relative_error = check_converged(likelihood, result.get('likelihood'), tol=tol)
        likelihood = result.get('likelihood')
        if method == 'baumwelch':
            # Baum-Welch guarantees a monotone likelihood increase; if this
            # fails, something is terribly wrong with the do_baumwelch code!
            assert increased
        print("Relative error in model likelihood over last iteration: %s" % relative_error)
        print("")

        # converged ?
        if converged and miniter <= iteration:
            print("Model converged after %d iterations (tolerance was set to %s)." % (iteration, tol))
            break

        # update model
        self.set_transition(result.get('transition'))
        self.set_emission(result.get('emission'))
        self.set_pi(result.get('pi'))

        # proceed with next iteration
        iteration += 1
def entropic_reestimate(omega, theta=None, Z=1, maxiter=100, tol=1e-7, verbose=False):
    """
    Re-estimates a statistic parameter vector entropically [1]_.

    Parameters
    ----------
    omega : array_like
        Evidence vector
    theta : array_like, optional
        Parameter vector to be re-estimated under given evidence and learning rate
        (default None)
    Z : {-1, 0, +1}, optional
        -1: Algorithm reduces to traditional MLE (e.g the Baum-Welch)
        0: ?
        +1: Algorithm will seek maximum structure
    maxiter : int, optional
        Maximum number of iterations of Fixed-point loop (default 100)
    tol : float, optional
        Tolerance on the relative change of the Lagrange multiplier used to
        declare convergence (default 1e-7)
    verbose : bool, optional
        Display verbose output (default off)

    Returns
    -------
    theta_hat : array_like
        Learned parameter vector
    Z : float
        Final Learning rate
    _lambda : float
        Limiting value of Lagrange multiplier

    Examples
    --------
    >>> from entropy_map import entropic_reestimate
    >>> omega = [1, 2]
    >>> theta = [0.50023755, 0.49976245]
    >>> theta_hat, final_Z, _lambda = entropic_reestimate(omega, theta, Z=1, tol=1e-6)
    >>> theta_hat
    array([ 0.33116253, 0.66883747])
    >>> final_Z
    0.041828014112488016
    >>> _lambda
    -3.0152672618320637

    References
    ----------
    .. [1] Matthiew Brand, "Pattern learning via entropy maximization"
    """
    def _debug(msg=''):
        # Emit progress messages only in verbose mode; print() with a single
        # argument behaves identically under Python 2 and 3.
        if verbose:
            print(msg)

    # XXX TODO: handle Z = 0 case
    assert Z != 0

    # if no initial theta specified, start with uniform candidate
    if theta is None:
        theta = almost_uniform_vector(len(omega))

    # all arrays must be numpy-like
    omega = array(omega, dtype='float64')
    theta = array(theta, dtype='float64')

    # trim-off any evidence which is 'relatively close to 0' (since such
    # evidence can't justify anything!); only the informative components
    # take part in the fixed-point iteration below
    informative_indices = nonzero(minimum(omega, theta) > _EPSILON)
    _omega = omega[informative_indices]
    _theta = theta[informative_indices]

    # prepare initial _lambda which will ensure that Lambert's W is real-valued
    if Z > 0:
        critical_lambda = min(-Z*(2 + log(_omega/Z)))
        _lambda = critical_lambda - 1  # or anything less than the critical value above
    elif Z < 0:
        # make an educated guess
        _lambda = -mean(Z*(log(_theta) + 1) + _omega/_theta)
        assert all(-_omega*exp(1 + _lambda/Z)/Z > -1/e), -_omega*exp(1 + _lambda/Z)/Z

    # Fixed-point loop
    _theta_hat = _theta
    iteration = 0
    converged = False
    # defined up-front so the post-loop report can't hit an unbound name
    # when maxiter <= 0 (loop body never runs)
    relative_error = None
    _debug("entropy_map: starting Fixed-point loop ..\n")
    _debug("Initial model: %s" % _theta)
    _debug("Initial lambda: %s" % _lambda)
    _debug("Initial learning rate (Z): %s" % Z)
    while not converged:
        # exhausted ?
        if maxiter <= iteration:
            break

        # if necessary, re-scale learning rate (Z) so that exp(1 + _lambda/Z) is not 'too small'
        if _lambda < 0:
            if Z > 0:
                new_Z = -_lambda/_BEAR
            elif Z < 0:
                new_Z = _lambda/_BEAR
            if new_Z != Z:
                Z = new_Z
                _debug("N.B:- We'll re-scale learning rate (Z) to %s to prevent Lambert's W function from vanishing." % (Z))

        # prepare argument (vector) for Lambert's W function
        z = -_omega*exp(1 + _lambda/Z)/Z
        assert all(isreal(z))
        if any(z < -1/e):
            _debug("Lambert's W: argument z = %s out of range (-1/e, +inf)" % z)
            break

        # compute Lambert's W function at z (principal branch for Z <= 0,
        # lower branch k=-1 otherwise)
        if Z <= 0:
            g = W(z, k=0)
        else:
            g = W(z, k=-1)
        assert all(isreal(g))
        g = real(g)

        # check against division by zero (btw we re-scaled Z to prevent this)
        # assert all(g != 0)
        assert all(abs(g) > _EPSILON)

        # re-estimate _theta
        _theta_hat = (-_omega/Z)/g
        assert all(_theta_hat >= 0)

        # normalize the approximated _theta_hat parameter
        _theta_hat = normalize_probabilities(_theta_hat)

        # re-estimate _lambda
        _lambda_hat = -(Z*(log(_theta_hat[0]) + 1) + _omega[0]/_theta_hat[0])  # [0] or any other index [i]

        # check whether _lambda values have converged
        converged, _, relative_error = check_converged(_lambda, _lambda_hat, tol=tol)

        # verbose for debugging, etc.
        _debug("Iteration: %d" % iteration)
        _debug('Current parameter estimate:\n%s' % _theta)
        _debug('lambda: %s' % _lambda)
        _debug("Relative error in lambda over last iteration: %s" % relative_error)
        _debug("Learning rate (Z): %s" % Z)

        # update _lambda and _theta
        _lambda = _lambda_hat
        _theta = _theta_hat

        # goto next iteration
        iteration += 1
        _debug('\n')

    _debug("Done.")
    _debug('Final parameter estimate:\n%s' % _theta)
    _debug('lambda: %s' % _lambda)
    _debug("Relative error in lambda over last iteration: %s" % relative_error)
    _debug("Learning rate (Z): %s" % Z)

    # converged ?
    if converged:
        _debug("entropic_reestimate: loop converged after %d iterations (tolerance was set to %s)" % (iteration, tol))
    else:
        _debug("entropic_reestimate: loop did not converge after %d iterations (tolerance was set to %s)"
               % (maxiter, tol))

    # render results: components trimmed off above as non-informative keep
    # probability exactly 0
    theta_hat = 0*theta
    theta_hat[informative_indices] = _theta_hat
    return theta_hat, Z, _lambda