def __init__(self, nstates=None, nsymbols=None, transition=None, emission=None, pi=None):
    # at least one of (nstates, transition, pi) and one of (nsymbols, emission)
    # must be given, otherwise the model dimensions are undetermined
    assert not ((nstates is None and transition is None and pi is None)
                or (nsymbols is None and emission is None))

    if transition is not None:
        assert transition.ndim == 2
        if nstates is not None:
            assert nstates == transition.shape[0]
        else:
            nstates = transition.shape[0]
        if emission is not None:
            assert transition.shape[0] == emission.shape[0]
        if pi is not None:
            assert transition.shape[0] == pi.size

    if emission is not None:
        assert emission.ndim == 2
        if nsymbols is not None:
            assert nsymbols == emission.shape[1]
        else:
            nsymbols = emission.shape[1]
        if nstates is not None:
            assert nstates == emission.shape[0]
        else:
            nstates = emission.shape[0]
        if pi is not None:
            assert emission.shape[0] == pi.size

    if pi is not None:
        assert pi.ndim == 1
        if nstates is not None:
            assert nstates == pi.size
        else:
            nstates = pi.size

    # fall back to (almost) uniform initializations for anything unspecified
    if transition is None:
        transition = almost_uniform_matrix(nstates)
    if emission is None:
        emission = almost_uniform_matrix(nstates, nsymbols)
    if pi is None:
        pi = almost_uniform_vector(nstates)

    self.set_transition(transition)
    self.set_emission(emission)
    self.set_pi(pi)
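# Usage sketch (illustrative, not part of the original module): constructing a
# model from an explicit transition matrix only. The enclosing class name `HMM`
# is an assumption here; the unspecified emission matrix and initial
# distribution are filled in by almost_uniform_matrix / almost_uniform_vector
# as in the constructor above.
#
#     >>> from numpy import array
#     >>> transition = array([[0.9, 0.1],
#     ...                     [0.2, 0.8]])
#     >>> model = HMM(nsymbols=3, transition=transition)  # 2 states, 3 symbols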
def entropic_reestimate(omega, theta=None, Z=1, maxiter=100, tol=1e-7, verbose=False):
    """
    Re-estimates a stochastic parameter vector entropically [1]_.

    Parameters
    ----------
    omega : array_like
        Evidence vector
    theta : array_like, optional
        Parameter vector to be re-estimated under the given evidence and
        learning rate (default None)
    Z : {-1, 0, +1}, optional
        -1: algorithm reduces to traditional MLE (e.g. the Baum-Welch)
        0: not supported yet (the implementation asserts Z != 0)
        +1: algorithm will seek maximum structure
    maxiter : int, optional
        Maximum number of iterations of the fixed-point loop (default 100)
    tol : float, optional
        Tolerance used to decide convergence of the Lagrange multiplier
        (default 1e-7)
    verbose : bool, optional
        Display verbose output (default off)

    Returns
    -------
    theta_hat : array_like
        Learned parameter vector
    Z : float
        Final learning rate
    _lambda : float
        Limiting value of the Lagrange multiplier

    Examples
    --------
    >>> from entropy_map import entropic_reestimate
    >>> omega = [1, 2]
    >>> theta = [0.50023755, 0.49976245]
    >>> theta_hat, final_Z, _lambda = entropic_reestimate(omega, theta, Z=1, tol=1e-6)
    >>> theta_hat
    array([ 0.33116253, 0.66883747])
    >>> final_Z
    0.041828014112488016
    >>> _lambda
    -3.0152672618320637

    References
    ----------
    .. [1] Matthew Brand, "Pattern learning via entropy maximization"

    """
    def _debug(msg=''):
        if verbose:
            print(msg)

    # XXX TODO: handle the Z = 0 case
    assert Z != 0

    # if no initial theta is specified, start with a uniform candidate
    if theta is None:
        theta = almost_uniform_vector(len(omega))

    # all arrays must be numpy-like
    omega = array(omega, dtype='float64')
    theta = array(theta, dtype='float64')

    # XXX TODO: trim off any evidence which is 'relatively close to 0'
    # (since such evidence can't justify anything!)
    informative_indices = nonzero(minimum(omega, theta) > _EPSILON)
    _omega = omega[informative_indices]
    _theta = theta[informative_indices]

    # prepare an initial _lambda which ensures that Lambert's W is real-valued
    if Z > 0:
        critical_lambda = min(-Z * (2 + log(_omega / Z)))
        _lambda = critical_lambda - 1  # or anything less than the critical value above
    elif Z < 0:
        # make an educated guess
        _lambda = -mean(Z * (log(_theta) + 1) + _omega / _theta)
        assert all(-_omega * exp(1 + _lambda / Z) / Z > -1 / e), -_omega * exp(1 + _lambda / Z) / Z

    # fixed-point loop
    _theta_hat = _theta
    iteration = 0
    converged = False
    relative_error = None  # defined up-front so the final report below can't fail
    _debug("entropy_map: starting fixed-point loop ..\n")
    _debug("Initial model: %s" % _theta)
    _debug("Initial lambda: %s" % _lambda)
    _debug("Initial learning rate (Z): %s" % Z)
    while not converged:
        # exhausted?
        if maxiter <= iteration:
            break

        # if necessary, re-scale the learning rate (Z) so that exp(1 + _lambda/Z)
        # is not 'too small'
        if _lambda < 0:
            if Z > 0:
                new_Z = -_lambda / _BEAR
            elif Z < 0:
                new_Z = _lambda / _BEAR
            if new_Z != Z:
                Z = new_Z
                _debug("N.B:- We'll re-scale the learning rate (Z) to %s to prevent "
                       "Lambert's W function from vanishing." % Z)

        # prepare the argument (vector) for Lambert's W function
        z = -_omega * exp(1 + _lambda / Z) / Z
        assert all(isreal(z))
        if any(z < -1 / e):
            _debug("Lambert's W: argument z = %s out of range (-1/e, +inf)" % z)
            break

        # compute Lambert's W function at z
        if Z <= 0:
            g = W(z, k=0)
        else:
            g = W(z, k=-1)
        assert all(isreal(g))
        g = real(g)

        # check against division by zero (btw, we re-scaled Z to prevent this)
        # assert all(g != 0)
        assert all(abs(g) > _EPSILON)

        # re-estimate _theta
        _theta_hat = (-_omega / Z) / g
        assert all(_theta_hat >= 0)

        # normalize the approximated _theta_hat parameter
        _theta_hat = normalize_probabilities(_theta_hat)

        # re-estimate _lambda
        _lambda_hat = -(Z * (log(_theta_hat[0]) + 1) + _omega[0] / _theta_hat[0])  # [0] or any other index [i]

        # check whether the _lambda values have converged
        converged, _, relative_error = check_converged(_lambda, _lambda_hat, tol=tol)

        # verbose output for debugging, etc.
        _debug("Iteration: %d" % iteration)
        _debug('Current parameter estimate:\n%s' % _theta)
        _debug('lambda: %s' % _lambda)
        _debug("Relative error in lambda over last iteration: %s" % relative_error)
        _debug("Learning rate (Z): %s" % Z)

        # update _lambda and _theta
        _lambda = _lambda_hat
        _theta = _theta_hat

        # go to the next iteration
        iteration += 1
        _debug('\n')

    _debug("Done.")
    _debug('Final parameter estimate:\n%s' % _theta)
    _debug('lambda: %s' % _lambda)
    _debug("Relative error in lambda over last iteration: %s" % relative_error)
    _debug("Learning rate (Z): %s" % Z)

    # converged?
    if converged:
        _debug("entropic_reestimate: loop converged after %d iterations "
               "(tolerance was set to %s)" % (iteration, tol))
    else:
        _debug("entropic_reestimate: loop did not converge after %d iterations "
               "(tolerance was set to %s)" % (maxiter, tol))

    # render results
    theta_hat = 0 * theta
    theta_hat[informative_indices] = _theta_hat
    return theta_hat, Z, _lambda
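# Usage sketch (illustrative, not part of the original module): entropic
# re-estimation applied row by row to a matrix of expected transition counts,
# e.g. in place of the plain M-step normalization of Baum-Welch. The `counts`
# matrix below is a made-up example; only theta_hat (the first return value)
# is kept for each row.
#
#     >>> from numpy import array
#     >>> counts = array([[5., 1.],
#     ...                 [2., 8.]])          # expected transition counts
#     >>> rows = [entropic_reestimate(row, Z=1)[0] for row in counts]
#     >>> new_transition = array(rows)        # each row is a probability vector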