def hmm_expected_states(log_pi0, log_Ps, ll):
    T, K = ll.shape

    # Make sure everything is C contiguous
    to_c = lambda arr: np.copy(arr, 'C') if not arr.flags['C_CONTIGUOUS'] else arr
    log_pi0 = to_c(getval(log_pi0))
    log_Ps = to_c(getval(log_Ps))
    ll = to_c(getval(ll))

    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    normalizer = logsumexp(alphas[-1])

    betas = np.zeros((T, K))
    backward_pass(log_Ps, ll, betas)

    expected_states = alphas + betas
    expected_states -= logsumexp(expected_states, axis=1, keepdims=True)
    expected_states = np.exp(expected_states)

    expected_joints = alphas[:-1, :, None] + betas[1:, None, :] + ll[1:, None, :] + log_Ps
    expected_joints -= expected_joints.max((1, 2))[:, None, None]
    expected_joints = np.exp(expected_joints)
    expected_joints /= expected_joints.sum((1, 2))[:, None, None]

    return expected_states, expected_joints, normalizer
def hmm_expected_states(log_pi0, log_Ps, ll):
    T, K = ll.shape

    # Make sure everything is C contiguous
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    normalizer = logsumexp(alphas[-1])

    betas = np.zeros((T, K))
    backward_pass(log_Ps, ll, betas)

    expected_states = alphas + betas
    expected_states -= logsumexp(expected_states, axis=1, keepdims=True)
    expected_states = np.exp(expected_states)

    expected_joints = alphas[:-1, :, None] + betas[1:, None, :] + ll[1:, None, :] + log_Ps
    expected_joints -= expected_joints.max((1, 2))[:, None, None]
    expected_joints = np.exp(expected_joints)
    expected_joints /= expected_joints.sum((1, 2))[:, None, None]

    return expected_states, expected_joints, normalizer
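# For reference, a minimal pure-NumPy sketch of the log-space recursions that the
# numba-compiled forward_pass and backward_pass are assumed to implement (shown for a
# single stationary K x K matrix log_P for simplicity; the compiled versions accept a
# stack of T-1 transition matrices). Function names here are illustrative only.
import numpy as np
from scipy.special import logsumexp

def forward_pass_ref(log_pi0, log_P, ll):
    # alpha_t(k) = log p(x_{1:t}, z_t = k)
    T, K = ll.shape
    alphas = np.zeros((T, K))
    alphas[0] = log_pi0 + ll[0]
    for t in range(1, T):
        alphas[t] = logsumexp(alphas[t-1][:, None] + log_P, axis=0) + ll[t]
    return alphas

def backward_pass_ref(log_P, ll):
    # beta_t(k) = log p(x_{t+1:T} | z_t = k), with beta_T(k) = 0
    T, K = ll.shape
    betas = np.zeros((T, K))
    for t in range(T - 2, -1, -1):
        betas[t] = logsumexp(log_P + (ll[t+1] + betas[t+1])[None, :], axis=1)
    return betas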
def _make_grad_hmm_normalizer(argnum, ans, log_pi0, log_Ps, ll):
    # Unbox the inputs if necessary
    log_pi0 = getval(log_pi0)
    log_Ps = getval(log_Ps)
    ll = getval(ll)

    # Make sure everything is C contiguous
    to_c = lambda arr: np.copy(arr, 'C') if not arr.flags['C_CONTIGUOUS'] else arr
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    dlog_pi0 = np.zeros_like(log_pi0)
    dlog_Ps = np.zeros_like(log_Ps)
    dll = np.zeros_like(ll)
    T, K = ll.shape

    # Forward pass to get alphas
    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    grad_hmm_normalizer(log_Ps, alphas, dlog_pi0, dlog_Ps, dll)

    if argnum == 0:
        return lambda g: g * dlog_pi0
    if argnum == 1:
        return lambda g: g * dlog_Ps
    if argnum == 2:
        return lambda g: g * dll
def _make_grad_hmm_normalizer(argnum, ans, pi0, Ps, ll):
    # Make sure everything is C contiguous and unboxed
    pi0 = to_c(pi0)
    Ps = to_c(Ps)
    ll = to_c(ll)

    dlog_pi0 = np.zeros_like(pi0)
    dlog_Ps = np.zeros_like(Ps)
    dll = np.zeros_like(ll)
    T, K = ll.shape

    # Forward pass to get alphas
    alphas = np.zeros((T, K))
    forward_pass(pi0, Ps, ll, alphas)
    grad_hmm_normalizer(np.log(Ps), alphas, dlog_pi0, dlog_Ps, dll)

    # Compute the necessary gradient, accounting for the log transformation:
    # df/dP = df/dlogP * dlogP/dP = df/dlogP * 1/P
    if argnum == 0:
        return lambda g: g * dlog_pi0 / pi0
    if argnum == 1:
        return lambda g: g * dlog_Ps / Ps
    if argnum == 2:
        return lambda g: g * dll
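# A quick numerical sanity check of the chain-rule step above (an illustrative
# sketch, not part of the module): for f(P) = g(log P), df/dP = (dg/dlogP) / P
# elementwise. The helper name _fd_grad is hypothetical.
import numpy as np

def _fd_grad(f, P, eps=1e-6):
    # Central finite differences of a scalar function f at P
    num = np.zeros_like(P)
    for idx in np.ndindex(P.shape):
        Pp, Pm = P.copy(), P.copy()
        Pp[idx] += eps
        Pm[idx] -= eps
        num[idx] = (f(Pp) - f(Pm)) / (2 * eps)
    return num

g = lambda L: np.sum(L ** 2)        # any smooth function of log P
P = np.random.rand(3, 3) + 0.5
analytic = 2 * np.log(P) / P        # dg/dlogP = 2 log P, divided by P per the chain rule
assert np.allclose(_fd_grad(lambda P: g(np.log(P)), P), analytic, atol=1e-4)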
def hmm_expected_states(log_pi0, log_Ps, ll, memlimit=2**31):
    T, K = ll.shape

    # Make sure everything is C contiguous
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    normalizer = logsumexp(alphas[-1])

    betas = np.zeros((T, K))
    backward_pass(log_Ps, ll, betas)

    # Compute E[z_t] for t = 1, ..., T
    expected_states = alphas + betas
    expected_states -= logsumexp(expected_states, axis=1, keepdims=True)
    expected_states = np.exp(expected_states)

    # Compute E[z_t, z_{t+1}] for t = 1, ..., T-1
    # Note that this is an array of size T*K*K, which can be quite large.
    # To be a bit more frugal with memory, first check if the given log_Ps
    # are TxKxK. If so, instantiate the full expected joints as well, since
    # we will need them for the M-step. However, if log_Ps is 1xKxK then we
    # know that the transition matrix is stationary, and all we need for the
    # M-step is the sum of the expected joints.
    stationary = (log_Ps.shape[0] == 1)
    if not stationary:
        expected_joints = alphas[:-1, :, None] + betas[1:, None, :] + ll[1:, None, :] + log_Ps
        expected_joints -= expected_joints.max((1, 2))[:, None, None]
        expected_joints = np.exp(expected_joints)
        expected_joints /= expected_joints.sum((1, 2))[:, None, None]
    else:
        # Compute the sum over the time axis of the expected joints.
        # Limit ourselves to approximately `memlimit` bytes of memory,
        # assuming the entries are float64's (8 bytes).
        batch_size = int(memlimit / (8 * K * K))
        assert batch_size > 0
        expected_joints = np.zeros((1, K, K))
        for start in range(0, T - 1, batch_size):
            stop = min(T - 1, start + batch_size)

            # Compute expectations in this batch
            tmp = alphas[start:stop, :, None] + betas[start+1:stop+1, None, :] \
                + ll[start+1:stop+1, None, :] + log_Ps
            tmp -= tmp.max((1, 2))[:, None, None]
            tmp = np.exp(tmp)
            tmp /= tmp.sum((1, 2))[:, None, None]
            expected_joints += tmp.sum(axis=0)

    return expected_states, expected_joints, normalizer
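# Rough sizing under the default memlimit of 2**31 bytes (2 GiB) with float64 entries:
# for K = 100 states, batch_size = int(2**31 / (8 * 100 * 100)) == 26843 time steps per
# batch, so a T = 10**6 sequence is handled in ~38 batches rather than allocating the
# full T x K x K array (~80 GB).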
def hmm_normalizer(log_pi0, log_Ps, ll):
    T, K = ll.shape
    alphas = np.zeros((T, K))

    # Make sure everything is C contiguous
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    forward_pass(log_pi0, log_Ps, ll, alphas)
    return logsumexp(alphas[-1])
def test_grad_hmm_normalizer(T=1000, K=3):
    log_pi0, log_Ps, ll = make_parameters(T, K)
    dlog_pi0, dlog_Ps, dll = np.zeros_like(log_pi0), np.zeros_like(log_Ps), np.zeros_like(ll)

    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    grad_hmm_normalizer(log_Ps, alphas, dlog_pi0, dlog_Ps, dll)

    assert np.allclose(dlog_pi0, grad(hmm_normalizer_np, argnum=0)(log_pi0, log_Ps, ll))
    assert np.allclose(dlog_Ps, grad(hmm_normalizer_np, argnum=1)(log_pi0, log_Ps, ll))
    assert np.allclose(dll, grad(hmm_normalizer_np, argnum=2)(log_pi0, log_Ps, ll))
def test_grad_hmm_normalizer(T=10, K=3):
    pi0, Ps, ll = make_parameters(T, K)
    dlogpi0, dlogPs, dll = np.zeros_like(pi0), np.zeros_like(Ps), np.zeros_like(ll)

    alphas = np.zeros((T, K))
    forward_pass(pi0, Ps, ll, alphas)
    grad_hmm_normalizer(np.log(Ps), alphas, dlogpi0, dlogPs, dll)

    # grad_hmm_normalizer returns gradients w.r.t. the log parameters; dividing
    # by pi0 and Ps applies the chain rule back to the probability parameters.
    assert np.allclose(dlogpi0 / pi0, grad(hmm_normalizer_np, argnum=0)(pi0, Ps, ll))
    assert np.allclose(dlogPs / Ps, grad(hmm_normalizer_np, argnum=1)(pi0, Ps, ll))
    assert np.allclose(dll, grad(hmm_normalizer_np, argnum=2)(pi0, Ps, ll))
def hmm_normalizer(log_pi0, log_Ps, ll):
    T, K = ll.shape
    alphas = np.zeros((T, K))

    # Make sure everything is C contiguous
    to_c = lambda arr: np.copy(arr, 'C') if not arr.flags['C_CONTIGUOUS'] else arr
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    forward_pass(log_pi0, log_Ps, ll, alphas)
    return logsumexp(alphas[-1])
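# The normalizer is the log marginal likelihood log p(x_{1:T}). A brute-force check
# on a tiny problem (illustrative sketch only, with a single stationary log_P):
# enumerate all K**T state paths and logsumexp their joint log probabilities, which
# should match the forward-pass value.
import itertools
import numpy as np
from scipy.special import logsumexp

def hmm_normalizer_brute(log_pi0, log_P, ll):
    T, K = ll.shape
    lps = []
    for zs in itertools.product(range(K), repeat=T):
        lp = log_pi0[zs[0]] + ll[0, zs[0]]
        for t in range(1, T):
            lp += log_P[zs[t-1], zs[t]] + ll[t, zs[t]]
        lps.append(lp)
    return logsumexp(lps)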
def hmm_sample(log_pi0, log_Ps, ll):
    T, K = ll.shape

    # Make sure everything is C contiguous
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    # Forward pass gets the predicted state at time t given
    # observations up to and including those from time t
    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)

    # Sample backward
    us = npr.rand(T)
    zs = -1 * np.ones(T, dtype=int)
    backward_sample(log_Ps, ll, alphas, us, zs)
    return zs
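# A pure-NumPy sketch of the backward sampling step (assuming a stationary K x K
# matrix log_P; the compiled backward_sample consumes precomputed uniforms instead):
# draw z_T from the filtered distribution, then work backward using
# p(z_t | z_{t+1}, x_{1:t}) proportional to exp(alpha_t(z_t) + log_P[z_t, z_{t+1}]).
# The function name backward_sample_ref is illustrative only.
import numpy as np
from scipy.special import logsumexp

def backward_sample_ref(log_P, alphas, rng=np.random):
    T, K = alphas.shape
    zs = np.zeros(T, dtype=int)
    p = np.exp(alphas[-1] - logsumexp(alphas[-1]))
    zs[-1] = rng.choice(K, p=p)
    for t in range(T - 2, -1, -1):
        lp = alphas[t] + log_P[:, zs[t+1]]
        p = np.exp(lp - logsumexp(lp))
        zs[t] = rng.choice(K, p=p)
    return zs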
def hmm_expected_states(log_pi0, log_Ps, ll, memlimit=2**31):
    T, K = ll.shape

    # Make sure everything is C contiguous
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    normalizer = logsumexp(alphas[-1])

    betas = np.zeros((T, K))
    backward_pass(log_Ps, ll, betas)

    # Compute E[z_t] for t = 1, ..., T
    expected_states = alphas + betas
    expected_states -= logsumexp(expected_states, axis=1, keepdims=True)
    expected_states = np.exp(expected_states)

    # Compute E[z_t, z_{t+1}] for t = 1, ..., T-1
    # Note that this is an array of size T*K*K, which can be quite large.
    # To be a bit more frugal with memory, first check if the given log_Ps
    # are TxKxK. If so, instantiate the full expected joints as well, since
    # we will need them for the M-step. However, if log_Ps is 1xKxK then we
    # know that the transition matrix is stationary, and all we need for the
    # M-step is the sum of the expected joints.
    stationary = (log_Ps.shape[0] == 1)
    if not stationary:
        expected_joints = alphas[:-1, :, None] + betas[1:, None, :] + ll[1:, None, :] + log_Ps
        expected_joints -= expected_joints.max((1, 2))[:, None, None]
        expected_joints = np.exp(expected_joints)
        expected_joints /= expected_joints.sum((1, 2))[:, None, None]
    else:
        # Compute the sum over the time axis of the expected joints
        expected_joints = np.zeros((K, K))
        compute_stationary_expected_joints(alphas, betas, ll, log_Ps[0], expected_joints)
        expected_joints = expected_joints[None, :, :]

    return expected_states, expected_joints, normalizer
def hmm_filter(log_pi0, log_Ps, ll):
    T, K = ll.shape

    # Make sure everything is C contiguous
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    # Forward pass gets the predicted state at time t given
    # observations up to and including those from time t
    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)

    # Predict forward with the transition matrix
    pz_tt = np.exp(alphas - logsumexp(alphas, axis=1, keepdims=True))
    pz_tp1t = np.matmul(pz_tt[:-1, None, :], np.exp(log_Ps))[:, 0, :]

    # Include the initial state distribution
    pz_tp1t = np.row_stack((np.exp(log_pi0 - logsumexp(log_pi0)), pz_tp1t))
    assert np.allclose(np.sum(pz_tp1t, axis=1), 1.0)
    return pz_tp1t
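# Hypothetical usage sketch: each row of the return value is a one-step-ahead
# predictive distribution p(z_t = . | x_{1:t-1}), so a simple point prediction is
#
#     z_pred = np.argmax(hmm_filter(log_pi0, log_Ps, ll), axis=1)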
def _make_grad_hmm_normalizer(argnum, ans, log_pi0, log_Ps, ll):
    # Make sure everything is C contiguous and unboxed
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    dlog_pi0 = np.zeros_like(log_pi0)
    dlog_Ps = np.zeros_like(log_Ps)
    dll = np.zeros_like(ll)
    T, K = ll.shape

    # Forward pass to get alphas
    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    grad_hmm_normalizer(log_Ps, alphas, dlog_pi0, dlog_Ps, dll)

    if argnum == 0:
        return lambda g: g * dlog_pi0
    if argnum == 1:
        return lambda g: g * dlog_Ps
    if argnum == 2:
        return lambda g: g * dll
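# For context, a sketch of how a vjp-maker with this (argnum, ans, ...) signature is
# typically registered with autograd so that grad(hmm_normalizer) works. This is an
# assumed registration pattern, not verbatim from the module:
#
#     from functools import partial
#     from autograd.extend import primitive, defvjp
#
#     hmm_normalizer = primitive(hmm_normalizer)
#     defvjp(hmm_normalizer,
#            partial(_make_grad_hmm_normalizer, 0),
#            partial(_make_grad_hmm_normalizer, 1),
#            partial(_make_grad_hmm_normalizer, 2))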
def test_hmm_mp_perf(T=10000, K=100, D=20):
    # Make parameters
    pi0 = np.ones(K) / K
    Ps = npr.rand(T-1, K, K)
    Ps /= Ps.sum(axis=2, keepdims=True)
    ll = npr.randn(T, K)
    out1 = np.zeros((T, K))
    out2 = np.zeros((T, K))

    # Run the PyHSMM message passing code
    from pyhsmm.internals.hmm_messages_interface import messages_forwards_log, messages_backwards_log
    tic = time()
    messages_forwards_log(Ps, ll, pi0, out1)
    pyhsmm_dt = time() - tic
    print("PyHSMM Fwd: ", pyhsmm_dt, "sec")

    # Run the SSM message passing code
    from ssm.messages import forward_pass, backward_pass
    forward_pass(pi0, Ps, ll, out2)  # Call once to compile, then time it
    tic = time()
    forward_pass(pi0, Ps, ll, out2)
    ssm_dt = time() - tic
    print("SSM Fwd: ", ssm_dt, "sec")
    assert np.allclose(out1, out2)

    # Backward pass
    tic = time()
    messages_backwards_log(Ps, ll, out1)
    pyhsmm_dt = time() - tic
    print("PyHSMM Bwd: ", pyhsmm_dt, "sec")

    backward_pass(Ps, ll, out2)  # Call once to compile, then time it
    tic = time()
    backward_pass(Ps, ll, out2)
    ssm_dt = time() - tic
    print("SSM (Numba) Bwd: ", ssm_dt, "sec")
    assert np.allclose(out1, out2)
def test_forward_pass(T=1000, K=3):
    log_pi0, log_Ps, ll = make_parameters(T, K)
    a1 = forward_pass_np(log_pi0, log_Ps, ll)
    a2 = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, a2)
    assert np.allclose(a1, a2)
def test_forward_pass(T=1000, K=3):
    pi0, Ps, ll = make_parameters(T, K)
    a1 = forward_pass_np(pi0, Ps, ll)
    a2 = np.zeros((T, K))
    forward_pass(pi0, Ps, ll, a2)
    assert np.allclose(a1, a2)
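# The test helpers (make_parameters, forward_pass_np, hmm_normalizer_np) are defined
# elsewhere in the test module. A plausible make_parameters for the probability-space
# tests might look like the following hypothetical sketch:
import numpy as np

def make_parameters(T, K):
    pi0 = np.ones(K) / K                                   # uniform initial distribution
    Ps = np.random.dirichlet(np.ones(K), size=(T-1, K))    # (T-1, K, K), rows sum to 1
    ll = np.random.randn(T, K)                             # random log-likelihoods
    return pi0, Ps, ll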