def initialize(self, datas, inputs=None, masks=None, tags=None,
               verbose=0,
               num_init_iters=50,
               discrete_state_init_method="random",
               num_init_restarts=1):
    """
    Initialize the model by fitting ARHMMs to the inverted emissions.
    The restart with the highest log probability seeds the initial state
    distribution, transitions, and dynamics.
    """
    # First initialize the observation model
    self.emissions.initialize(datas, inputs, masks, tags)

    # Get the initialized variational mean for the data
    xs = [self.emissions.invert(data, input, mask, tag)
          for data, input, mask, tag in zip(datas, inputs, masks, tags)]
    xmasks = [np.ones_like(x, dtype=bool) for x in xs]

    # Number of times to run the ARHMM initialization (we'll use the one
    # with the highest log probability as the initialization)
    pbar = ssm_pbar(num_init_restarts, verbose,
                    "ARHMM Initialization restarts", [''])

    # Loop through initialization restarts
    best_lp = -np.inf
    for i in pbar:

        # Now run a few iterations of EM on an ARHMM with the variational mean
        if verbose > 0:
            print("Initializing with an ARHMM using {} steps of EM.".format(num_init_iters))

        arhmm = hmm.HMM(self.K, self.D, M=self.M,
                        init_state_distn=copy.deepcopy(self.init_state_distn),
                        transitions=copy.deepcopy(self.transitions),
                        observations=copy.deepcopy(self.dynamics))

        arhmm.fit(xs, inputs=inputs, masks=xmasks, tags=tags,
                  verbose=verbose,
                  method="em",
                  num_iters=num_init_iters,
                  init_method=discrete_state_init_method)

        # Keep track of the ARHMM that led to the highest log probability
        current_lp = arhmm.log_probability(xs)
        if current_lp > best_lp:
            best_lp = current_lp
            best_arhmm = copy.deepcopy(arhmm)

    # Use the best restart to set the initial discrete state distribution,
    # transitions, and dynamics
    self.init_state_distn = copy.deepcopy(best_arhmm.init_state_distn)
    self.transitions = copy.deepcopy(best_arhmm.transitions)
    self.dynamics = copy.deepcopy(best_arhmm.observations)
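# Usage sketch (illustrative, not part of this module): warm-starting a model
# with several ARHMM restarts before the main fit. The constructor arguments and
# the names `slds` and `ys` are assumptions about the surrounding API.
#
#   slds = SLDS(N, K, D, emissions="gaussian")
#   slds.initialize(ys, num_init_restarts=3, num_init_iters=25, verbose=1)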
def _fit_laplace_em(self, variational_posterior, datas,
                    inputs=None, masks=None, tags=None,
                    verbose=2,
                    num_iters=100,
                    num_samples=1,
                    continuous_optimizer="newton",
                    continuous_tolerance=1e-4,
                    continuous_maxiter=100,
                    emission_optimizer="lbfgs",
                    emission_optimizer_maxiter=100,
                    alpha=0.5,
                    learning=True):
    r"""
    Fit an approximate posterior p(z, x | y) \approx q(z) q(x).
    Perform block coordinate ascent on q(z) followed by q(x).
    Assume q(x) is a Gaussian with a block tridiagonal precision matrix,
    and that we update q(x) via Laplace approximation.
    Assume q(z) is a chain-structured discrete graphical model.
    """
    elbos = [self._laplace_em_elbo(variational_posterior, datas, inputs, masks, tags)]
    pbar = ssm_pbar(num_iters, verbose, "ELBO: {:.1f}", [elbos[-1]])

    for itr in pbar:
        # 1. Update the discrete state posterior q(z) if K > 1
        if self.K > 1:
            self._fit_laplace_em_discrete_state_update(
                variational_posterior, datas, inputs, masks, tags, num_samples)

        # 2. Update the continuous state posterior q(x)
        self._fit_laplace_em_continuous_state_update(
            variational_posterior, datas, inputs, masks, tags,
            continuous_optimizer, continuous_tolerance, continuous_maxiter)

        # 3. Update the model parameters
        if learning:
            self._fit_laplace_em_params_update(
                variational_posterior, datas, inputs, masks, tags,
                emission_optimizer, emission_optimizer_maxiter, alpha)

        elbos.append(self._laplace_em_elbo(variational_posterior, datas, inputs, masks, tags))
        if verbose == 2:
            pbar.set_description("ELBO: {:.1f}".format(elbos[-1]))

    return np.array(elbos)
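# Usage sketch (illustrative): _fit_laplace_em is typically reached through a
# public fit(...) entry point that constructs the variational posterior q(z) q(x).
# The method name, posterior name, and accessor below are assumptions.
#
#   elbos, q = slds.fit(ys, method="laplace_em",
#                       variational_posterior="structured_meanfield",
#                       num_iters=50, alpha=0.5)
#   x_hat = q.mean_continuous_states[0]   # assumed accessor for E_q[x_{1:T}]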
def _fit_em(self, datas, inputs, masks, tags,
            verbose=2,
            num_iters=100,
            tolerance=0,
            init_state_mstep_kwargs={},
            transitions_mstep_kwargs={},
            observations_mstep_kwargs={},
            **kwargs):
    """
    Fit the parameters with expectation maximization.

    E step: compute E[z_t] and E[z_t, z_{t+1}] with message passing;
    M step: analytical maximization of E_{p(z | x)} [log p(x, z; theta)].
    """
    lls = [self.log_probability(datas, inputs, masks, tags)]
    pbar = ssm_pbar(num_iters, verbose, "LP: {:.1f}", [lls[-1]])

    for itr in pbar:
        # E step: compute expected latent states with current parameters
        expectations = [self.expected_states(data, input, mask, tag)
                        for data, input, mask, tag in zip(datas, inputs, masks, tags)]

        # M step: maximize expected log joint wrt parameters
        self.init_state_distn.m_step(expectations, datas, inputs, masks, tags,
                                     **init_state_mstep_kwargs)
        self.transitions.m_step(expectations, datas, inputs, masks, tags,
                                **transitions_mstep_kwargs)
        self.observations.m_step(expectations, datas, inputs, masks, tags,
                                 **observations_mstep_kwargs)

        # Store progress
        lls.append(self.log_prior() + sum([ll for (_, _, ll) in expectations]))
        if verbose == 2:
            pbar.set_description("LP: {:.1f}".format(lls[-1]))

        # Check for convergence
        if itr > 0 and abs(lls[-1] - lls[-2]) < tolerance:
            if verbose == 2:
                pbar.set_description("Converged to LP: {:.1f}".format(lls[-1]))
            break

    return lls
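# Usage sketch (illustrative): the three *_mstep_kwargs dicts forward options to
# the corresponding factor's m_step, and a nonzero tolerance enables early
# stopping on the log probability. The wrapper call below is an assumed public
# interface.
#
#   lls = hmm.fit(ys, method="em", num_iters=100, tolerance=1e-4)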
def _fit_em(self, datas, inputs, masks, tags, verbose=2, num_iters=100, **kwargs):
    """
    Fit the parameters with expectation maximization.

    E step: compute E[z_t] and E[z_t, z_{t+1}] with message passing.
    M step: analytical maximization of E_{p(z | x)} [log p(x, z; theta)],
    except for the transition model, which takes a stochastic M step using
    samples from the posterior.
    """
    lls = [self.log_probability(datas, inputs, masks, tags)]
    pbar = ssm_pbar(num_iters, verbose, "LP: {:.1f}", [lls[-1]])

    for itr in pbar:
        # E step: compute expected latent states with current parameters
        expectations = [self.expected_states(data, input, mask, tag)
                        for data, input, mask, tag in zip(datas, inputs, masks, tags)]

        # E step: also sample the posterior for the stochastic M step of the transition model
        samples = [self.posterior_sample(data, input, mask, tag)
                   for data, input, mask, tag in zip(datas, inputs, masks, tags)]

        # M step: maximize expected log joint wrt parameters
        self.init_state_distn.m_step(expectations, datas, inputs, masks, tags, **kwargs)
        self.transitions.m_step(expectations, datas, inputs, masks, tags, samples, **kwargs)
        self.observations.m_step(expectations, datas, inputs, masks, tags, **kwargs)

        # Store progress
        lls.append(self.log_prior() + sum([ll for (_, _, ll) in expectations]))
        if verbose == 2:
            pbar.set_description("LP: {:.1f}".format(lls[-1]))

    return lls
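# Usage sketch (illustrative): this variant differs from the plain EM fitter only
# in that the transition model's M step also receives posterior samples (a
# stochastic M step); the caller-facing interface is assumed to be unchanged.
#
#   lls = model.fit(ys, method="em", num_iters=100)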
def _fit_sgd(self, optimizer, datas, inputs, masks, tags,
             verbose=2, num_iters=1000, **kwargs):
    """
    Fit the model with maximum marginal likelihood, using gradient ascent
    on the log probability of the data.
    """
    T = sum([data.shape[0] for data in datas])

    # Define the objective: the negative log probability, normalized by
    # the total number of time steps
    def _objective(params, itr):
        self.params = params
        obj = self.log_probability(datas, inputs, masks, tags)
        return -obj / T

    # Set up the progress bar
    lls = [-_objective(self.params, 0) * T]
    pbar = ssm_pbar(num_iters, verbose, "LP: {:.1f}", [lls[-1]])

    # Run the optimizer
    step = dict(sgd=sgd_step, rmsprop=rmsprop_step, adam=adam_step)[optimizer]
    state = None
    for itr in pbar:
        self.params, val, g, state = step(value_and_grad(_objective),
                                          self.params, itr, state, **kwargs)
        lls.append(-val * T)
        if verbose == 2:
            pbar.set_description("LP: {:.1f}".format(lls[-1]))
            pbar.update(1)

    return lls
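# Usage sketch (illustrative): the optimizer is selected by name from
# {"sgd", "rmsprop", "adam"} and extra kwargs are forwarded to its step function;
# `step_size` is an assumption about that step function's signature.
#
#   lls = model._fit_sgd("adam", ys, inputs, masks, tags,
#                        num_iters=2000, step_size=1e-3)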
def _fit_stochastic_em(self, optimizer, datas, inputs, masks, tags,
                       verbose=2, num_epochs=100, **kwargs):
    """
    Replace the M step of EM with a stochastic gradient update using the
    ELBO computed on a minibatch of data.
    """
    M = len(datas)
    T = sum([data.shape[0] for data in datas])

    # A helper to grab a minibatch of data
    perm = [np.random.permutation(M) for _ in range(num_epochs)]

    def _get_minibatch(itr):
        epoch = itr // M
        m = itr % M
        i = perm[epoch][m]
        return datas[i], inputs[i], masks[i], tags[i]

    # Define the objective (negative ELBO)
    def _objective(params, itr):
        # Grab a minibatch of data
        data, input, mask, tag = _get_minibatch(itr)
        Ti = data.shape[0]

        # E step: compute expected latent states with current parameters
        Ez, Ezzp1, _ = self.expected_states(data, input, mask, tag)

        # M step: set the parameters and compute the (normalized) objective function
        self.params = params
        pi0 = self.init_state_distn.initial_state_distn
        log_Ps = self.transitions.log_transition_matrices(data, input, mask, tag)
        log_likes = self.observations.log_likelihoods(data, input, mask, tag)

        # Compute the expected log probability
        # (Scale each term by the length of this minibatch relative to the full dataset.)
        obj = self.log_prior()
        obj += np.sum(Ez[0] * np.log(pi0)) * M
        obj += np.sum(Ezzp1 * log_Ps) * (T - M) / (Ti - 1)
        obj += np.sum(Ez * log_likes) * T / Ti
        assert np.isfinite(obj)

        return -obj / T

    # Set up the progress bar
    lls = [-_objective(self.params, 0) * T]
    pbar = ssm_pbar(num_epochs * M, verbose, "Epoch {} Itr {} LP: {:.1f}",
                    [0, 0, lls[-1]])

    # Run the optimizer
    step = dict(sgd=sgd_step, rmsprop=rmsprop_step, adam=adam_step)[optimizer]
    state = None
    for itr in pbar:
        self.params, val, _, state = step(value_and_grad(_objective),
                                          self.params, itr, state, **kwargs)
        epoch = itr // M
        m = itr % M
        lls.append(-val * T)
        if verbose == 2:
            pbar.set_description("Epoch {} Itr {} LP: {:.1f}".format(epoch, m, lls[-1]))
            pbar.update(1)

    return lls
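# Note on the scaling (from the code above): each iteration visits one trial, so
# an epoch is one pass over the M trials. The initial-state term is scaled by M
# (number of trials), the transition term by (T - M) / (Ti - 1) (total transitions
# in the dataset over this trial's transitions), and the likelihood term by T / Ti,
# so the minibatch objective approximates the full-data expected log joint.
#
# Usage sketch (illustrative; the optimizer name and step-size kwarg are assumptions):
#
#   lls = model._fit_stochastic_em("adam", ys, inputs, masks, tags,
#                                  num_epochs=50, step_size=1e-3)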
def _fit_bbvi(self, variational_posterior, datas, inputs, masks, tags,
              verbose=2, learning=True, optimizer="adam", num_iters=100,
              **kwargs):
    """
    Fit with black box variational inference using a Gaussian
    approximation for the latent states x_{1:T}.
    """
    # Define the objective (negative ELBO)
    T = sum([data.shape[0] for data in datas])

    def _objective(params, itr):
        if learning:
            self.params, variational_posterior.params = params
        else:
            variational_posterior.params = params

        obj = self._bbvi_elbo(variational_posterior, datas, inputs, masks, tags)
        return -obj / T

    # Initialize the parameters
    if learning:
        params = (self.params, variational_posterior.params)
    else:
        params = variational_posterior.params

    # Set up the progress bar
    elbos = [-_objective(params, 0) * T]
    pbar = ssm_pbar(num_iters, verbose, "ELBO: {:.1f}", [elbos[0]])

    # Run the optimizer
    step = dict(sgd=sgd_step, rmsprop=rmsprop_step, adam=adam_step)[optimizer]
    state = None
    for itr in pbar:
        params, val, g, state = step(value_and_grad(_objective), params, itr, state)
        elbos.append(-val * T)

        # TODO: Check for convergence -- early stopping

        # Update progress bar
        if verbose == 2:
            pbar.set_description("ELBO: {:.1f}".format(elbos[-1]))
            pbar.update()

    # Save the final parameters
    if learning:
        self.params, variational_posterior.params = params
    else:
        variational_posterior.params = params

    return np.array(elbos)
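# Usage sketch (illustrative): with learning=False only the variational posterior
# is optimized, which is useful for inferring latent states under fixed model
# parameters. `q` is an assumed Gaussian variational posterior object.
#
#   elbos = model._fit_bbvi(q, ys, inputs, masks, tags,
#                           learning=False, optimizer="adam", num_iters=500)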