def expectation_maximization(X, Y, num_comps, max_iter, tol):
    """Fit a mixture of autoregressive components to observations via EM.

    :param X: num_obs x obs_len x (model_ord*signal_dim) numpy array of
        lagged/stacked regressors (third dim inferred as model_ord*signal_dim)
    :param Y: num_obs x obs_len x signal_dim numpy array of observations
    :param num_comps: positive integer, number of mixture components
    :param max_iter: positive integer, maximum number of EM iterations
    :param tol: positive float, relative tolerance for the stopping criterion
    :return ar_coef: autoregressive coefficients per component (shape set by
        initialize_autoregressive_components; indexed as ar_coef[j])
    :return mixing_coef: num_obs x num_comps numpy array of mixture weights
    :return w_nll: float, weighted negative log likelihood (Font, et al., 2007)
    :return residual: list (one entry per iteration) of 2-element arrays
        [||ar_coef change||, ||mixing_coef change||]
    :return end_cond: str, 'relative tolerance' if converged, else
        'maximum iteration'
    """
    # Initialize algorithm
    num_obs, obs_len, signal_dim = Y.shape
    _, _, model_ord_by_signal_dim = X.shape
    # Model order recovered from the stacked regressor width; assumes
    # X's last dimension is an exact multiple of signal_dim.
    model_ord = int(model_ord_by_signal_dim / signal_dim)
    # tau[i, t, j]: responsibility of component j for observation i at time t
    tau = np.zeros([num_obs, obs_len, num_comps])
    # Random row-stochastic initialization of the mixing coefficients
    mixing_coef = np.random.rand(num_obs, num_comps)
    mixing_coef = mixing_coef / np.sum(mixing_coef, axis=1, keepdims=True)
    ar_coef = initialize_autoregressive_components(num_comps, model_ord,
                                                   signal_dim)
    residual = []
    end_cond = 'maximum iteration'
    # Expectation-Maximization (EM) Algorithm
    for iteration in range(max_iter):
        # Expectation: unnormalized responsibilities from the per-component
        # prediction residual norms.
        # NOTE(review): uses exp(-0.5 * ||r||), not exp(-0.5 * ||r||^2);
        # presumably intentional per the cited likelihood — confirm.
        for j in range(num_comps):
            tau[:, :, j] = np.expand_dims(mixing_coef[:, j], axis=1) * np.exp(
                -0.5 * sl.norm(Y - np.matmul(X, ar_coef[j]), axis=2))
        # Normalize responsibilities across components
        tau = tau / np.sum(tau, axis=2, keepdims=True)
        # Maximization
        mixing_coef_prev = np.copy(mixing_coef)
        ar_coef_prev = np.copy(ar_coef)
        # Updated weights: mean responsibility over time
        mixing_coef = np.mean(tau, axis=1)
        # Per-component weighted least squares: solve
        # (X' diag(tau_j) X) A_j = X' diag(tau_j) Y, summed over obs and time
        for j in range(num_comps):
            tauX = np.expand_dims(tau[:, :, j], axis=2) * X
            ar_coef[j] = sl.solve(
                np.tensordot(tauX, X, axes=((0, 1), (0, 1))),
                np.tensordot(tauX, Y, axes=((0, 1), (0, 1))),
                assume_a='pos')  # Gram matrix is symmetric positive definite
        # Track the size of this iteration's parameter updates
        residual.append(
            np.array([
                sl.norm(ar_coef - ar_coef_prev),
                sl.norm(mixing_coef - mixing_coef_prev)
            ]))
        # Check convergence: both updates small relative to the first
        # iteration's updates
        if iteration > 0 and np.all(residual[iteration] < tol * residual[0]):
            end_cond = 'relative tolerance'
            break
    # Compute weighted negative log likelihood (Font, et al., 2007) with the
    # final parameters; tau is reused as scratch and left unnormalized here.
    for j in range(num_comps):
        tau[:, :, j] = np.expand_dims(mixing_coef[:, j], axis=1) * np.exp(
            -0.5 * sl.norm(Y - np.matmul(X, ar_coef[j]), axis=2))
    w_nll = -np.mean(np.log(np.sum(tau, axis=2)))
    return ar_coef, mixing_coef, w_nll, residual, end_cond
def alm_sample(num_obs, obs_len, signal_dim, num_comps, model_ord, coef_supp,
               coef_cond=None, comp_cond=None):
    """Generate random observations from the autoregressive linear model (ALM).

    Repeatedly draws a dictionary of AR components and sparse mixing
    coefficients (rejecting unstable mixtures), then samples observations.
    When both conditioning thresholds are supplied, sampling is retried (up to
    MAX_ITER times) until the conditioning test passes.

    :param num_obs: positive integer
    :param obs_len: positive integer
    :param signal_dim: positive integer
    :param num_comps: positive integer
    :param model_ord: positive integer
    :param coef_supp: positive integer less than num_comps
    :param coef_cond: positive float
    :param comp_cond: positive float

    :return obs: num_obs x obs_len x signal_dim numpy array
    :return mixing_coef: num_obs x num_comps numpy array
    :return ar_comps: num_comps x model_ord x signal_dim x signal_dim numpy array
    """
    nr.seed()
    check_conditioning = coef_cond is not None and comp_cond is not None
    coef_scale = num_comps ** (-1 / 2)
    for _ in range(MAX_ITER):
        ar_comps = initialize_autoregressive_components(num_comps, model_ord,
                                                        signal_dim,
                                                        stacked=False)
        # Sparse mixing coefficients: coef_supp nonzeros per observation
        mixing_coef = np.zeros([num_obs, num_comps])
        for obs_idx in range(num_obs):
            support = list(nr.choice(num_comps, size=coef_supp, replace=False))
            mixing_coef[obs_idx, support] = coef_scale * nr.randn(coef_supp)
            # Redraw the nonzero entries until the mixed AR process is stable
            while not isstable(
                    np.tensordot(mixing_coef[obs_idx, :], ar_comps, axes=1)):
                mixing_coef[obs_idx, support] = coef_scale * nr.randn(coef_supp)
        # Sample each observation from its mixed autoregressive process
        obs = np.zeros([num_obs, obs_len, signal_dim])
        for obs_idx in range(num_obs):
            obs[obs_idx, :, :] = autoregressive_sample(
                obs_len, signal_dim, signal_dim ** (-1 / 2),
                np.tensordot(mixing_coef[obs_idx, :], ar_comps, axes=1))
        if not check_conditioning:
            break
        # Accept the sample only if both conditioning criteria are met
        k1, k2 = check_alm_condition(obs, ar_comps, mixing_coef)
        if k1 < coef_cond and np.all(k2 < comp_cond):
            break
    return obs, mixing_coef, ar_comps
def fit(self, obs, model_ord, num_comps, penalty_param, num_starts=5,
        initial_comps=None, return_path=False, return_all=False):
    """
    Fit the ALMM model to obs.

    :param obs: list of obs_len x signal_dim numpy array
    :param model_ord: positive integer
    :param num_comps: positive integer
    :param penalty_param: nonnegative float
    :param num_starts: positive integer
    :param initial_comps: num_starts x num_comps x model_ord*signal_dim
        x signal_dim array-like of initial dictionary estimates
    :param return_path: boolean
    :param return_all: boolean

    :return ar_comps: [list of] num_comps x model_ord*signal_dim x signal_dim
        numpy array
    :return mixing_coef: [list of] num_observations x num_comps numpy array
    :return nll: [list of] float
    :return solver_time: list of float
    """
    # --- argument validation (cheap checks first, before any expensive
    # initialization) ---
    # np.integer (not the alias np.int, removed in NumPy 1.24) matches both
    # Python ints and numpy integer scalars.
    if not np.issubdtype(type(num_comps), np.integer) or num_comps < 1:
        raise TypeError('Number of components must be a positive integer.')
    if not np.issubdtype(type(model_ord), np.integer) or model_ord < 1:
        raise TypeError('Model order must be a positive integer.')
    # BUG FIX: original used `and`, which only raised when the value was both
    # non-float AND negative — negative floats slipped through, and non-numeric
    # values crashed on the `< 0` comparison. Ints are accepted for
    # backward compatibility.
    if not isinstance(penalty_param, (int, float)) or penalty_param < 0:
        raise ValueError('Penalty parameter must be a positive float.')
    if not np.issubdtype(type(num_starts), np.integer) or num_starts < 1:
        raise ValueError('Number of starts must be a positive integer.')
    if not isinstance(return_path, bool):
        raise TypeError('Return path must be a boolean.')
    if not isinstance(return_all, bool):
        raise TypeError('Return all must be a boolean.')
    _, signal_dimension = obs[0].shape
    if initial_comps is None:
        # One random initialization per start
        initial_comps = [
            initialize_autoregressive_components(num_comps, model_ord,
                                                 signal_dimension)
            for _ in range(num_starts)
        ]
    elif np.shape(initial_comps) != (num_starts, num_comps,
                                     model_ord * signal_dimension,
                                     signal_dimension):
        raise ValueError(
            'Initial dictionary estimate must be list of num_comps x model_ord*signal_dim'
            + ' x signal_dim numpy arrays.')
    else:
        # Normalize each provided component to unit Frobenius norm
        initial_comps = [
            np.array([
                component_kj / sl.norm(component_kj[:])
                for component_kj in component_k
            ]) for component_k in initial_comps
        ]
    (self.component, self.mixing_coef, self.solver_time, self.nll,
     self.residual, self.stop_condition) = [], [], [], [], [], []
    if self.verbose:
        print('-Formatting data...', end=" ", flush=True)
    YtY, XtY, XtX = package_observations(obs, model_ord)
    if self.verbose:
        print('Complete.')
    if self.verbose:
        print('-Fitting model to data...')
    # Run the solver once per start and record every start's results
    for start_k in range(num_starts):
        if self.verbose and num_starts > 1:
            print('--Start: ' + str(start_k))
        (component_k, mixing_coef_k, component_residual_k, coef_residual_k,
         stop_condition_k, solver_time_k) = self._fit(
            XtX, XtY, model_ord, num_comps, penalty_param,
            initial_comps[start_k], return_path=return_path)
        self.component.append(component_k)
        self.mixing_coef.append(mixing_coef_k)
        self.residual.append((component_residual_k, coef_residual_k))
        self.stop_condition.append(stop_condition_k)
        self.solver_time.append(solver_time_k)
    if self.verbose:
        print('-Complete.')
    if self.verbose:
        print('-Computing likelihood...', end=" ", flush=True)
    # Negative log likelihood per start; a list over iterates if the full
    # solver path was requested.
    for component_k, mixing_coef_k in zip(self.component, self.mixing_coef):
        if return_path:
            nll_k = [
                negative_log_likelihood(YtY, XtX, XtY, component_ki,
                                        mixing_coef_ki, penalty_param,
                                        self.coef_penalty_type)
                for component_ki, mixing_coef_ki in zip(component_k,
                                                        mixing_coef_k)
            ]
        else:
            nll_k = negative_log_likelihood(YtY, XtX, XtY, component_k,
                                            mixing_coef_k, penalty_param,
                                            self.coef_penalty_type)
        self.nll.append(nll_k)
    if self.verbose:
        print('Complete.')
    if num_starts == 1:
        return (self.component.pop(), self.mixing_coef.pop(), self.nll.pop(),
                self.solver_time.pop())
    if return_all:
        return self.component, self.mixing_coef, self.nll, self.solver_time
    # Otherwise return the start with the smallest (final) negative log
    # likelihood; ties resolve to the earliest start, as before.
    if return_path:
        opt = min(range(num_starts), key=lambda k: self.nll[k][-1])
    else:
        opt = min(range(num_starts), key=lambda k: self.nll[k])
    return (self.component[opt], self.mixing_coef[opt], self.nll[opt],
            self.solver_time[opt])
# Experiment: fit the ALM with the PALM solver on sampled data and track the
# per-start solution path.
MODEL_ORD = 2
SPARSITY = 3
NUM_STARTS = 5
PENALTY_PARAM = 1e-2
# Plot palette (one color per start, presumably — confirm against plotting code)
colors = ['#ffffbf', '#fdae61', '#d7191c', '#abdda4', '#2b83ba']
# Sample observations x, true mixing coefficients C, and true dictionary D
x, C, D = alm_sample(NUM_OBS, OBS_LEN, SIG_DIM, NUM_COMPS, MODEL_ORD, SPARSITY,
                     coef_cond=1e1, comp_cond=1e1)
# One random initial dictionary per start
D_0 = [
    initialize_autoregressive_components(NUM_COMPS, MODEL_ORD, SIG_DIM)
    for _ in range(NUM_STARTS)
]
alm = Alm(solver='palm', verbose=True)
# return_path=True + return_all=True: D_palm is a list (per start) of the
# dictionary iterates along the solver path
D_palm, C_palm, palm_likelihood, _ = alm.fit(x, MODEL_ORD, NUM_COMPS,
                                             PENALTY_PARAM,
                                             num_starts=NUM_STARTS,
                                             initial_comps=D_0,
                                             return_path=True,
                                             return_all=True)
palm_error = []
for i, Di in enumerate(D_palm):
    loss = []
    # NOTE(review): loop body truncated in this chunk — remainder not visible.
    for Dis in Di:
from experiments.utility import load_results, save_results

# Experiment grid: number of observations (n) vs. observation length (m),
# estimating each observation's VAR coefficients by least squares.
NUM_OBS = [2**i for i in range(4, 11)]
OBS_LEN = [2**i for i in range(4, 11)]
SIGNAL_DIM = 5
NUM_COMPONENTS = 10
COEF_SUPPORT = 1
MODEL_ORDER = 2
NUM_STARTS = 10
PENALTY_PARAM = 1e-2
NUM_ITERATIONS = 10

# error[t, i, j]: mean Frobenius estimation error at trial t for
# (NUM_OBS[i], OBS_LEN[j])
error = np.zeros([NUM_ITERATIONS, len(NUM_OBS), len(OBS_LEN)])
#nll = np.zeros_like(error)
max_num_obs = max(NUM_OBS)
max_obs_len = max(OBS_LEN)
for trial in range(NUM_ITERATIONS):
    # Draw one AR coefficient set per (potential) observation, then sample
    # the longest series once and slice it for each grid cell below.
    D = initialize_autoregressive_components(max_num_obs, MODEL_ORDER,
                                             SIGNAL_DIM)
    x = np.zeros([max_num_obs, max_obs_len, SIGNAL_DIM])
    for obs_idx in range(max_num_obs):
        x[obs_idx] = autoregressive_sample(max_obs_len, SIGNAL_DIM,
                                           SIGNAL_DIM ** (-1 / 2),
                                           unstack_ar_coef(D[obs_idx]))
    for row, n_i in enumerate(NUM_OBS):
        for col, m_i in enumerate(OBS_LEN):
            _, XtY, XtX = package_observations(x[:n_i, :m_i, :], MODEL_ORDER)
            # Per-observation least-squares solve of the VAR normal equations
            D_ls = np.array([
                sl.solve(XtX_i, XtY_i, assume_a='pos')
                for XtX_i, XtY_i in zip(XtX, XtY)
            ])
            # nll[trial, :, row, col] = np.array(L_palm)
            error[trial, row, col] = np.mean(
                sl.norm(D[:n_i] - D_ls, ord='fro', axis=(1, 2)))

###################
# save results
###################
#save_results(error, 'n_vs_m-var.pickle')
# Experiment: PALM-solver likelihood and error over the (NUM_OBS, OBS_LEN)
# grid, NUM_ITERATIONS independent trials with NUM_STARTS restarts each.
PENALTY_PARAM = 1e-2
NUM_ITERATIONS = 10
nll = np.zeros([NUM_ITERATIONS, NUM_STARTS, len(NUM_OBS), len(OBS_LEN)])
error = np.zeros_like(nll)
for iteration in range(NUM_ITERATIONS):
    # Sample once at the largest grid size; slice it for each cell below
    x, _, D = alm_sample(max(NUM_OBS), max(OBS_LEN), SIGNAL_DIM,
                         NUM_COMPONENTS, MODEL_ORDER, COEF_SUPPORT,
                         coef_cond=1e2, comp_cond=1e2)
    # Shared initial dictionaries across grid cells within this trial
    D_0 = [
        initialize_autoregressive_components(NUM_COMPONENTS, MODEL_ORDER,
                                             SIGNAL_DIM)
        for _ in range(NUM_STARTS)
    ]
    for i, n_i in enumerate(NUM_OBS):
        for j, m_i in enumerate(OBS_LEN):
            alm_model = Alm(solver='palm')
            # NOTE(review): slicing with (n_i - 1) and (m_i - 1) uses one
            # fewer observation/timestep than the grid value — the sibling
            # script slices x[:n_i, :m_i, :]. Possible off-by-one; confirm
            # intent.
            D_palm, _, L_palm, _ = alm_model.fit(x[:(n_i - 1), :(m_i - 1), :],
                                                 MODEL_ORDER, NUM_COMPONENTS,
                                                 PENALTY_PARAM,
                                                 num_starts=NUM_STARTS,
                                                 initial_comps=D_0,
                                                 return_all=True)
            nll[iteration, :, i, j] = np.array(L_palm)
            error_palm = []
            # NOTE(review): loop body truncated in this chunk — remainder not
            # visible.
            for D_k in D_palm: