def tgl_forward_backward(
        emp_cov, alpha=0.01, beta=1., max_iter=100, n_samples=None,
        verbose=False, tol=1e-4, delta=1e-4, gamma=1., lamda=1., eps=0.5,
        debug=False, return_history=False, return_n_iter=True,
        choose='gamma', lamda_criterion='b', time_norm=1,
        compute_objective=True, return_n_linesearch=False, vareps=1e-5,
        stop_at=None, stop_when=1e-4, laplacian_penalty=False,
        init='empirical'):
    """Time-varying graphical lasso solver with forward-backward splitting.

    Solves the following problem via FBS:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i) + alpha*||K_i||_{od,1}
            + beta sum_{i=2}^T Psi(K_i - K_{i-1})

    where S_i = (1/n_i) X_i^T \times X_i is the empirical covariance of
    the data matrix X_i (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data.
    alpha, beta : float, optional
        Regularisation parameters.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    verbose : bool, default False
        Print info at each iteration.
    tol : float, optional
        Absolute tolerance for convergence.
    delta, gamma, lamda, eps : float, optional
        FBS parameters.
    debug : bool, default False
        Run in debug mode.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iterations before convergence.
    choose : ('gamma', 'lamda', 'fixed', 'both')
        Search iteratively gamma / lamda / none / both.
    lamda_criterion : ('a', 'b', 'c')
        Criterion to choose lamda. See ref for details.
    time_norm : float, optional
        Choose the temporal norm between points.
    compute_objective : bool, default True
        Choose to compute the objective value.
    return_n_linesearch : bool, optional
        Return the number of line-search iterations before convergence.
    vareps : float, optional
        Jitter for the loss.
    stop_at, stop_when : float, optional
        Other convergence criteria, as used in the paper.
    laplacian_penalty : bool, default False
        Use Laplacian penalty.
    init : {'empirical', 'zeros', ndarray}
        How to initialise the precision matrix: with the inverse empirical
        covariance, the zero matrix, or a precomputed value.

    Returns
    -------
    K, covariance : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T.
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    available_choose = ('gamma', 'lamda', 'fixed', 'both')
    if choose not in available_choose:
        raise ValueError(
            "`choose` parameter must be one of %s." % available_choose)

    n_times, _, n_features = emp_cov.shape
    K = init_precision(emp_cov, mode=init)

    if laplacian_penalty:
        obj_partial = partial(
            objective_laplacian, n_samples=n_samples, emp_cov=emp_cov,
            alpha=alpha, beta=beta, vareps=vareps)
        function_f = partial(
            loss_laplacian, beta=beta, n_samples=n_samples, S=emp_cov,
            vareps=vareps)
        gradient_f = partial(
            grad_loss_laplacian, emp_cov=emp_cov, beta=beta,
            n_samples=n_samples, vareps=vareps)
        function_g = partial(penalty_laplacian, alpha=alpha)
    else:
        psi = partial(vector_p_norm, p=time_norm)
        obj_partial = partial(
            objective, n_samples=n_samples, emp_cov=emp_cov, alpha=alpha,
            beta=beta, psi=psi, vareps=vareps)
        function_f = partial(
            loss, n_samples=n_samples, S=emp_cov, vareps=vareps)
        gradient_f = partial(
            grad_loss, emp_cov=emp_cov, n_samples=n_samples, vareps=vareps)
        function_g = partial(penalty, alpha=alpha, beta=beta, psi=psi)

    max_residual = -np.inf
    n_linesearch = 0
    checks = [convergence(obj=obj_partial(precision=K))]
    for iteration_ in range(max_iter):
        k_previous = K.copy()
        x_inv = np.array([linalg.pinvh(x) for x in K])
        grad = gradient_f(K, x_inv=x_inv)

        if choose in ['gamma', 'both']:
            gamma, y = choose_gamma(
                gamma / eps if iteration_ > 0 else gamma, K,
                function_f=function_f, beta=beta, alpha=alpha, lamda=lamda,
                grad=grad, delta=delta, eps=eps, max_iter=200, p=time_norm,
                x_inv=x_inv, choose=choose,
                laplacian_penalty=laplacian_penalty)

        x_hat = K - gamma * grad
        if choose not in ['gamma', 'both']:
            if laplacian_penalty:
                y = soft_thresholding_od(x_hat, alpha * gamma)
            else:
                y = prox_FL(
                    x_hat, beta * gamma, alpha * gamma, p=time_norm,
                    symmetric=True)

        if choose in ('lamda', 'both'):
            lamda, n_ls = choose_lamda(
                min(lamda / eps if iteration_ > 0 else lamda, 1), K,
                function_f=function_f, objective_f=obj_partial,
                gradient_f=gradient_f, function_g=function_g, gamma=gamma,
                delta=delta, eps=eps, criterion=lamda_criterion,
                max_iter=200, p=time_norm, grad=grad, prox=y, vareps=vareps)
            n_linesearch += n_ls

        K = K + min(max(lamda, 0), 1) * (y - K)
        # K, t = fista_step(Y, Y - Y_old, t)

        check = convergence(
            obj=obj_partial(precision=K),
            rnorm=np.linalg.norm(
                upper_diag_3d(K) - upper_diag_3d(k_previous)),
            snorm=np.linalg.norm(
                obj_partial(precision=K) -
                obj_partial(precision=k_previous)),
            e_pri=np.sqrt(upper_diag_3d(K).size) * tol + tol * max(
                np.linalg.norm(upper_diag_3d(K)),
                np.linalg.norm(upper_diag_3d(k_previous))),
            e_dual=tol)

        if verbose and iteration_ % (50 if verbose < 2 else 1) == 0:
            print(
                "obj: %.4f, rnorm: %.7f, snorm: %.4f,"
                "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        if return_history:
            checks.append(check)

        if np.isnan(check.rnorm) or np.isnan(check.snorm):
            warnings.warn("precision is not positive definite.")

        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break
        else:
            # use this convergence criterion
            subgrad = (x_hat - K) / gamma
            if 0:
                if laplacian_penalty:
                    grad = grad_loss_laplacian(
                        K, emp_cov, n_samples, vareps=vareps)
                else:
                    grad = grad_loss(K, emp_cov, n_samples, vareps=vareps)
                res_norm = np.linalg.norm(grad + subgrad)

                if iteration_ == 0:
                    normalizer = res_norm + 1e-6
                max_residual = max(
                    np.linalg.norm(grad), np.linalg.norm(subgrad)) + 1e-6
            else:
                res_norm = np.linalg.norm(K - k_previous) / gamma
                max_residual = max(max_residual, res_norm)
                normalizer = max(
                    np.linalg.norm(grad), np.linalg.norm(subgrad)) + 1e-6

            r_rel = res_norm / max_residual
            r_norm = res_norm / normalizer

            if not debug and (r_rel <= tol or r_norm <= tol) \
                    and iteration_ > 0:
                # or (check.rnorm <= check.e_pri and iteration_ > 0):
                break
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(k) for k in K])
    return_list = [K, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    if return_n_linesearch:
        return_list.append(n_linesearch)
    return return_list
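
# Illustrative usage sketch for the solver above (not part of the original
# module): it builds per-time empirical covariances from synthetic data and
# relies only on the default return values (precision, covariance, number of
# iterations). Shapes and hyper-parameters below are arbitrary.
def _example_tgl_forward_backward():
    """Run the FBS solver on synthetic data (illustrative only)."""
    import numpy as np

    rng = np.random.RandomState(0)
    data = rng.randn(3, 50, 10)  # 3 time points, 50 samples, 10 features
    emp_cov = np.array([np.cov(x, rowvar=False) for x in data])
    n_samples = np.full(3, 50)

    # With the defaults (return_n_iter=True, return_history=False) the
    # solver returns precision, covariance and the iteration count.
    K, cov, n_iter = tgl_forward_backward(
        emp_cov, alpha=0.1, beta=1., n_samples=n_samples, max_iter=50)
    return K, cov, n_iter
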
def kernel_time_graphical_lasso(
        emp_cov, alpha=0.01, rho=1, kernel=None, max_iter=100,
        n_samples=None, verbose=False, psi="laplacian", tol=1e-4, rtol=1e-4,
        return_history=False, return_n_iter=True, mode="admm",
        update_rho_options=None, compute_objective=True, stop_at=None,
        stop_when=1e-4, init="empirical"):
    """Time-varying graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i) + alpha ||K_i||_{od,1}
            + sum_{s>t}^T k_psi(s,t) Psi(K_s - K_t)

    where S_i is the empirical covariance of the data matrix X_i
    (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data.
    alpha : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    kernel : ndarray, shape (n_times, n_times), optional
        Kernel encoding the similarity between time points.
        Defaults to the identity matrix.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. Default is to take
        the empirical covariance and invert it.

    Returns
    -------
    K, covariance : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T.
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    n_times, _, n_features = emp_cov.shape

    if kernel is None:
        kernel = np.eye(n_times)

    Z_0 = init_precision(emp_cov, mode=init)
    U_0 = np.zeros_like(Z_0)
    Z_0_old = np.zeros_like(Z_0)

    Z_M, Z_M_old = {}, {}
    U_M = {}
    for m in range(1, n_times):
        # all possible markovian jumps
        Z_L = Z_0.copy()[:-m]
        Z_R = Z_0.copy()[m:]
        Z_M[m] = (Z_L, Z_R)

        U_L = np.zeros_like(Z_L)
        U_R = np.zeros_like(Z_R)
        U_M[m] = (U_L, U_R)

        Z_L_old = np.zeros_like(Z_L)
        Z_R_old = np.zeros_like(Z_R)
        Z_M_old[m] = (Z_L_old, Z_R_old)

    if n_samples is None:
        n_samples = np.ones(n_times)

    checks = [
        convergence(
            obj=objective(
                n_samples, emp_cov, Z_0, Z_0, Z_M, alpha, kernel, psi))
    ]
    for iteration_ in range(max_iter):
        # update K
        A = Z_0 - U_0
        for m in range(1, n_times):
            A[:-m] += Z_M[m][0] - U_M[m][0]
            A[m:] += Z_M[m][1] - U_M[m][1]

        A /= n_times
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # K = np.array(map(soft_thresholding_, A))
        A += A.transpose(0, 2, 1)
        A /= 2.0

        A *= -rho * n_times / n_samples[:, None, None]
        A += emp_cov

        K = np.array([
            prox_logdet(a, lamda=ni / (rho * n_times))
            for a, ni in zip(A, n_samples)
        ])

        # update Z_0
        A = K + U_0
        A += A.transpose(0, 2, 1)
        A /= 2.0
        Z_0 = soft_thresholding(A, lamda=alpha / rho)

        # update residuals
        U_0 += K - Z_0

        # other Zs
        for m in range(1, n_times):
            U_L, U_R = U_M[m]
            A_L = K[:-m] + U_L
            A_R = K[m:] + U_R
            if not psi_node_penalty:
                prox_e = prox_psi(
                    A_R - A_L,
                    lamda=2.0 * np.diag(kernel, m)[:, None, None] / rho)
                Z_L = 0.5 * (A_L + A_R - prox_e)
                Z_R = 0.5 * (A_L + A_R + prox_e)
            else:
                Z_L, Z_R = prox_psi(
                    np.concatenate((A_L, A_R), axis=1),
                    lamda=0.5 * np.diag(kernel, m)[:, None, None] / rho,
                    rho=rho, tol=tol, rtol=rtol, max_iter=max_iter)
            Z_M[m] = (Z_L, Z_R)

            # update other residuals
            U_L += K[:-m] - Z_L
            U_R += K[m:] - Z_R

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(K - Z_0) + sum(
                squared_norm(K[:-m] - Z_M[m][0]) +
                squared_norm(K[m:] - Z_M[m][1]) for m in range(1, n_times)))

        snorm = rho * np.sqrt(
            squared_norm(Z_0 - Z_0_old) + sum(
                squared_norm(Z_M[m][0] - Z_M_old[m][0]) +
                squared_norm(Z_M[m][1] - Z_M_old[m][1])
                for m in range(1, n_times)))

        obj = objective(n_samples, emp_cov, Z_0, K, Z_M, alpha, kernel,
                        psi) if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=n_features * n_times * tol + rtol * max(
                np.sqrt(
                    squared_norm(Z_0) + sum(
                        squared_norm(Z_M[m][0]) + squared_norm(Z_M[m][1])
                        for m in range(1, n_times))),
                np.sqrt(
                    squared_norm(K) + sum(
                        squared_norm(K[:-m]) + squared_norm(K[m:])
                        for m in range(1, n_times)))),
            e_dual=n_features * n_times * tol + rtol * rho * np.sqrt(
                squared_norm(U_0) + sum(
                    squared_norm(U_M[m][0]) + squared_norm(U_M[m][1])
                    for m in range(1, n_times))),
        )

        Z_0_old = Z_0.copy()
        for m in range(1, n_times):
            Z_M_old[m] = (Z_M[m][0].copy(), Z_M[m][1].copy())

        if verbose:
            print(
                "obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(
            rho, rnorm, snorm, iteration=iteration_,
            **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        U_0 *= rho / rho_new
        for m in range(1, n_times):
            U_L, U_R = U_M[m]
            U_L *= rho / rho_new
            U_R *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
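
# Illustrative usage sketch (not part of the original module): the kernel is a
# temporal RBF over the time indices, built with sklearn's gaussian_process
# kernels as done in the fitting code below; data and hyper-parameters are
# arbitrary.
def _example_kernel_time_graphical_lasso():
    """Call the ADMM solver with an RBF kernel over time points."""
    import numpy as np
    from sklearn.gaussian_process import kernels

    rng = np.random.RandomState(42)
    n_times, n_per_time, n_features = 4, 60, 8
    data = rng.randn(n_times, n_per_time, n_features)
    emp_cov = np.array([np.cov(x, rowvar=False) for x in data])

    # Similarity between time points: nearby times are strongly coupled.
    kernel = kernels.RBF(length_scale=2.0)(np.arange(n_times)[:, None])

    # Defaults return precision, covariance and the number of iterations.
    K, cov, n_iter = kernel_time_graphical_lasso(
        emp_cov, alpha=0.1, kernel=kernel,
        n_samples=np.full(n_times, n_per_time), psi="laplacian")
    return K, cov, n_iter
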
def _fit(self, emp_cov, n_samples):
    if self.kernel is None:
        # from scipy.optimize import minimize
        # discover best kernel parameter via EM
        # initialise precision matrices, as warm start
        self.precision_ = init_precision(emp_cov, mode=self.init)
        n_times = self.precision_.shape[0]
        theta_old = np.zeros(n_times * (n_times - 1) // 2)
        # idx = np.triu_indices(n_times, 1)
        kernel = np.eye(n_times)

        psi, _, _ = check_norm_prox(self.psi)
        if self.n_clusters is None:
            self.n_clusters = n_times

        for i in range(self.max_iter_ext):
            # E step - discover best kernel
            # , method='bounded', bounds=[(0, None)] * theta_old.size
            # theta = minimize(
            #     objective_similarity, theta_old,
            #     args=(self.precision_, self.classes_[:, None], psi)
            # ).x
            # theta -= np.min(theta)
            # theta /= np.max(theta)
            theta = precision_similarity(self.precision_, psi)

            # if i > 0 and np.linalg.norm(
            #         theta_old - theta) / theta.size < self.eps:
            #     break

            # kernel[idx] = theta
            # kernel[idx[::-1]] = theta
            kernel = theta

            labels_pred = AgglomerativeClustering(
                n_clusters=self.n_clusters, affinity="precomputed",
                linkage="complete").fit_predict(kernel)
            if i > 0 and np.linalg.norm(
                    labels_pred -
                    labels_pred_old) / labels_pred.size < self.eps:
                break

            kernel = kernels.RBF(0.0001)(labels_pred[:, None]) \
                + kernels.RBF(self.beta)(np.arange(n_times)[:, None])
            # normalize_matrix(kernel_sum)
            # kernel += kernel * self.beta

            # M step - fix the kernel matrix
            out = kernel_time_graphical_lasso(
                emp_cov, alpha=self.alpha, rho=self.rho, kernel=kernel,
                n_samples=n_samples, tol=self.tol, rtol=self.rtol,
                psi=self.psi, max_iter=self.max_iter, verbose=self.verbose,
                return_n_iter=True, return_history=self.return_history,
                update_rho_options=self.update_rho_options,
                compute_objective=self.compute_objective,
                init=self.precision_)
            if self.return_history:
                (self.precision_, self.covariance_, self.history_,
                 self.n_iter_) = out
            else:
                self.precision_, self.covariance_, self.n_iter_ = out

            theta_old = theta
            labels_pred_old = labels_pred
            # kernel = graph_k_means(
            #     list(self.precision_), 3, max_iter=100)
            # self.similarity_matrix = kernel
            # theta_old = kernel
            # if i > 0 and np.linalg.norm(
            #         theta_old - kernel) / kernel.size < self.eps:
            #     break
        else:
            warnings.warn("theta did not converge.")
        self.similarity_matrix_ = kernel

    else:
        kernel = self.kernel
        if kernel.shape[0] != self.classes_.size:
            raise ValueError(
                "Kernel size does not match classes of samples, "
                "got {} classes and kernel has shape {}".format(
                    self.classes_.size, kernel.shape[0]))

        out = kernel_time_graphical_lasso(
            emp_cov, alpha=self.alpha, rho=self.rho, kernel=kernel,
            n_samples=n_samples, tol=self.tol, rtol=self.rtol,
            psi=self.psi, max_iter=self.max_iter, verbose=self.verbose,
            return_n_iter=True, return_history=self.return_history,
            update_rho_options=self.update_rho_options,
            compute_objective=self.compute_objective, init=self.init)
        if self.return_history:
            (self.precision_, self.covariance_, self.history_,
             self.n_iter_) = out
        else:
            self.precision_, self.covariance_, self.n_iter_ = out

    return self
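
# A minimal sketch of the kernel construction used in the E step above:
# cluster labels drive a near-binary RBF term (about 1 for equal labels,
# about 0 otherwise), and a second RBF over time indices adds temporal
# smoothness. The helper name and default value are illustrative only.
def _example_cluster_kernel(labels_pred, n_times, beta=1.0):
    """Build a similarity kernel from cluster labels plus a temporal RBF."""
    import numpy as np
    from sklearn.gaussian_process import kernels

    # Very small length scale: same-cluster time points get weight ~1.
    label_term = kernels.RBF(0.0001)(np.asarray(labels_pred)[:, None])
    # Temporal smoothness between nearby time points.
    time_term = kernels.RBF(beta)(np.arange(n_times)[:, None])
    return label_term + time_term
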
def _fit(self, emp_cov, n_samples):
    if self.ker_param == "auto":
        from scipy.optimize import minimize_scalar

        if not callable(self.kernel):
            raise ValueError(
                "kernel should be a function if ker_param == 'auto'")
        # discover best kernel parameter via EM
        # initialise precision matrices, as warm start
        self.precision_ = init_precision(emp_cov, mode=self.init)
        theta_old = 0
        for i in range(self.max_iter_ext):
            # E step - discover best kernel parameter
            theta = minimize_scalar(
                objective_kernel,
                args=(self.precision_, self.psi, self.kernel,
                      self.classes_[:, None]),
                bounds=(0, emp_cov.shape[0]),
                method="bounded").x

            if i > 0 and abs(theta_old - theta) < 1e-5:
                break
            else:
                print("Found new theta: %f" % theta)

            # M step
            try:
                # this works if it is an ExpSineSquared or RBF kernel
                kernel = self.kernel(length_scale=theta)(
                    self.classes_[:, None])
            except TypeError:
                # maybe it's a ConstantKernel
                kernel = self.kernel(constant_value=theta)(
                    self.classes_[:, None])

            out = kernel_time_graphical_lasso(
                emp_cov, alpha=self.alpha, rho=self.rho, kernel=kernel,
                n_samples=n_samples, tol=self.tol, rtol=self.rtol,
                psi=self.psi, max_iter=self.max_iter, verbose=self.verbose,
                return_n_iter=True, return_history=self.return_history,
                update_rho_options=self.update_rho_options,
                compute_objective=self.compute_objective,
                init=self.precision_)
            if self.return_history:
                (self.precision_, self.covariance_, self.history_,
                 self.n_iter_) = out
            else:
                self.precision_, self.covariance_, self.n_iter_ = out
            theta_old = theta
        else:
            print("warning: theta not converged")

    else:
        if callable(self.kernel):
            try:
                # this works if it is an ExpSineSquared or RBF kernel
                kernel = self.kernel(length_scale=self.ker_param)(
                    self.classes_[:, None])
            except TypeError:
                # maybe it's a ConstantKernel
                kernel = self.kernel(constant_value=self.ker_param)(
                    self.classes_[:, None])
        else:
            kernel = self.kernel
            if kernel.shape[0] != self.classes_.size:
                raise ValueError(
                    "Kernel size does not match classes of samples, "
                    "got {} classes and kernel has shape {}".format(
                        self.classes_.size, kernel.shape[0]))

        out = kernel_time_graphical_lasso(
            emp_cov, alpha=self.alpha, rho=self.rho, kernel=kernel,
            n_samples=n_samples, tol=self.tol, rtol=self.rtol,
            psi=self.psi, max_iter=self.max_iter, verbose=self.verbose,
            return_n_iter=True, return_history=self.return_history,
            update_rho_options=self.update_rho_options,
            compute_objective=self.compute_objective, init=self.init)
        if self.return_history:
            (self.precision_, self.covariance_, self.history_,
             self.n_iter_) = out
        else:
            self.precision_, self.covariance_, self.n_iter_ = out

    return self
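
# A minimal sketch of the M-step kernel instantiation above: a scalar theta is
# turned into a kernel matrix over the class (time) labels, falling back to a
# constant-value parametrisation when the kernel has no length scale. The
# helper name is illustrative; the kernel object is assumed to follow
# sklearn's gaussian_process kernel API.
def _example_kernel_from_theta(kernel, theta, classes):
    """Instantiate a kernel matrix from a scalar parameter (illustrative)."""
    try:
        # works for ExpSineSquared, RBF and other length-scale kernels
        return kernel(length_scale=theta)(classes[:, None])
    except TypeError:
        # fall back to kernels parametrised by a constant value
        return kernel(constant_value=theta)(classes[:, None])
    # e.g. _example_kernel_from_theta(kernels.RBF, 2.0, np.arange(5))
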
def latent_time_graphical_lasso(
        emp_cov, alpha=0.01, tau=1., rho=1., beta=1., eta=1., max_iter=100,
        n_samples=None, verbose=False, psi='laplacian', phi='laplacian',
        mode='admm', tol=1e-4, rtol=1e-4, return_history=False,
        return_n_iter=True, update_rho_options=None, compute_objective=True,
        init='empirical'):
    r"""Latent variable time-varying graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i - L_i)
            + alpha ||K_i||_{od,1} + tau ||L_i||_*
            + beta sum_{i=2}^T Psi(K_i - K_{i-1})
            + eta sum_{i=2}^T Phi(L_i - L_{i-1})

    where S_i = (1/n_i) X_i^T \times X_i is the empirical covariance of
    the data matrix X_i (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data.
    alpha, tau, beta, eta : float, optional
        Regularisation parameters.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iterations before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. Default is to take
        the empirical covariance and invert it.

    Returns
    -------
    K, L, covariance : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T.
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    phi, prox_phi, phi_node_penalty = check_norm_prox(phi)

    Z_0 = init_precision(emp_cov, mode=init)
    Z_1 = Z_0.copy()[:-1]
    Z_2 = Z_0.copy()[1:]
    W_0 = np.zeros_like(Z_0)
    W_1 = np.zeros_like(Z_1)
    W_2 = np.zeros_like(Z_2)

    X_0 = np.zeros_like(Z_0)
    X_1 = np.zeros_like(Z_1)
    X_2 = np.zeros_like(Z_2)
    U_1 = np.zeros_like(W_1)
    U_2 = np.zeros_like(W_2)

    R_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)
    W_1_old = np.zeros_like(W_1)
    W_2_old = np.zeros_like(W_2)

    # divisor for consensus variables, accounting for two fewer matrices
    # at the first and last time point
    divisor = np.full(emp_cov.shape[0], 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    if n_samples is None:
        n_samples = np.ones(emp_cov.shape[0])

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = Z_0 - W_0 - X_0
        A += A.transpose(0, 2, 1)
        A /= 2.
        A *= -rho / n_samples[:, None, None]
        A += emp_cov
        # A = emp_cov / rho - A
        R = np.array(
            [prox_logdet(a, lamda=ni / rho) for a, ni in zip(A, n_samples)])

        # update Z_0
        A = R + W_0 + X_0
        A[:-1] += Z_1 - X_1
        A[1:] += Z_2 - X_2
        A /= divisor[:, None, None]
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # Z_0 = np.array(map(soft_thresholding_, A))
        Z_0 = soft_thresholding(
            A, lamda=alpha / (rho * divisor[:, None, None]))

        # update Z_1, Z_2
        A_1 = Z_0[:-1] + X_1
        A_2 = Z_0[1:] + X_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. * beta / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(
                np.concatenate((A_1, A_2), axis=1), lamda=.5 * beta / rho,
                rho=rho, tol=tol, rtol=rtol, max_iter=max_iter)

        # update W_0
        A = Z_0 - R - X_0
        A[:-1] += W_1 - U_1
        A[1:] += W_2 - U_2
        A /= divisor[:, None, None]
        A += A.transpose(0, 2, 1)
        A /= 2.
        W_0 = np.array([
            prox_trace_indicator(a, lamda=tau / (rho * div))
            for a, div in zip(A, divisor)
        ])

        # update W_1, W_2
        A_1 = W_0[:-1] + U_1
        A_2 = W_0[1:] + U_2
        if not phi_node_penalty:
            prox_e = prox_phi(A_2 - A_1, lamda=2. * eta / rho)
            W_1 = .5 * (A_1 + A_2 - prox_e)
            W_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            W_1, W_2 = prox_phi(
                np.concatenate((A_1, A_2), axis=1), lamda=.5 * eta / rho,
                rho=rho, tol=tol, rtol=rtol, max_iter=max_iter)

        # update residuals
        X_0 += R - Z_0 + W_0
        X_1 += Z_0[:-1] - Z_1
        X_2 += Z_0[1:] - Z_2
        U_1 += W_0[:-1] - W_1
        U_2 += W_0[1:] - W_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(R - Z_0 + W_0) + squared_norm(Z_0[:-1] - Z_1) +
            squared_norm(Z_0[1:] - Z_2) + squared_norm(W_0[:-1] - W_1) +
            squared_norm(W_0[1:] - W_2))

        snorm = rho * np.sqrt(
            squared_norm(R - R_old) + squared_norm(Z_1 - Z_1_old) +
            squared_norm(Z_2 - Z_2_old) + squared_norm(W_1 - W_1_old) +
            squared_norm(W_2 - W_2_old))

        obj = objective(
            emp_cov, n_samples, R, Z_0, Z_1, Z_2, W_0, W_1, W_2,
            alpha, tau, beta, eta, psi, phi) \
            if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * max(
                np.sqrt(
                    squared_norm(R) + squared_norm(Z_1) + squared_norm(Z_2) +
                    squared_norm(W_1) + squared_norm(W_2)),
                np.sqrt(
                    squared_norm(Z_0 - W_0) + squared_norm(Z_0[:-1]) +
                    squared_norm(Z_0[1:]) + squared_norm(W_0[:-1]) +
                    squared_norm(W_0[1:]))),
            e_dual=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * rho *
            np.sqrt(
                squared_norm(X_0) + squared_norm(X_1) + squared_norm(X_2) +
                squared_norm(U_1) + squared_norm(U_2)))

        R_old = R.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()
        W_1_old = W_1.copy()
        W_2_old = W_2.copy()

        if verbose:
            print(
                "obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(
            rho, rnorm, snorm, iteration=iteration_,
            **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        X_0 *= rho / rho_new
        X_1 *= rho / rho_new
        X_2 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, W_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
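
# Illustrative usage sketch (not part of the original solver code): same input
# format as the time-varying solver above, with the extra low-rank (latent)
# term controlled by tau and eta. Values below are arbitrary.
def _example_latent_time_graphical_lasso():
    """Call the latent-variable ADMM solver on synthetic data."""
    import numpy as np

    rng = np.random.RandomState(1)
    n_times, n_per_time, n_features = 3, 80, 6
    data = rng.randn(n_times, n_per_time, n_features)
    emp_cov = np.array([np.cov(x, rowvar=False) for x in data])

    # Defaults return the sparse precision K, the low-rank term L, the
    # covariance and the iteration count.
    K, L, cov, n_iter = latent_time_graphical_lasso(
        emp_cov, alpha=0.1, tau=1., beta=1., eta=1.,
        n_samples=np.full(n_times, n_per_time))
    return K, L, cov, n_iter
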
def fit(self, X, y):
    # Covariance does not make sense for a single feature
    X, y = check_X_y(
        X, y, accept_sparse=False, dtype=np.float64, order="C",
        ensure_min_features=2, estimator=self)

    self.classes_, n_samples = np.unique(y, return_counts=True)
    self.data = X.copy()
    if np.unique(self.data).size != 2:
        raise ValueError(
            "With the Ising distribution, the data must contain only two "
            "values, either 0 and 1 or -1 and 1")
    X = np.array([X[y == cl] for cl in self.classes_])

    if self.ker_param == "auto":
        from scipy.optimize import minimize_scalar

        if not callable(self.kernel):
            raise ValueError(
                "kernel should be a function if ker_param == 'auto'")
        # discover best kernel parameter via alternating minimization
        # initialise precision matrices, as warm start
        self.precision_ = init_precision(X, mode=self.init)
        theta_old = 0
        for i in range(self.max_iter_ext):
            # E step - discover best kernel parameter
            theta = minimize_scalar(
                objective_kernel,
                args=(self.precision_, self.psi, self.kernel,
                      self.classes_[:, None]),
                bounds=(0, X.shape[0]),
                method="bounded").x

            if i > 0 and abs(theta_old - theta) < 1e-5:
                break
            else:
                print("Found new theta: %f" % theta)

            # M step
            try:
                # this works if it is an ExpSineSquared or RBF kernel
                kernel = self.kernel(length_scale=theta)(
                    self.classes_[:, None])
            except TypeError:
                # maybe it's a ConstantKernel
                kernel = self.kernel(constant_value=theta)(
                    self.classes_[:, None])

            out = _fit_time_ising_model(
                X, alpha=self.alpha, rho=self.rho, kernel=kernel,
                tol=self.tol, rtol=self.rtol, psi=self.psi,
                max_iter=self.max_iter, verbose=self.verbose,
                return_n_iter=True, return_history=self.return_history,
                compute_objective=self.compute_objective,
                n_cores=self.n_cores)
            if self.return_history:
                self.precision_, self.history_, self.n_iter_ = out
            else:
                self.precision_, self.n_iter_ = out
            theta_old = theta
        else:
            print("warning: theta not converged")

    else:
        if callable(self.kernel):
            try:
                # this works if it is an ExpSineSquared or RBF kernel
                kernel = self.kernel(length_scale=self.ker_param)(
                    self.classes_[:, None])
            except TypeError:
                # maybe it's a ConstantKernel
                kernel = self.kernel(constant_value=self.ker_param)(
                    self.classes_[:, None])
        else:
            kernel = self.kernel
            if kernel.shape[0] != self.classes_.size:
                raise ValueError(
                    "Kernel size does not match classes of samples, "
                    "got {} classes and kernel has shape {}".format(
                        self.classes_.size, kernel.shape[0]))

        out = _fit_time_ising_model(
            X, alpha=self.alpha, rho=self.rho, kernel=kernel, tol=self.tol,
            rtol=self.rtol, psi=self.psi, max_iter=self.max_iter,
            verbose=self.verbose, return_n_iter=True,
            return_history=self.return_history,
            compute_objective=self.compute_objective)
        if self.return_history:
            self.precision_, self.history_, self.n_iter_ = out
        else:
            self.precision_, self.n_iter_ = out

    return self
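
# Illustrative input format for the `fit` method above (the owning estimator
# class is defined elsewhere in the module and is not named here): rows are
# observations, `y` marks the time point of each row, and the data must be
# binary (0/1 or -1/1) for the Ising model. Shapes below are arbitrary.
def _example_ising_fit_inputs():
    """Build synthetic binary data grouped into time points."""
    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.binomial(1, 0.5, size=(200, 10)).astype(np.float64)  # binary data
    y = np.repeat(np.arange(4), 50)  # 4 time points, 50 samples each
    # estimator.fit(X, y) would then group rows by time point and fit one
    # Ising model per time, coupled through the kernel.
    return X, y
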