def calculate_Q(X: csc_matrix, update_in_place: bool = True) -> np.ndarray:
    """
    :param X: a CSC matrix of shape (rows=features, cols=observations).
    :param update_in_place: if True, rescale the data of X in place; if False, operate on a copy.
    :return: the word-word correlation matrix Q as a dense Numpy ndarray.
    """
    n_features, n_observations = X.shape
    diagonal = np.zeros(n_features)
    if not update_in_place:
        X = X.copy()
    for col_idx in range(X.indptr.size - 1):
        col_start = X.indptr[col_idx]
        col_end = X.indptr[col_idx + 1]
        col_entries = X.data[col_start:col_end]
        col_sum = np.sum(col_entries)
        row_indices = X.indices[col_start:col_end]
        # TODO: figure out whether this loop and update by division can be
        # written in more idiomatic Numpy
        diagonal[row_indices] += col_entries / (col_sum * (col_sum - 1))
        X.data[col_start:col_end] = col_entries / np.sqrt(col_sum * (col_sum - 1))
    Q = X * X.T / n_observations
    Q = np.array(Q.todense(), copy=False)
    diagonal = diagonal / n_observations
    Q = Q - np.diag(diagonal)
    return Q
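# Hedged usage sketch for calculate_Q: a tiny feature-by-observation count
# matrix (rows=features, cols=observations, matching the docstring above).
# update_in_place=False keeps the input counts untouched. Not part of the
# original snippet.
import numpy as np
from scipy.sparse import csc_matrix

counts = csc_matrix(np.array([[2, 0, 1],
                              [1, 3, 0],
                              [0, 1, 2]], dtype=np.float64))
Q = calculate_Q(counts, update_in_place=False)
print(Q.shape)  # (3, 3): one row/column per feature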
def adjacency2laplacian(adj: csc_matrix, degree: csc_matrix = None, mode: int = 0) -> csc_matrix:
    """Create a graph Laplacian matrix from the adjacency matrix.

    Parameters
    ----------
    adj (sparse matrix): Adjacency matrix.
    degree (sparse matrix): Optional, diagonal matrix containing the sum over the adjacency rows.
    mode (int):
        0 Returns the standard graph Laplacian, L = D - A.
        1 Returns the random walk normalized graph Laplacian, L = I - D^-1 * A.
        2 Returns the symmetric normalized graph Laplacian, L = I - D^-0.5 * A * D^-0.5.

    Returns
    -------
    L (sparse matrix): graph Laplacian
    """
    degree = adjacency2degree(adj) if degree is None else degree
    if mode == 0:
        # standard graph Laplacian
        return degree - adj
    elif mode == 1:
        # random walk graph Laplacian
        return eye(degree.shape[0], format='csc') - degree.power(-1) * adj
    elif mode == 2:
        # symmetric normalized graph Laplacian
        return eye(degree.shape[0], format='csc') - degree.power(-0.5) * adj * degree.power(-0.5)
    else:
        raise NotImplementedError
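# Hedged usage sketch: the three Laplacian modes on a 3-node path graph.
# Assumes adjacency2degree (defined further down in this collection) and
# scipy.sparse.eye are importable alongside the function above.
import numpy as np
from scipy.sparse import csc_matrix

adj = csc_matrix(np.array([[0., 1., 0.],
                           [1., 0., 1.],
                           [0., 1., 0.]]))
L = adjacency2laplacian(adj, mode=0)      # D - A
L_rw = adjacency2laplacian(adj, mode=1)   # I - D^-1 A
L_sym = adjacency2laplacian(adj, mode=2)  # I - D^-1/2 A D^-1/2
print(L.toarray())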
def csc_to_rmat(csc: sparse.csc_matrix):
    csc.sort_indices()
    t, conv_data, _ = get_type_conv(csc.dtype)
    return methods.new(
        f"{t}gCMatrix",
        i=as_integer(csc.indices),
        p=as_integer(csc.indptr),
        x=conv_data(csc.data),
        Dim=as_integer(list(csc.shape)),
    )
def fit(self, X: csc_matrix, y):
    print("Fit starts")
    # X, X_, y, y_ = train_test_split(X, y, self.test_fraction)
    e = self.predict(X) - y
    q = X.dot(self.V)
    n_samples, n_features = X.shape
    X = X.tocsc()
    for i in range(self.n_iter):
        # self.evaluate(i, X_, y_)
        # Global bias
        w0_ = -(e - self.w0).sum() / (n_samples + self.lambda_w0)
        e += w0_ - self.w0
        self.w0 = w0_
        # self.evaluate(i, X_, y_)
        # 1-way interaction
        for l in range(n_features):
            Xl = X.getcol(l).toarray()
            print(("\r Iteration #{} 1-way interaction "
                   "progress {:.2%}; train error {}").format(
                       i, l / n_features, err(e)), end="")
            w_ = -((e - self.w[l] * Xl) * Xl).sum() / (
                np.power(Xl, 2).sum() + self.lambda_w)
            e += (w_ - self.w[l]) * Xl
            self.w[l] = w_
        # self.evaluate(i, X_, y_)
        # 2-way interaction
        for f in range(self.latent_dimension):
            Qf = q[:, f].reshape(-1, 1)
            for l in range(n_features):
                Xl = X.getcol(l)
                idx = Xl.nonzero()[0]
                Xl = Xl.data.reshape(-1, 1)
                Vlf = self.V[l, f]
                print(("\r Iteration #{} 2-way interaction progress {:.2%};"
                       + "error {:.5}; validation_error NO").format(
                           i,
                           (f * n_features + l) / (self.latent_dimension * n_features),
                           err(e)), end="")
                h = Xl * Qf[idx] - np.power(Xl, 2) * Vlf
                v_ = -((e[idx] - Vlf * h) * h).sum() / (
                    np.power(h, 2).sum() + self.lambda_v)
                e[idx] += (v_ - Vlf) * h
                Qf[idx] += (v_ - Vlf) * Xl
                self.V[l, f] = v_
            q[:, f] = Qf.reshape(-1)
def SLIM(A: sp.csc_matrix, elanet: ElasticNet):
    '''
    SLIM: Sparse Linear Methods for Top-N Recommender Systems - Xia Ning; George Karypis
    https://ieeexplore.ieee.org/document/6137254

    Run Sparse Linear Models over the rating matrix A. It uses the ElasticNet object internally.
    (code from https://github.com/MaurizioFD/RecSys2019_DeepLearning_Evaluation/blob/master/SLIM_ElasticNet/SLIMElasticNetRecommender.py)

    :param A: Rating matrix n x m where m is the number of items. Must be a csc_matrix.
    :param elanet: ElasticNet object
    :return: W weight matrix.
    '''
    warnings.simplefilter("ignore")
    n_users, n_items = A.shape
    # Predicting one column at a time
    W_rows_idxs = []
    W_cols_idxs = []
    W_data = []
    for j in tqdm(range(n_items), leave=False):
        # Target column
        aj = A[:, j].toarray()
        # Removing the j-th item from all users:
        # zero the data entries belonging to the j-th column
        st_idx = A.indptr[j]
        en_idx = A.indptr[j + 1]
        copy = A.data[st_idx:en_idx].copy()
        A.data[st_idx:en_idx] = 0.0
        # Predicting the column
        elanet.fit(A, aj)
        # Fetching the coefficients (sparse)
        widx = elanet.sparse_coef_.indices
        wdata = elanet.sparse_coef_.data
        # Save information about the position in the final matrix
        W_rows_idxs += list(widx)
        W_cols_idxs += [j] * len(widx)
        W_data += list(wdata)
        # Reconstructing the matrix
        A.data[st_idx:en_idx] = copy
    W = sp.csr_matrix((W_data, (W_rows_idxs, W_cols_idxs)), shape=(n_items, n_items))
    return W
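# Hedged usage sketch for SLIM. The ElasticNet configuration below (positive
# coefficients, no intercept, small alpha) follows common SLIM practice but is
# an assumption, not prescribed by the function above.
import numpy as np
import scipy.sparse as sp
from sklearn.linear_model import ElasticNet

A = sp.random(100, 50, density=0.05, format='csc', random_state=0)
elanet = ElasticNet(alpha=1e-4, l1_ratio=0.1, positive=True,
                    fit_intercept=False, max_iter=100)
W = SLIM(A, elanet)
print(W.shape)  # (n_items, n_items); column j holds the coefficients for item j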
def significance(
    TTM: sp.csc_matrix,
    metric: Union[Callable, KeynessMetric],
    normalize: bool = False,
    n_contexts=None,
    n_words=None,
) -> sp.csc_matrix:
    """Computes statistical significance of co-occurrences using `metric`.

    Args:
        TTM (sp.csc_matrix): Term-term co-occurrence matrix.
        metric (Union[Callable, KeynessMetric]): Keyness metric or callable used to weight co-occurrences.
        normalize (bool, optional): Whether the metric should normalize the weights. Defaults to False.
        n_contexts: Number of contexts (passed to the metric as K).
        n_words: Total number of words (passed to the metric as N).

    Returns:
        sp.csc_matrix: Significance weights with the same shape as TTM.
    """
    metric = metric if callable(metric) else METRIC_FUNCTION.get(metric, _undefined)
    K: float = n_contexts
    N: float = n_words
    # Total number of observations (counts)
    Z: float = float(TTM.sum())
    # Number of observations per context (document, row sum)
    Zr = np.array(TTM.sum(axis=1), dtype=np.float64).flatten()
    # Row and column indices of non-zero elements
    ii, jj = TTM.nonzero()
    Cij: np.ndarray = np.array(TTM[ii, jj], dtype=np.float64).flatten()
    # Compute weights (with optional normalize)
    weights: np.ndarray = metric(Cij=Cij, Z=Z, Zr=Zr, ii=ii, jj=jj, K=K, N=N,
                                 normalize=normalize)
    np.nan_to_num(
        weights,
        copy=False,
        posinf=0.0,
        neginf=0.0,
        nan=0.0,
    )
    nz_indices: np.ndarray = weights.nonzero()
    return sp.csc_matrix(
        (weights[nz_indices], (ii[nz_indices], jj[nz_indices])),
        shape=TTM.shape,
    )
def lsmr_annihilate(x: csc_matrix, y: ndarray, use_cache: bool = True,
                    x_hash=None, **lsmr_options) -> ndarray:
    r"""
    Removes the projection of y onto the column space of x from y

    Parameters
    ----------
    x : csc_matrix
        Sparse array of regressors
    y : ndarray
        Array with shape (nobs, nvar)
    use_cache : bool
        Flag indicating whether results should be stored in the cache,
        and retrieved if available.
    x_hash : object
        Hashable object representing the values in x
    lsmr_options: dict
        Dictionary of options to pass to scipy.sparse.linalg.lsmr

    Returns
    -------
    resids : ndarray
        Returns the residuals from regressing y on x, (nobs, nvar)

    Notes
    -----
    Residuals are estimated column-by-column as

    .. math::

        \hat{\epsilon}_{j} = y_{j} - x^\prime \hat{\beta}

    where :math:`\hat{\beta}` is computed using lsmr.
    """
    use_cache = use_cache and x_hash is not None
    regressor_hash = x_hash if x_hash is not None else ''
    default_opts = dict(atol=1e-8, btol=1e-8, show=False)
    default_opts.update(lsmr_options)
    resids = []
    for i in range(y.shape[1]):
        _y = y[:, i:i + 1]
        variable_digest = ''
        if use_cache:
            hasher = hash_func()
            hasher.update(ascontiguousarray(_y.data))
            variable_digest = hasher.hexdigest()
        if use_cache and variable_digest in _VARIABLE_CACHE[regressor_hash]:
            resid = _VARIABLE_CACHE[regressor_hash][variable_digest]
        else:
            beta = lsmr(x, _y, **default_opts)[0]
            resid = y[:, i:i + 1] - (x.dot(csc_matrix(beta[:, None]))).A
            _VARIABLE_CACHE[regressor_hash][variable_digest] = resid
        resids.append(resid)
    if resids:
        return column_stack(resids)
    else:
        return empty_like(y)
def PopularItems(A: sp.csc_matrix, limit=50):
    """
    Returns the most popular items.

    :param A: user-item matrix
    :param limit: how many popular items should be returned. The other entries will be filled with 0s.
    """
    n = A.shape[0]
    # used for indexing
    dummy_column = np.arange(n).reshape(n, 1)
    # Counting the number of interactions
    item_count = np.asarray(A.sum(axis=0)).reshape(-1)
    # Partially sorted indexes
    part_sort_indexes = bn.argpartition(-item_count, kth=limit)
    # Focusing on the tops
    unsorted_idx_tops = part_sort_indexes[:limit]
    unsorted_tops = item_count[unsorted_idx_tops]
    sorted_idx_tops_part = np.argsort(unsorted_tops)
    # Extracting the indexes of the tops with respect to the original array
    sorted_idx_tops = part_sort_indexes[sorted_idx_tops_part]
    recommend = sp.lil_matrix(A.shape)
    # We assign real values between 0.5 and 1 to the tops so we can employ ranking metrics.
    recommend[dummy_column, sorted_idx_tops] = np.linspace(start=0.5, stop=1.0, num=limit)
    return recommend
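# Hedged usage sketch for PopularItems on a small random binary user-item
# matrix; `bn` in the function above is assumed to be the bottleneck package.
import numpy as np
import scipy.sparse as sp

rng = np.random.default_rng(0)
interactions = sp.csc_matrix((rng.random((20, 100)) < 0.1).astype(np.float64))
recommend = PopularItems(interactions, limit=10)
# Every user receives the same 10 most popular items, scored from 0.5 to 1.0.
print(recommend[0].nonzero()[1])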
def _calculate_neighbor_weight_matrix(self, train_r: csc_matrix) -> np.ndarray:
    l2norm = norm(train_r, ord=2, axis=1)
    l2norm[l2norm == 0] = 1  # handle zero vectors
    U: csc_matrix = train_r.multiply(1 / l2norm.reshape(-1, 1))
    UUT = U.dot(U.transpose()).toarray()  # dense
    W = np.exp(self._tau * np.power(1 - UUT, self._k))
    np.fill_diagonal(W, 0)
    return W
def matrix_similarity(urm: sp.csc_matrix, shrink: int):
    item_weights = np.sqrt(
        np.sum(urm.power(2), axis=0)
    ).A
    numerator = urm.T.dot(urm)
    denominator = item_weights.T.dot(item_weights) + shrink + 1e-6
    weights = numerator / denominator
    np.fill_diagonal(weights, 0.0)
    return weights
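# Hedged usage sketch: shrunk cosine item-item similarity from a tiny URM.
# The result is dense, so this is only meant for small matrices.
import numpy as np
import scipy.sparse as sp

urm = sp.csc_matrix(np.array([[1., 0., 1.],
                              [1., 1., 0.],
                              [0., 1., 1.]]))
sim = matrix_similarity(urm, shrink=5)
print(np.asarray(sim))  # diagonal forced to zero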
def train(self, train_r: csc_matrix):
    def cal_loss():
        rhat = W_theta.dot(R) + b
        rhat[rhat < 0] = 0
        return np.sum(np.power(rhat - R, 2)) / (2 * n)

    R = train_r.toarray()
    m, n = train_r.shape
    theta = np.zeros((m, m))
    b = np.zeros((m, 1))
    W = self._calculate_neighbor_weight_matrix(train_r)
    W_theta = W * theta
    alpha_W = self._alpha * W
    self._training_history = [cal_loss()]
    print(f"Loss before training: {self._training_history[-1]:0.8E}")
    for epoch in range(self._max_epoch):
        pbar = tqdm(range(math.ceil(n / self._batch_size)))
        for batch_index in pbar:
            start = batch_index * self._batch_size
            B = train_r[:, start:start + self._batch_size].toarray()
            bsize = B.shape[1]
            B_hat = W_theta.dot(B) + b
            B_hat_B = (B_hat - B)
            B_hat_B[B_hat < 0] = 0  # multiply ReLu'(r_hat)
            delta_theta = alpha_W / bsize * (B_hat_B.dot(
                B.transpose())) + self._lambda * theta
            delta_b = self._alpha / bsize * (np.sum(
                B_hat_B, axis=1, keepdims=True))
            # update theta
            theta = theta - delta_theta
            b = b - delta_b
            # update W_theta
            W_theta = W * theta
            delta_eps = np.sum(
                np.power(delta_theta, 2) + np.power(delta_b, 2))
            pbar.set_description(
                f"Epoch {epoch + 1} batch {batch_index + 1} delta={delta_eps:.8E}")
        pbar.close()
        loss = cal_loss()
        self._training_history.append(loss)
        print(f"Final Loss of epoch {epoch + 1}: {loss:0.8E}")
    self._W_theta = W_theta
    self._b = b
def get_tfidf_matrix(cnts: sp.csc_matrix):
    """Convert the word count matrix into a tfidf one.

    tfidf = log(tf + 1) * log((N - Nt + 0.5) / (Nt + 0.5))
    * tf = term frequency in document
    * N = number of documents
    * Nt = number of occurrences of term in all documents
    """
    Ns = get_doc_freqs(cnts)
    idfs = np.log((cnts.shape[1] - Ns + 0.5) / (Ns + 0.5))
    idfs[idfs < 0] = 0
    idfs = sp.diags(idfs, 0)
    tfs = cnts.log1p()
    tfidfs = idfs.dot(tfs)
    return tfidfs
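# Hedged end-to-end sketch. get_doc_freqs is referenced above but not shown
# here; the version below (number of documents each term occurs in) is an
# assumed implementation for illustration only.
import numpy as np
import scipy.sparse as sp

def get_doc_freqs(cnts: sp.csc_matrix) -> np.ndarray:
    # document frequency Nt of each term (row)
    return np.asarray((cnts > 0).sum(axis=1)).reshape(-1)

# terms x documents count matrix
cnts = sp.csc_matrix(np.array([[2, 0, 1],
                               [0, 3, 0],
                               [1, 1, 1]], dtype=np.float64))
tfidfs = get_tfidf_matrix(cnts)
print(tfidfs.toarray())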
def __adjustTransitionMatrix(self, M: sparse.csc_matrix) \
        -> sparse.csc_matrix:
    """Function to compute the adjusted Markov transition matrix, given
    the unadjusted matrix. This method enforces column stochastic behavior.

    Returns:
        sparse.csc_matrix -- Adjusted Markov transition matrix.
    """
    logging.info('Building adjusted transition matrix')

    # counter
    last_check = 0

    logging.info('Computing sum of columns of M')
    magnitudes = M.sum(axis=0)

    logging.info('Iterating through each column, rebalancing')

    # Iterate through each column
    for i in range(self.N):
        # Isolating magnitude
        magnitude = magnitudes[0, i]

        # If criteria are satisfied, redistribute probabilities
        if (magnitude < 1.0) and (magnitude != 0):
            count = M[:, i].nnz

            # Isolate nonzero indexes
            nonzero_idx = M[:, i].nonzero()[0]

            # Update indexes with balanced probabilities
            for idx in nonzero_idx:
                M[idx, i] = 1 / count

        # Log progress
        last_check = logLoopProgress(i, last_check, self.N,
                                     'Stable transition matrix')

    logging.info('Built adjusted Markov transition matrix with {0} '
                 'elements'.format(M.nnz))

    return M
def SLIM_parallel(A: sp.csc_matrix, alpha: float, l1_ratio: float, max_iter: int):
    '''
    SLIM: Sparse Linear Methods for Top-N Recommender Systems - Xia Ning; George Karypis
    https://ieeexplore.ieee.org/document/6137254

    Run Sparse Linear Models over the rating matrix A.
    (code from https://github.com/ruhan/toyslim/blob/master/slim_parallel.py)

    :param A: Rating matrix n x m where m is the number of items. Must be a csc_matrix.
    :param alpha: a + b (a and b are the multipliers of the L1 and L2 penalties, respectively)
    :param l1_ratio: a / (a + b) (a and b are the multipliers of the L1 and L2 penalties, respectively)
    :param max_iter: maximum number of iterations to run for each item
    :return: W weight matrix.
    '''
    warnings.simplefilter("ignore")
    n_items = A.shape[1]
    ranges = generate_slices(n_items)
    separated_tasks = []
    for from_j, to_j in ranges:
        separated_tasks.append([from_j, to_j, A.copy(), alpha, l1_ratio, max_iter])
    with multiprocessing.Pool() as pool:
        results = pool.map(work, separated_tasks)
    W_rows_idxs = list(itertools.chain(*[x[0] for x in results]))
    W_cols_idxs = list(itertools.chain(*[x[1] for x in results]))
    W_data = list(itertools.chain(*[x[2] for x in results]))
    W = sp.csr_matrix((W_data, (W_rows_idxs, W_cols_idxs)), shape=(n_items, n_items))
    return W
def optimize(sparse_km: csc_matrix, gamma: float, regcoef: float, L1: float,
             eps: float, max_iter: int) -> (csc_matrix, dict):
    """
    Perform SVM on the sparsified kernel matrix.
    :param sparse_km: sparsified kernel matrix.
    :param eps: epsilon value.
    :param max_iter: maximal number of iterations.
    :return: object weights and a log dictionary.
    """
    if sparse_km.shape[0] != sparse_km.shape[1]:
        raise Exception("Kernel matrix is not a square matrix")

    log = {"grad_norm": [], "time": []}
    N = sparse_km.shape[0]

    def grad_f(x):
        t = x.copy()
        t.data -= 1 / (2 * N * regcoef)
        return -csr_matrix((N, 1)) + sparse_km.dot(x) - \
            gamma * sparse_clip(-x, 0, None) + gamma * sparse_clip(t, 0, None)

    x0 = csr_matrix((N, 1))
    x0[0, 0] = 1 / 2
    grad_f0 = grad_f(x0)
    grad_min = BasicGradientUpdater(grad_f0.T)
    grad_max = BasicGradientUpdater(-grad_f0.T)
    iter_counter = 0
    start = timeit.default_timer()
    current_point = x0
    true_grad = grad_f0
    while grad_min.get_norm() > eps**2 or iter_counter < max_iter:
        # if true_grad == grad_min.get():
        log["grad_norm"].append(grad_min.get_norm())
        log["time"].append(timeit.default_timer() - start)
        i_plus = grad_max.get_coordinate()
        g_plus = -grad_max.get_value()
        i_minus = grad_min.get_coordinate()
        g_minus = grad_min.get_value()
        h_val = 1 / (4 * L1) * (g_plus - g_minus)
        h = csr_matrix((N, 1))
        h[i_plus, 0] = h_val
        h[i_minus, 0] = -h_val
        t = current_point.copy()
        # workaround: a "sparse vector + constant" operation is not implemented,
        # so the constant is subtracted from the stored nonzero data directly
        t.data -= 1 / (2 * N * regcoef)
        delta_grad = sparse_km.dot(h)
        delta_grad -= gamma * sparse_clip(-current_point - h, 0, None)
        delta_grad += gamma * sparse_clip(-current_point, 0, None)
        delta_grad += gamma * sparse_clip(t + h, 0, None)
        delta_grad -= gamma * sparse_clip(t, 0, None)
        grad_min.update(delta_grad.T)
        grad_max.update(-delta_grad.T)
        current_point += h
        true_grad = grad_f(current_point.T)
        iter_counter += 1
    return h, log
def adjacency2degree(adj: csc_matrix) -> csc_matrix:
    """Compute the degree matrix for a given adjacency matrix A"""
    return diags(np.asarray(adj.sum(1)).reshape(-1), format='csc')
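# Hedged check: the degree matrix of a 3-node path graph.
import numpy as np
from scipy.sparse import csc_matrix

adj = csc_matrix(np.array([[0., 1., 0.],
                           [1., 0., 1.],
                           [0., 1., 0.]]))
print(adjacency2degree(adj).diagonal())  # [1. 2. 1.]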
def solve_problem(self, solver_right_mat: sps.csc_matrix) -> list:
    dense_right_mat = solver_right_mat.todense()
    dst_def_vts = self.inv_left_mat.solve(dense_right_mat).tolist()
    return dst_def_vts
def __init__(self, x: np.ndarray, sg: np.ndarray, hess: np.ndarray,
             scaling: csc_matrix, g_dscaling: csc_matrix, delta: float,
             theta: float, ub: np.ndarray, lb: np.ndarray, logger: Logger):
    """
    :param x: Reference point
    :param sg: Gradient in rescaled coordinates
    :param hess: Hessian in unscaled coordinates
    :param scaling: Matrix that defines scaling transformation
    :param g_dscaling: Unscaled gradient multiplied by derivative of scaling
        transformation
    :param delta: Trust region radius in scaled coordinates
    :param theta: Stepback parameter that controls how close steps are allowed
        to get to the boundary
    :param ub: Upper boundary
    :param lb: Lower boundary
    :param logger: Logger instance used for debug output
    """
    self.x: np.ndarray = x
    self.s: Union[np.ndarray, None] = None
    self.sc: Union[np.ndarray, None] = None
    self.ss: Union[np.ndarray, None] = None
    self.og_s: Union[np.ndarray, None] = None
    self.og_sc: Union[np.ndarray, None] = None
    self.og_ss: Union[np.ndarray, None] = None
    self.sg: np.ndarray = sg.copy()
    self.scaling: csc_matrix = scaling.copy()
    self.delta: float = delta
    self.theta: float = theta
    self.lb: np.ndarray = lb
    self.ub: np.ndarray = ub
    self.br: np.ndarray = np.ones(sg.shape)
    self.minbr: float = 1.0
    self.alpha: float = 1.0
    self.iminbr: np.ndarray = np.array([])
    self.qpval: float = 0.0
    # B_hat (Eq 2.5) [ColemanLi1996]
    self.shess: np.ndarray = np.asarray(scaling * hess * scaling + g_dscaling)
    self.cg: Union[np.ndarray, None] = None
    self.chess: Union[np.ndarray, None] = None
    self.subspace: Union[np.ndarray, None] = None
    self.s0: np.ndarray = np.zeros(sg.shape)
    self.ss0: np.ndarray = np.zeros(sg.shape)
    self.reflection_indices: set = set()
    self.truncation_indices: set = set()
    self.logger: Logger = logger
def trust_region(x: np.ndarray, g: np.ndarray, hess: np.ndarray,
                 scaling: csc_matrix, delta: float, dv: np.ndarray,
                 theta: float, lb: np.ndarray, ub: np.ndarray,
                 subspace_dim: SubSpaceDim,
                 stepback_strategy: StepBackStrategy,
                 refine_stepback: bool, logger: logging.Logger) -> Step:
    """
    Compute a step according to the solution of the trust-region subproblem.
    If step-back is necessary, gradient and reflected trust region step are
    also evaluated in terms of their performance according to the local
    quadratic approximation

    :param x: Current values of the optimization variables
    :param g: Objective function gradient at x
    :param hess: (Approximate) objective function Hessian at x
    :param scaling: Scaling transformation according to distance to boundary
    :param delta: Trust region radius, note that this applies after scaling
        transformation
    :param dv: derivative of scaling transformation
    :param theta: parameter regulating stepback
    :param lb: lower optimization variable boundaries
    :param ub: upper optimization variable boundaries
    :param subspace_dim: Subspace dimension in which the subproblem will be
        solved. Larger subspaces require more compute time but can yield
        higher quality step proposals.
    :param stepback_strategy: Strategy that is applied when the proposed step
        exceeds the optimization boundary.
    :param refine_stepback: If set to True, proposed steps that are computed
        via the specified stepback_strategy will be refined via optimization.
    :param logger: logging.Logger instance to be used for logging

    :return: s: proposed step, ss: rescaled proposed step, qpval: expected
        function value according to local quadratic approximation, subspace:
        computed subspace for reuse if proposed step is not accepted,
        steptype: type of step that was selected for proposal
    """
    sg = scaling.dot(g)
    g_dscaling = csc_matrix(np.diag(np.abs(g) * dv))

    if subspace_dim == SubSpaceDim.TWO:
        tr_step = TRStep2D(x, sg, hess, scaling, g_dscaling, delta, theta,
                           ub, lb, logger)
    elif subspace_dim == SubSpaceDim.FULL:
        tr_step = TRStepFull(x, sg, hess, scaling, g_dscaling, delta, theta,
                             ub, lb, logger)
    else:
        raise ValueError('Invalid choice of subspace dimension.')
    tr_step.calculate()

    # in case of truncation, we hit the boundary and we check both the
    # gradient and the reflected step, either of which could be better than
    # the TR step
    steps = [tr_step]
    if tr_step.alpha < 1.0 and len(g) > 1:
        g_step = GradientStep(x, sg, hess, scaling, g_dscaling, delta, theta,
                              ub, lb, logger)
        g_step.calculate()
        steps.append(g_step)

        if stepback_strategy == StepBackStrategy.SINGLE_REFLECT:
            rtr_step = TRStepReflected(x, sg, hess, scaling, g_dscaling,
                                       delta, theta, ub, lb, tr_step)
            rtr_step.calculate()
            steps.append(rtr_step)

        if stepback_strategy in [StepBackStrategy.REFLECT,
                                 StepBackStrategy.MIXED]:
            steps.extend(
                stepback_reflect(tr_step, x, sg, hess, scaling, g_dscaling,
                                 delta, theta, ub, lb))
        if stepback_strategy in [StepBackStrategy.TRUNCATE,
                                 StepBackStrategy.MIXED]:
            steps.extend(
                stepback_truncate(tr_step, x, sg, hess, scaling, g_dscaling,
                                  delta, theta, ub, lb))
        if refine_stepback:
            steps.extend(
                stepback_refine(steps, x, sg, hess, scaling, g_dscaling,
                                delta, theta, ub, lb))

    if len(steps) > 1:
        rcountstrs = [
            str(step.reflection_count) * int(step.reflection_count > 0)
            for step in steps
        ]
        logger.debug(' | '.join([
            f'{step.type + rcountstr}: [qp: {step.qpval:.2E}, '
            f'a: {step.alpha:.2E}]'
            for rcountstr, step in zip(rcountstrs, steps)
        ]))

    qpvals = [step.qpval for step in steps]
    return steps[int(np.argmin(qpvals))]
def get_distances(node: Tuple[int, int], nodes: sparse.csc_matrix) -> List[float]:
    non_zero = [coord for coord in zip(*nodes.nonzero())]
    distances = spatial.distance.cdist([node], non_zero,
                                       metric='cityblock').flatten().tolist()
    return distances
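# Hedged usage sketch for get_distances: Manhattan distances from one cell to
# every nonzero cell of a small sparse grid.
import numpy as np
from scipy import sparse

grid = sparse.csc_matrix(np.array([[0, 1, 0],
                                   [0, 0, 1],
                                   [1, 0, 0]]))
print(get_distances((0, 0), grid))  # one Manhattan distance per nonzero cell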
def adjacency2transition(adj: csc_matrix, degree: csc_matrix = None) -> csc_matrix:
    """Compute the transition matrix associated with the adjacency matrix A"""
    degree = adjacency2degree(adj) if degree is None else degree
    return adj * degree.power(-1)
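# Hedged usage sketch: for a symmetric adjacency matrix the resulting
# transition matrix is column-stochastic (each column sums to 1).
import numpy as np
from scipy.sparse import csc_matrix

adj = csc_matrix(np.array([[0., 1., 1.],
                           [1., 0., 0.],
                           [1., 0., 0.]]))
T = adjacency2transition(adj)
print(T.sum(axis=0))  # [[1. 1. 1.]]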
def l1_normalize_row(X: sps.csc_matrix) -> sps.csc_matrix:
    result: sps.csc_matrix = X.astype(np.float64)
    result.sort_indices()
    l1_norms: np.ndarray = result.sum(axis=1).A1
    # For a CSC matrix, `indices` holds row indices, so each stored entry is
    # divided by the sum of its row (assumes non-negative data).
    result.data /= l1_norms[result.indices]
    return result
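# Hedged usage sketch: rows of the result sum to 1 (for rows with any nonzeros).
import numpy as np
import scipy.sparse as sps

X = sps.csc_matrix(np.array([[1., 3., 0.],
                             [0., 2., 2.]]))
print(l1_normalize_row(X).toarray())
# [[0.25 0.75 0.  ]
#  [0.   0.5  0.5 ]]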
def trust_region(x: np.ndarray, g: np.ndarray, hess: np.ndarray,
                 scaling: csc_matrix, delta: float, dv: np.ndarray,
                 theta: float, lb: np.ndarray, ub: np.ndarray,
                 subspace_dim: SubSpaceDim,
                 stepback_strategy: StepBackStrategy,
                 logger: logging.Logger) -> Step:
    """
    Compute a step according to the solution of the trust-region subproblem.
    If step-back is necessary, gradient and reflected trust region step are
    also evaluated in terms of their performance according to the local
    quadratic approximation

    :param x: Current values of the optimization variables
    :param g: Objective function gradient at x
    :param hess: (Approximate) objective function Hessian at x
    :param scaling: Scaling transformation according to distance to boundary
    :param delta: Trust region radius, note that this applies after scaling
        transformation
    :param dv: derivative of scaling transformation
    :param theta: parameter regulating stepback
    :param lb: lower optimization variable boundaries
    :param ub: upper optimization variable boundaries
    :param subspace_dim: Subspace dimension in which the subproblem will be
        solved. Larger subspaces require more compute time but can yield
        higher quality step proposals.
    :param stepback_strategy: Strategy that is applied when the proposed step
        exceeds the optimization boundary.
    :param logger: logging.Logger instance to be used for logging

    :return: s: proposed step
    """
    sg = scaling.dot(g)
    # diag(g_k)*J^v_k Eq (2.5) [ColemanLi1994]
    g_dscaling = csc_matrix(np.diag(np.abs(g) * dv))

    step_options = {
        SubSpaceDim.TWO: TRStep2D,
        SubSpaceDim.FULL: TRStepFull,
        SubSpaceDim.STEIHAUG: TRStepSteihaug,
    }
    tr_step = step_options[subspace_dim](x, sg, hess, scaling, g_dscaling,
                                         delta, theta, ub, lb, logger)
    tr_step.calculate()

    # in case of truncation, we hit the boundary and we check both the
    # gradient and the reflected step, either of which could be better than
    # the TR step
    steps = [tr_step]
    if tr_step.alpha < 1.0 and len(g) > 1:
        g_step = GradientStep(x, sg, hess, scaling, g_dscaling, delta, theta,
                              ub, lb, logger)
        g_step.calculate()
        steps.append(g_step)

        if stepback_strategy == StepBackStrategy.SINGLE_REFLECT:
            rtr_step = TRStepReflected(x, sg, hess, scaling, g_dscaling,
                                       delta, theta, ub, lb, tr_step)
            rtr_step.calculate()
            steps.append(rtr_step)

        if stepback_strategy in [StepBackStrategy.REFLECT,
                                 StepBackStrategy.MIXED]:
            steps.extend(
                stepback_reflect(tr_step, x, sg, hess, scaling, g_dscaling,
                                 delta, theta, ub, lb))
        if stepback_strategy in [StepBackStrategy.TRUNCATE,
                                 StepBackStrategy.MIXED]:
            steps.extend(
                stepback_truncate(tr_step, x, sg, hess, scaling, g_dscaling,
                                  delta, theta, ub, lb))
        if stepback_strategy == StepBackStrategy.REFINE and \
                tr_step.subspace.shape[1] > 1:
            ref_step = RefinedStep(x, sg, hess, scaling, g_dscaling, delta,
                                   theta, ub, lb, tr_step)
            ref_step.calculate()
            steps.append(ref_step)

    if len(steps) > 1:
        rcountstrs = [
            str(step.reflection_count) * int(step.reflection_count > 0)
            for step in steps
        ]
        logger.debug(' | '.join([
            f'{step.type + rcountstr}: [qp: {step.qpval:.2E}, '
            f'a: {step.alpha:.2E}]'
            for rcountstr, step in zip(rcountstrs, steps)
        ]))

    qpvals = [step.qpval for step in steps]
    return steps[np.argmin(qpvals)]