import numpy as np
from math import sqrt
from scipy.sparse import csc_matrix


def calculate_Q(X: csc_matrix, update_in_place: bool = True) -> np.ndarray:
    """
    :param X: a CSC matrix of shape (rows=features, cols=observations).
    :param update_in_place: if True (default), overwrite X.data with the
        rescaled entries; if False, leave the input X untouched and work on a copy.
    :return: the word-word correlation matrix Q as a dense NumPy ndarray.
    """
    n_features, n_observations = X.shape
    diagonal = np.zeros(n_features)
    if not update_in_place:
        X = X.copy()
    # Walk the CSC structure column by column: each column is one observation.
    for col_idx in range(X.indptr.size - 1):
        col_start = X.indptr[col_idx]
        col_end = X.indptr[col_idx + 1]
        col_entries = X.data[col_start:col_end]
        col_sum = np.sum(col_entries)
        row_indices = X.indices[col_start:col_end]
        # TODO: figure out whether this loop and update by division can be
        # written in more idiomatic NumPy
        diagonal[row_indices] += col_entries / (col_sum * (col_sum - 1))
        X.data[col_start:col_end] = col_entries / sqrt(col_sum * (col_sum - 1))
    Q = X * X.T / n_observations
    Q = np.array(Q.todense(), copy=False)
    diagonal = diagonal / n_observations
    Q = Q - np.diag(diagonal)
    return Q
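# Example usage (a minimal sketch; the toy count matrix below is made up for
# illustration). Note that every column must sum to at least 2 and the data
# should be floating point, otherwise the col_sum * (col_sum - 1) rescaling
# above divides by zero or is truncated to integers.
example_counts = csc_matrix(np.array([[2.0, 0.0, 1.0, 3.0],
                                      [1.0, 4.0, 0.0, 0.0],
                                      [0.0, 1.0, 2.0, 1.0]]))
example_Q = calculate_Q(example_counts, update_in_place=False)
print(example_Q.shape)  # (3, 3): one row/column per feature (word)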
import itertools
import multiprocessing
import warnings

import scipy.sparse as sp


def SLIM_parallel(A: sp.csc_matrix, alpha: float, l1_ratio: float, max_iter: int):
    '''
    SLIM: Sparse Linear Methods for Top-N Recommender Systems - Xia Ning; George Karypis
    https://ieeexplore.ieee.org/document/6137254

    Run Sparse Linear Methods (SLIM) over the rating matrix A.
    (code from https://github.com/ruhan/toyslim/blob/master/slim_parallel.py)

    :param A: Rating matrix n x m where m is the number of items. Must be a csc_matrix.
    :param alpha: a + b (a and b are the multipliers of the L1 and L2 penalties, respectively)
    :param l1_ratio: a / (a + b) (a and b are the multipliers of the L1 and L2 penalties, respectively)
    :param max_iter: maximum number of iterations to run for each item
    :return: W, the item-item weight matrix of shape (n_items, n_items).
    '''
    warnings.simplefilter("ignore")
    n_items = A.shape[1]

    # generate_slices and work are helpers defined alongside this function in
    # the toyslim module linked above: the item range is split into slices and
    # each slice is fitted in a separate worker process.
    ranges = generate_slices(n_items)
    separated_tasks = []
    for from_j, to_j in ranges:
        separated_tasks.append([from_j, to_j, A.copy(), alpha, l1_ratio, max_iter])

    with multiprocessing.Pool() as pool:
        results = pool.map(work, separated_tasks)

    # Each worker returns (row indices, column indices, values) for its slice
    # of W; chain them together and assemble the sparse weight matrix.
    W_rows_idxs = list(itertools.chain(*[x[0] for x in results]))
    W_cols_idxs = list(itertools.chain(*[x[1] for x in results]))
    W_data = list(itertools.chain(*[x[2] for x in results]))
    W = sp.csr_matrix((W_data, (W_rows_idxs, W_cols_idxs)), shape=(n_items, n_items))
    return W
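# A hypothetical sketch of the per-slice worker that SLIM_parallel hands to
# multiprocessing.Pool. The real `work` (and `generate_slices`) live in the
# toyslim module linked above; the function below only illustrates the SLIM
# idea from the paper: for every item j, regress column A[:, j] on the other
# item columns with an ElasticNet (L1 + L2) penalty and a non-negativity
# constraint, excluding item j itself, and keep the nonzero coefficients as
# column j of W. `work_sketch` and its exact hyperparameters are assumptions,
# not the toyslim implementation.
import numpy as np
from sklearn.linear_model import ElasticNet


def work_sketch(task):
    from_j, to_j, A, alpha, l1_ratio, max_iter = task
    rows, cols, data = [], [], []
    model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, positive=True,
                       fit_intercept=False, max_iter=max_iter)
    for j in range(from_j, to_j):
        target = np.asarray(A[:, j].todense()).ravel()
        # Zero out column j so an item cannot be used to predict itself.
        A_j = A.copy()
        A_j.data[A_j.indptr[j]:A_j.indptr[j + 1]] = 0.0
        model.fit(A_j, target)
        for i in np.nonzero(model.coef_)[0]:
            rows.append(i)
            cols.append(j)
            data.append(model.coef_[i])
    return rows, cols, data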
def __init__(self, x: np.ndarray, sg: np.ndarray, hess: np.ndarray,
             scaling: csc_matrix, g_dscaling: csc_matrix, delta: float,
             theta: float, ub: np.ndarray, lb: np.ndarray, logger: Logger):
    """
    :param x: Reference point
    :param sg: Gradient in rescaled coordinates
    :param hess: Hessian in unscaled coordinates
    :param scaling: Matrix that defines scaling transformation
    :param g_dscaling: Unscaled gradient multiplied by derivative of scaling
        transformation
    :param delta: Trust region radius in scaled coordinates
    :param theta: Stepback parameter that controls how close steps are allowed
        to get to the boundary
    :param ub: Upper boundary
    :param lb: Lower boundary
    :param logger: Logger instance used for diagnostic output
    """
    self.x: np.ndarray = x

    self.s: Union[np.ndarray, None] = None
    self.sc: Union[np.ndarray, None] = None
    self.ss: Union[np.ndarray, None] = None

    self.og_s: Union[np.ndarray, None] = None
    self.og_sc: Union[np.ndarray, None] = None
    self.og_ss: Union[np.ndarray, None] = None

    self.sg: np.ndarray = sg.copy()
    self.scaling: csc_matrix = scaling.copy()

    self.delta: float = delta
    self.theta: float = theta

    self.lb: np.ndarray = lb
    self.ub: np.ndarray = ub

    self.br: np.ndarray = np.ones(sg.shape)
    self.minbr: float = 1.0
    self.alpha: float = 1.0
    self.iminbr: np.ndarray = np.array([])

    self.qpval: float = 0.0

    # B_hat (Eq 2.5) [ColemanLi1996]
    self.shess: np.ndarray = np.asarray(scaling * hess * scaling + g_dscaling)

    self.cg: Union[np.ndarray, None] = None
    self.chess: Union[np.ndarray, None] = None
    self.subspace: Union[np.ndarray, None] = None

    self.s0: np.ndarray = np.zeros(sg.shape)
    self.ss0: np.ndarray = np.zeros(sg.shape)

    self.reflection_indices: set = set()
    self.truncation_indices: set = set()

    self.logger: Logger = logger
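# A minimal, hypothetical sketch of the quantity stored in self.shess above
# (toy 2-D values; `scaling_toy` and `g_dscaling_toy` are made-up placeholders,
# not outputs of the surrounding optimizer). With S the scaling matrix, H the
# unscaled Hessian and g_dscaling = diag(g) * dS/dx as described in the
# docstring, the constructor assembles S * H * S + diag(g) * dS/dx, the matrix
# its comment identifies as B_hat (Eq 2.5) in [ColemanLi1996].
import numpy as np
from scipy.sparse import csc_matrix

hess_toy = np.array([[4.0, 1.0],
                     [1.0, 3.0]])                   # unscaled Hessian H
scaling_toy = csc_matrix(np.diag([0.5, 1.0]))       # scaling matrix S
g_dscaling_toy = csc_matrix(np.diag([0.1, -0.2]))   # diag(g) * dS/dx
shess_toy = np.asarray(scaling_toy * hess_toy * scaling_toy + g_dscaling_toy)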