class Solver(BaseSolver):
    """Lasso solver based on the lightning coordinate-descent package."""

    name = 'Lightning'

    install_cmd = 'pip'
    requirements = ['sklearn-contrib-lightning']
    requirements_import = ['lightning']
    requirements_install = [
        'git+https://github.com/scikit-learn-contrib/lightning.git'
    ]

    def set_objective(self, X, y, lmbd):
        """Store the problem data and build the estimator."""
        self.X = X
        self.y = y
        self.lmbd = lmbd
        # tol is tiny on purpose: stopping is driven by max_iter in run().
        self.clf = CDRegressor(
            loss='squared', penalty='l1', C=1, alpha=self.lmbd, tol=1e-15
        )

    def run(self, n_iter):
        """Fit the estimator for a fixed number of iterations."""
        self.clf.max_iter = n_iter
        self.clf.fit(self.X, self.y)

    def get_result(self):
        """Return the estimated coefficients as a flat 1-D array."""
        coef = self.clf.coef_
        return coef.flatten()
class Solver(BaseSolver):
    """Coordinate-descent Lasso solver from the lightning package."""

    name = 'Lightning'

    install_cmd = 'conda'
    requirements = [
        'pip:git+https://github.com/scikit-learn-contrib/lightning.git'
    ]
    references = [
        'M. Blondel, K. Seki and K. Uehara, '
        '"Block coordinate descent algorithms for large-scale sparse '
        'multiclass classification" '
        'Mach. Learn., vol. 93, no. 1, pp. 31-52 (2013)'
    ]

    def set_objective(self, X, y, lmbd):
        """Keep the problem data and instantiate the estimator."""
        self.X = X
        self.y = y
        self.lmbd = lmbd
        # Very small tol so convergence never stops the solver early;
        # run() controls termination through max_iter.
        self.clf = CDRegressor(
            loss='squared', penalty='l1', C=1, alpha=self.lmbd, tol=1e-15
        )

    def run(self, n_iter):
        """Run coordinate descent for ``n_iter`` iterations."""
        self.clf.max_iter = n_iter
        self.clf.fit(self.X, self.y)

    def get_result(self):
        """Flattened coefficient vector of the fitted estimator."""
        return self.clf.coef_.flatten()
def set_objective(self, X, y, lmbd, fit_intercept):
    """Store the problem data and configure the CD estimator.

    ``fit_intercept`` is recorded on the instance; the estimator itself
    is created with a tiny tolerance so that iteration count, not
    convergence, controls when fitting stops.
    """
    self.X = X
    self.y = y
    self.lmbd = lmbd
    self.fit_intercept = fit_intercept
    self.clf = CDRegressor(loss='squared', penalty='l1', C=.5,
                           alpha=self.lmbd, tol=1e-15)
def set_objective(self, X, y, lmbd):
    """Record the design matrix, targets and penalty, and build the solver."""
    self.X = X
    self.y = y
    self.lmbd = lmbd
    # tol=1e-15: stopping is governed externally via max_iter.
    self.clf = CDRegressor(
        loss='squared', penalty='l1', C=1, alpha=self.lmbd, tol=1e-15
    )
class Solver(BaseSolver):
    """Lasso solver using block coordinate descent from lightning."""

    name = 'Lightning'

    install_cmd = 'conda'
    requirements = [
        'cython',
        'pip:git+https://github.com/scikit-learn-contrib/lightning.git'
    ]
    references = [
        'M. Blondel, K. Seki and K. Uehara, '
        '"Block coordinate descent algorithms for large-scale sparse '
        'multiclass classification" '
        'Mach. Learn., vol. 93, no. 1, pp. 31-52 (2013)'
    ]

    def skip(self, X, y, lmbd, fit_intercept):
        """Skip configurations requiring an intercept fit."""
        if not fit_intercept:
            return False, None
        return True, f"{self.name} does not handle fit_intercept"

    def set_objective(self, X, y, lmbd, fit_intercept):
        """Store the problem data and build the estimator."""
        self.X = X
        self.y = y
        self.lmbd = lmbd
        self.fit_intercept = fit_intercept
        # Tiny tol: termination is controlled by max_iter in run().
        self.clf = CDRegressor(loss='squared', penalty='l1', C=.5,
                               alpha=self.lmbd, tol=1e-15)

    def run(self, n_iter):
        """Fit for exactly ``n_iter`` coordinate-descent iterations."""
        self.clf.max_iter = n_iter
        self.clf.fit(self.X, self.y)

    def get_result(self):
        """Return coefficients, appending the intercept when one was fit."""
        coef = self.clf.coef_.flatten()
        if not self.fit_intercept:
            return coef
        return np.r_[coef, self.clf.intercept_]
def fc_kernel(X, Y, copy_X=True, W=None, B=None, ret_reg=False, fit_intercept=True):
    """Fit a linear map from features ``X`` to targets ``Y``.

    Dispatches on ``dcfgs.ls`` to one of several backends: gradient
    descent (run in a worker), total least squares, a keras-based solver,
    a lightning ``CDRegressor``, a ``Ridge`` when ``dcfgs.fc_ridge > 0``,
    or a plain ``LinearRegression`` otherwise.

    Parameters
    ----------
    X : 2-D array of shape (N, c) (asserted 2-D).
    Y : targets to reconstruct.
    copy_X : must be truthy (asserted); forwarded to LinearRegression.
    W, B : optional warm-start weights / bias used by the GD and keras paths.
    ret_reg : when True, return the fitted estimator itself.
    fit_intercept : forwarded to LinearRegression.

    Returns
    -------
    ``(coef_, intercept_)`` of the fitted map — docstring note "n c"
    refers to the coefficient shape — or the estimator when ``ret_reg``.
    """
    # Fix: `assert copy_X == True` compared against True with ==; plain
    # truthiness is the idiomatic (and more general) check.
    assert copy_X, "copy_X must be True"
    assert len(X.shape) == 2, "X must be 2-D"
    if dcfgs.ls == cfgs.solvers.gd:
        # Run the gradient-descent solver inside a worker process.
        worker = Worker()

        def _job():
            from .GDsolver import fc_GD
            a, b = fc_GD(X, Y, W, B, n_iters=1)
            return {'a': a, 'b': b}

        outputs = worker.do(_job)
        return outputs['a'], outputs['b']
    elif dcfgs.ls == cfgs.solvers.tls:
        return tls(X, Y, debug=True)
    elif dcfgs.ls == cfgs.solvers.keras:
        _reg = keras_kernel()
        _reg.fit(X, Y, W, B)
        return _reg.coef_, _reg.intercept_
    elif dcfgs.ls == cfgs.solvers.lightning:
        # Dead `if 0:` warm-start block removed; estimator is fitted below.
        _reg = CDRegressor(n_jobs=-1, alpha=0, verbose=2)
    elif dcfgs.fc_ridge > 0:
        _reg = Ridge(alpha=dcfgs.fc_ridge)
    else:
        _reg = LinearRegression(n_jobs=-1, copy_X=copy_X,
                                fit_intercept=fit_intercept)
    _reg.fit(X, Y)
    if ret_reg:
        return _reg
    return _reg.coef_, _reg.intercept_
def fc_kernel(X, Y, copy_X=True, W=None, B=None, ret_reg=False, fit_intercept=True):
    """Fit a linear map from features ``X`` to targets ``Y``.

    Backend is selected by ``dcfgs.ls``: gradient descent (in a worker),
    total least squares, keras, lightning CD, ridge, or plain linear
    regression.  Returns ``(coef_, intercept_)`` — the "n c" shape noted
    in the original docstring — or the fitted estimator when ``ret_reg``.
    """
    assert copy_X == True
    assert len(X.shape) == 2  # X is expected as (N, features)
    if dcfgs.ls == cfgs.solvers.gd:
        # Gradient-descent path runs in a separate worker process.
        w = Worker()
        def wo():
            from .GDsolver import fc_GD
            a, b = fc_GD(X, Y, W, B, n_iters=1)
            return {'a': a, 'b': b}
        outputs = w.do(wo)
        return outputs['a'], outputs['b']
    elif dcfgs.ls == cfgs.solvers.tls:
        return tls(X, Y, debug=True)
    elif dcfgs.ls == cfgs.solvers.keras:
        _reg = keras_kernel()
        _reg.fit(X, Y, W, B)
        return _reg.coef_, _reg.intercept_
    elif dcfgs.ls == cfgs.solvers.lightning:
        #_reg = SGDRegressor(eta0=1e-8, intercept_decay=0, alpha=0, verbose=2)
        _reg = CDRegressor(n_jobs=-1, alpha=0, verbose=2)
        if 0:  # disabled warm start with the original layer weights
            _reg.intercept_ = B
            _reg.coef_ = W
    elif dcfgs.fc_ridge > 0:
        _reg = Ridge(alpha=dcfgs.fc_ridge)
    else:
        #redprint("fc_kernel entry here")
        _reg = LinearRegression(n_jobs=-1, copy_X=copy_X, fit_intercept=fit_intercept)
    #redprint("[in fc_kernel],X.shape=%s,Y.shape=%s"%(str(X.shape),str(Y.shape)))
    _reg.fit(X, Y)
    # Translated from Chinese: use LinearRegression to fit the linear map
    # from x (e.g. 66-dim) to y (e.g. 64-dim); coef_ is the weight matrix
    # (e.g. [64, 66]) in y = W x', and intercept_ is the bias term.
    #print('Coefficients.shape:', _reg.coef_.shape)
    #print('intercept.shape : ', _reg.intercept_.shape)
    if ret_reg:
        return _reg
    return _reg.coef_, _reg.intercept_
def fit(self, df_X, df_y, batch_size=50, shuffle=True, tmpdir=None):
    """Fit an l1/l2-penalized CDRegressor over batches of target columns.

    Parameters
    ----------
    df_X : DataFrame of motif scores, one row per region (mutated in
        place when ``self.scale`` — the scaled values are written back).
    df_y : DataFrame of targets; must have the same number of rows
        (regions) as ``df_X``.
    batch_size : number of target columns fitted per GridSearchCV run.
    shuffle : process target columns in a fixed random order (seed 42).
    tmpdir : optional checkpoint directory; finished batches are saved as
        feather files so an interrupted run can resume.

    Side effects: stores the coefficient matrix (motifs x targets) in
    ``self.act_`` and logs progress.
    """
    logger.info("Fitting LightningRegression")
    if self.scale:
        # Scale motif scores
        df_X[:] = scale(df_X, axis=0)
    # Normalize across samples and features
    # y = df_y.apply(scale, 1).apply(scale, 0)
    y = df_y
    X = df_X.loc[y.index]
    if not y.shape[0] == X.shape[0]:
        raise ValueError("number of regions is not equal")
    # Define model: grid-search the l1/l2 penalty strength over a
    # log-spaced grid exp(0) .. exp(-9.5).
    cd = CDRegressor(penalty="l1/l2", C=1.0)
    parameters = {"alpha": [np.exp(-x) for x in np.arange(0, 10, 1 / 2)]}
    clf = GridSearchCV(cd, parameters, n_jobs=self.ncpus)
    if shuffle:
        # Deterministic shuffle (random_state=42) so resume order matches.
        idx = list(y.sample(y.shape[1], axis=1, random_state=42).columns)
    else:
        idx = list(y.columns)
    if tmpdir:
        if not os.path.exists(tmpdir):
            os.mkdir(tmpdir)
    coefs = pd.DataFrame(index=X.columns)
    start_i = 0
    if tmpdir:
        # Reload completed batches; the first missing/incomplete batch
        # determines where fitting resumes.
        for i in range(0, len(idx), batch_size):
            fname = os.path.join(tmpdir, "{}.feather".format(i))
            if os.path.exists(fname) and os.path.exists(fname + ".done"):
                tmp = pd.read_feather(fname)
                tmp = tmp.set_index(tmp.columns[0])
                coefs = coefs.join(tmp)
            else:
                logger.info("Resuming at batch {}".format(i))
                start_i = i
                break
    for i in tqdm(range(start_i, len(idx), batch_size)):
        split_y = y[idx[i : i + batch_size]]
        # Fit model
        clf.fit(X.values, split_y.values)
        tmp = pd.DataFrame(
            clf.best_estimator_.coef_.T, index=X.columns, columns=split_y.columns
        )
        if tmpdir:
            fname = os.path.join(tmpdir, "{}.feather".format(i))
            tmp.reset_index().rename(columns=str).to_feather(fname)
            # Make sure we don't read corrupted files
            open(fname + ".done", "a").close()
        # Get coefficients
        coefs = coefs.join(tmp)
    # Get coefficients, re-ordered back to the original column order.
    self.act_ = coefs[y.columns]
    logger.info("Done")
def dictionary(X, W2, Y, alpha=1e-4, rank=None, DEBUG=0, B2=None, rank_tol=.1, verbose=0):
    """Select input channels via Lasso and least-squares-refit the weights.

    Channel responses ``Z`` are built from ``X`` and ``W2``; a sparse
    solver (MultiTaskLasso / Lasso / lightning CDRegressor, chosen by
    config) is fitted against ``Y``, and channels with non-zero
    coefficients survive.  When a target ``rank`` is given, ``alpha`` is
    binary-searched until the surviving-channel count lands in
    ``[lbound, rbound]``; with ``dcfgs.dic.alter`` the Lasso fit and a
    least-squares weight update are alternated.  Finally the kept
    channels' weights are re-fitted (path chosen by config flags).

    Parameters
    ----------
    X : ndarray, (N, c, h, w) input feature maps.
    W2 : ndarray, (n, c, h, w) weights of the following layer.
    Y : ndarray of target responses.
    alpha : initial l1 strength (recomputed when ``dcfgs.autodet``).
    rank : desired number of surviving channels; None when auto-detected.
    DEBUG : when truthy, return the pruned X instead of the channel mask.
    B2 : bias of the following layer, forwarded to the refit.
    rank_tol : tolerance on the achieved rank in the binary search.
    verbose : ignored — forced to 0 below.

    Returns
    -------
    ``(idxs, newW2, newB2)`` — boolean channel mask plus refitted
    weights/bias — or ``(newX, newW2, newB2)`` when ``DEBUG``.
    """
    verbose = 0  # hard-disables all verbose/timing output below
    if verbose:
        timer = Timer()
        timer.tic()
    if 0 and rank_tol != dcfgs.dic.rank_tol:  # disabled config override
        print("rank_tol", dcfgs.dic.rank_tol)
        rank_tol = dcfgs.dic.rank_tol
    # X: N c h w, W2: n c h w
    norank = dcfgs.autodet
    if norank:
        rank = None
    #TODO remove this
    N = X.shape[0]
    c = X.shape[1]
    h = X.shape[2]
    w = h  # assumes square spatial dims (w == h) — TODO confirm
    n = W2.shape[0]
    # TODO I forgot this
    # TODO support grp lasso
    if h == 1 and False:  # group-lasso path is disabled by the `and False`
        for i in range(2):
            assert Y.shape[i] == X.shape[i]
            pass
        grp_lasso = True
        mtl = 1
    else:
        grp_lasso = False
    if norank:
        alpha = cfgs.alpha / c**dcfgs.dic.layeralpha
    if grp_lasso:
        reX = X.reshape((N, -1))
        ally = Y.reshape((N, -1))
        samples = np.random.choice(N, N // 10, replace=False)
        Z = reX[samples].copy()
        reY = ally[samples].copy()
    else:
        # Subsample rows to keep the Lasso problem small.
        samples = np.random.randint(0, N, min(400, N // 20))
        #samples = np.random.randint(0,N, min(400, N//20))
        # c N hw
        reX = np.rollaxis(X.reshape((N, c, -1))[samples], 1, 0)
        #c hw n
        reW2 = np.transpose(W2.reshape((n, c, -1)), [1, 2, 0])
        if dcfgs.dic.alter:
            # Per-channel weight norms, used to rescale refits in updateW2.
            W2_std = np.linalg.norm(reW2.reshape(c, -1), axis=1)
        # c Nn
        Z = np.matmul(reX, reW2).reshape((c, -1)).T
        # Nn
        reY = Y[samples].reshape(-1)
    if grp_lasso:
        if mtl:
            print("solver: group lasso")
            _solver = MultiTaskLasso(alpha=alpha, selection='random', tol=1e-1)
        else:
            _solver = Lasso(alpha=alpha, selection='random')
    elif dcfgs.solver == cfgs.solvers.lightning:
        _solver = CDRegressor(C=1 / reY.shape[0] / 2, alpha=alpha, penalty='l1', n_jobs=10)
    else:
        _solver = Lasso(alpha=alpha, warm_start=True, selection='random')
    #, copy_X=False
    #rlasso = RandomizedLasso(n_jobs=1)
    #embed()

    def solve(alpha):
        """Fit the sparse solver at the given alpha; return (mask, count)."""
        if dcfgs.dic.debug:
            return np.array(c * [True]), c
        _solver.alpha = alpha
        _solver.fit(Z, reY)
        #_solver.fit(Z, reY)
        if grp_lasso and mtl:
            idxs = _solver.coef_[0] != 0.
        else:
            idxs = _solver.coef_ != 0.
        if dcfgs.solver == cfgs.solvers.lightning:
            # CDRegressor returns a 2-D coef_; take the single row.
            idxs = idxs[0]
        tmp = sum(idxs)
        return idxs, tmp

    def updateW2(idxs):
        """Refit W2 on the kept channels and rebuild Z (alternating step)."""
        nonlocal Z
        tmp_r = sum(idxs)
        reW2, _ = fc_kernel((X[:, idxs, :, :]).reshape(N, tmp_r * h * w), Y)
        reW2 = reW2.T.reshape(tmp_r, h * w, n)
        nowstd = np.linalg.norm(reW2.reshape(tmp_r, -1), axis=1)
        #for i in range(len(nowstd)):
        #    if nowstd[i] == 0:
        #        nowstd[i] = W2_std[i]
        # Rescale each kept channel back to its original weight norm.
        reW2 = (W2_std[idxs] / nowstd)[:, np.newaxis, np.newaxis] * reW2
        newshape = list(reW2.shape)
        newshape[0] = c
        newreW2 = np.zeros(newshape, dtype=reW2.dtype)
        newreW2[idxs, ...] = reW2
        Z = np.matmul(reX, newreW2).reshape((c, -1)).T
        if 0:
            print(_solver.coef_)
        return reW2

    if rank == c:
        # Nothing to prune: keep every channel.
        idxs = np.array([True] * rank)
    elif not norank:
        # Binary-search alpha so the surviving count lands in [lbound, rbound].
        left = 0
        right = cfgs.alpha
        lbound = rank  # - rank_tol * c
        if rank_tol >= 1:
            rbound = rank + rank_tol
        else:
            rbound = rank + rank_tol * rank
        #rbound = rank + rank_tol * c
        if rank_tol == .2:
            print("TODO: remove this")
            lbound = rank + 0.1 * rank
            rbound = rank + 0.2 * rank
        # Grow `right` until it is a valid upper bracket (count < rank).
        while True:
            _, tmp = solve(right)
            if False and dcfgs.dic.alter:  # disabled alternate bracketing
                if tmp > rank:
                    break
                else:
                    right /= 2
                    if verbose:
                        print("relax right to", right)
            else:
                if tmp < rank:
                    break
                else:
                    right *= 2
                    if verbose:
                        print("relax right to", right)
        # Bisection on alpha.
        while True:
            alpha = (left + right) / 2
            idxs, tmp = solve(alpha)
            if verbose:
                print(tmp, alpha, left, right)
            if tmp > rbound:
                left = alpha
            elif tmp < lbound:
                right = alpha
            else:
                break
        if dcfgs.dic.alter:
            # Alternating refinement: re-solve and refit W2 until the
            # surviving count stabilizes inside [lbound, rbound].
            if rbound == lbound:
                rbound += 1
            orig_step = left / 100 + 0.1  # right / 10
            step = orig_step

            def waitstable(a):
                """Alternate solve/updateW2 until the count stops changing."""
                tmp = -1
                cnt = 0
                for i in range(10):
                    tmp_rank = tmp
                    idxs, tmp = solve(a)
                    if tmp == 0:
                        break
                    updateW2(idxs)
                    if tmp_rank == tmp:
                        cnt += 1
                    else:
                        cnt = 0
                    if cnt == 2:  # stable for two consecutive rounds
                        break
                if 1:
                    if verbose:
                        print(tmp, "Z", Z.mean(), "c", _solver.coef_.mean())
                return idxs, tmp

            previous_Z = Z.copy()
            state = 0      # 0: last move increased alpha, 1: decreased
            statecnt = 0   # consecutive moves in the same direction
            inc = 10       # step growth factor after repeated same-direction moves
            while True:
                Z = previous_Z.copy()  # restart from the pristine responses
                idxs, tmp = waitstable(alpha)
                if tmp > rbound:
                    if state == 1:
                        state = 0
                        step /= 2
                        statecnt = 0
                    else:
                        statecnt += 1
                        if statecnt >= 2:
                            step *= inc
                    alpha += step
                elif tmp < lbound:
                    if state == 0:
                        state = 1
                        step /= 2
                        statecnt = 0
                    else:
                        statecnt += 1
                        if statecnt >= 2:
                            step *= inc
                    alpha -= step
                else:
                    break
                if verbose:
                    print(tmp, alpha, 'step', step)
            rank = tmp
    else:
        # Auto-detect: a single solve at the configured alpha.
        print("start lasso kernel")
        idxs, rank = solve(alpha)
        print("end lasso kernel")
    # print(rank, _solver.coef_)
    #reg.fit(Z[:, idxs], reY)
    #dic = reg.coef_[np.newaxis, :, np.newaxis, np.newaxis]
    #newW2 = W2[:, idxs, ...]*dic
    if verbose:
        timer.toc(show='lasso')
        timer.tic()
    # Refit weights for the surviving channels (path chosen by config).
    if grp_lasso:
        inW, inB = fc_kernel(reX[:, idxs], ally, copy_X=True)

        def preconv(a, b, res, org_res):
            '''
            Fold a linear map into the following conv weights.
            a: c c'
            b: n c h w
            res: c
            '''
            w = np.tensordot(a, b, [[0], [1]])
            r = np.tensordot(res, b, [[0], [1]]).sum((1, 2)) + org_res
            return np.rollaxis(w, 1, 0), r

        newW2, newB2 = preconv(inW, W2, inB, B2)
    elif dcfgs.ls == cfgs.solvers.lowparams:
        # Per-channel scalar rescaling only (fewer parameters to fit).
        reg = LinearRegression(copy_X=True, n_jobs=-1)
        assert dcfgs.fc_ridge == 0
        assert dcfgs.dic.alter == 0, "Z changed"
        reg.fit(Z[:, idxs], reY)
        newW2 = reg.coef_[np.newaxis, :, np.newaxis, np.newaxis] * W2[:, idxs, :, :]
        newB2 = reg.intercept_
    elif dcfgs.nonlinear_fc:
        newW2, newB2 = nonlinear_fc(X[:, idxs, ...].reshape((N, -1)), Y)
        newW2 = newW2.reshape((n, rank, h, w))
    elif dcfgs.nofc:
        # Keep original weights, just drop the pruned channels.
        newW2 = W2[:, idxs, :, :]
        newB2 = np.zeros(n)
    else:
        # Full least-squares refit, warm-started from the pruned weights.
        newW2, newB2 = fc_kernel(X[:, idxs, ...].reshape((N, -1)), Y, W=W2[:, idxs, ...].reshape(n, -1), B=B2)
        newW2 = newW2.reshape((n, rank, h, w))
    if verbose:
        timer.toc(show='ls')
    if not norank:
        # Persist the found alpha as the new global starting point.
        cfgs.alpha = alpha
    if verbose:
        print(rank)
    if DEBUG:
        #print(np.where(idxs))
        newX = X[:, idxs, ...]
        return newX, newW2, newB2
    else:
        return idxs, newW2, newB2