def explain_node(self, node_idx, x, edge_index, **kwargs):
    """Fit a non-negative LassoLars model on kernel matrices of one node's subgraph.

    The model's predictions are obtained with ``self.__init_predict__`` and
    restricted, together with the features, by ``self.__subgraph__``.  Kernels
    of the features and of the predictions are then passed through
    ``self.__compute_gram_matrix__``, flattened, and regressed against each
    other.

    Args:
        node_idx: forwarded to ``self.__subgraph__``.
        x: feature tensor (must support ``detach().cpu().numpy()`` after the
            subgraph step).
        edge_index: forwarded to the prediction and subgraph helpers.
        **kwargs: forwarded unchanged to both helpers.

    Returns:
        The ``coef_`` attribute of the fitted ``LassoLars`` solver.
    """
    predictions = self.__init_predict__(x, edge_index, **kwargs)
    feats, probs, _, _, _, _ = self.__subgraph__(
        node_idx, x, predictions, edge_index, **kwargs)

    feats = feats.detach().cpu().numpy()  # (n, d)
    probs = probs.detach().cpu().numpy()  # (n, classes)
    n, d = feats.shape

    # Shape annotations below are carried over from the original author.
    feat_kernel = self.__compute_kernel__(feats, reduce=False)  # (n, n, d)
    pred_kernel = self.__compute_kernel__(probs, reduce=True)   # (n, n, 1)

    feat_gram = self.__compute_gram_matrix__(feat_kernel)       # (n, n, d)
    pred_gram = self.__compute_gram_matrix__(pred_kernel)       # (n, n, 1)

    # Flatten the pairwise axes so each (i, j) pair becomes one sample.
    design = feat_gram.reshape(n**2, d)   # (n ** 2, d)
    target = pred_gram.reshape(n**2, )    # (n ** 2,)

    solver = LassoLars(self.rho, fit_intercept=False,
                       normalize=False, positive=True)
    solver.fit(design * n, target * n)
    return solver.coef_
def RunLARSScikit():
    """Time a LassoLars fit on the benchmark dataset.

    Reads ``self.dataset[0]`` (inputs) and ``self.dataset[1]`` (responses) as
    CSV, translates the recognised entries of ``options`` into ``LassoLars``
    keyword arguments (consuming them from ``options``), and fits the model
    inside ``totalTimer``.

    Returns:
        ``totalTimer.ElapsedTime()`` on success, ``-1`` if construction or
        fitting raised.

    Raises:
        Exception: if ``options`` still holds entries after the recognised
            ones were consumed.
    """
    totalTimer = Timer()

    # Load input dataset.
    Log.Info("Loading dataset", self.verbose)
    inputData = np.genfromtxt(self.dataset[0], delimiter=',')
    responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

    # (benchmark option, LassoLars keyword, converter), in lookup order.
    translations = (
        ("lambda1", "alpha", float),
        ("max_iterations", "max_iter", int),
        ("epsilon", "eps", float),
    )
    lars_kwargs = {}
    for option_name, keyword, convert in translations:
        if option_name in options:
            lars_kwargs[keyword] = convert(options.pop(option_name))

    if len(options) > 0:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

    try:
        with totalTimer:
            # Perform LARS.
            estimator = LassoLars(**lars_kwargs)
            estimator.fit(inputData, responsesData)
            _ = estimator.coef_
    except Exception:
        return -1

    return totalTimer.ElapsedTime()
def RunLARSScikit(q):
    """Time a LassoLars fit and report the result through ``q``.

    Reads ``self.dataset[0]`` (inputs) and ``self.dataset[1]`` (responses) as
    CSV, extracts an integer ``-l <value>`` from the ``options`` string
    (default ``0.0``), and fits ``LassoLars(alpha=<value>)`` inside
    ``totalTimer``.

    Args:
        q: object with a ``put`` method; receives the elapsed time, or ``-1``
            when parsing or fitting raised.

    Returns:
        The value that was put on ``q``.
    """
    totalTimer = Timer()

    # Load input dataset.
    Log.Info("Loading dataset", self.verbose)
    inputData = np.genfromtxt(self.dataset[0], delimiter=',')
    responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

    try:
        with totalTimer:
            # Get all the parameters.  Raw string: "\d" in a plain literal is
            # an invalid escape sequence and warns on current Python versions.
            lambda1 = re.search(r"-l (\d+)", options)
            lambda1 = 0.0 if not lambda1 else int(lambda1.group(1))

            # Perform LARS.
            model = LassoLars(alpha=lambda1)
            model.fit(inputData, responsesData)
    except Exception:
        # Best-effort benchmark: any failure is reported as -1.
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def cv_train_lasso_lars_with_sparse_refit(x_train, y_train, pval_cutoff=0.001, do_sparse_refit=True):
    """Cross-validate a LassoLars model and optionally refit with a larger alpha.

    A ``LassoLarsCV`` model is fitted first (at most 10 folds, never more
    folds than rows).  With ``do_sparse_refit`` the function then walks the
    ``cv_alphas_`` entries that follow the selected one and compares their
    per-fold MSE with that of the selected alpha using ``ttest_ind``.  The
    alpha just before the first one whose MSE differs with a p-value below
    ``pval_cutoff`` is used to fit a plain ``LassoLars`` model; if no such
    alpha exists the last entry of ``cv_alphas_`` is used.

    Args:
        x_train: training design matrix; ``x_train.shape[0]`` is read.
        y_train: training targets.
        pval_cutoff: p-value threshold of the t-test.
        do_sparse_refit: when False the cross-validated model is returned
            directly.

    Returns:
        The refitted ``LassoLars`` model, or the ``LassoLarsCV`` model when
        ``do_sparse_refit`` is False.
    """
    model = LassoLarsCV(n_jobs=-1, cv=min(x_train.shape[0], 10))
    model.fit(x_train, y_train)

    if not do_sparse_refit:
        return model

    # np.flatnonzero yields a 1-D index array, so taking element 0 avoids the
    # deprecated conversion of a 2-D np.argwhere result to a Python int.
    best_alpha_idx = int(np.flatnonzero(model.cv_alphas_ == model.alpha_)[0])

    n_alphas = len(model.cv_alphas_)
    # Default when no alpha differs significantly: take the sparsest solution
    # (original author's wording for the last cv_alphas_ entry).
    sparse_alpha_idx = n_alphas - 1
    for i in range(best_alpha_idx + 1, n_alphas):
        pval = ttest_ind(model.mse_path_[best_alpha_idx], model.mse_path_[i]).pvalue
        if pval < pval_cutoff:
            sparse_alpha_idx = i - 1
            break

    model_sparse = LassoLars(alpha=model.cv_alphas_[sparse_alpha_idx])
    model_sparse.fit(x_train, y_train)
    return model_sparse
def LassoLars_score(X,y,**l1_parameters):
    """
    Score predictor based on `scikit-learn`_ LassoLars regression.

    Args:
        X (pandas.DataFrame): Transcription factor gene expressions where rows
            are experimental conditions and columns are transcription factors
        y (pandas.Series): Target gene expression vector where rows are
            experimental conditions
        **l1_parameters: Named parameters forwarded to the sklearn LassoLars
            constructor

    Returns:
        numpy.array: co-regulation scores, i.e. the absolute values of the
        fitted LassoLars coefficients. The i-th element is the score assigned
        to the regulatory relationship between the target gene and
        transcription factor i.

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> np.random.seed(0)
        >>> tfs = pd.DataFrame(np.random.randn(5,3),
                               index =["c1","c2","c3","c4","c5"],
                               columns=["tf1","tf2","tf3"])
        >>> tg = pd.Series(np.random.randn(5),index=["c1","c2","c3","c4","c5"])
        >>> scores = LassoLars_score(tfs,tg, alpha=0.01)
        >>> scores
        array([0.12179406, 0.92205553, 0.15503451])
    """
    fitted = LassoLars(**l1_parameters).fit(X, y)
    # Only the magnitude of each coefficient is used as a score.
    return np.abs(fitted.coef_)
def lassolarsdimension(data, label):
    """Keep only the columns of ``data`` that LassoLars assigns a non-zero weight.

    The regularisation strength is chosen by 5-fold ``LassoLarsCV`` and then
    used to fit a plain ``LassoLars`` model on the same data.

    Returns:
        tuple: ``(new_data, mask)`` where ``mask`` is the boolean array
        ``coef_ != 0`` and ``new_data`` is ``data[:, mask]``.
    """
    cv_search = LassoLarsCV(cv=5, max_iter=400)
    cv_search.fit(data, label)

    # Create the LassoLars object with the cross-validated alpha and fit it.
    selector = LassoLars(alpha=cv_search.alpha_).fit(data, label)

    mask = selector.coef_ != 0
    return data[:, mask], mask
def online_dict_learning(X, lmda, D_0, T, k_cluster, eps, _NF=200):
    '''
    Online dictionary learning ("algo 1 in the paper", per the original note).

    Each of the T iterations draws one random row of X, sparse-codes it
    against the current dictionary with LassoLars(alpha=lmda), accumulates
    the outer products into A_t and B_t, and calls dict_update to obtain the
    next dictionary.

    D_0: R^(m * k), initial dictionary
    X: R^(n * m), one sample per row
    T: number of iterations
    k_cluster: number of dictionary atoms (k)
    eps, _NF: forwarded to dict_update

    Returns the dictionary after the last update (D_0 itself when T == 0).
    '''
    n_dim, m_dim = X.shape

    A_t = np.zeros((k_cluster, k_cluster))
    B_t = np.zeros((m_dim, k_cluster))
    D_t = D_0

    t_start = time.time()
    for _ in range(T):
        # Draw one sample uniformly at random.
        x_sample = X[np.random.randint(0, n_dim), :]

        # Sparse code of the sample w.r.t. the current dictionary.
        alpha_t = LassoLars(alpha=lmda).fit(D_t, x_sample).coef_
        alpha_row = alpha_t.reshape(1, k_cluster)

        # Accumulate alpha * alpha^T and x * alpha^T.
        A_t += np.matmul(alpha_t.reshape(k_cluster, 1), alpha_row)
        B_t += np.matmul(x_sample.reshape(m_dim, 1), alpha_row)

        D_t = dict_update(D_t, A_t, B_t, eps=eps, _NF=_NF)

    print('Dcitionary update done! Time elapse {:.04f}s'.format(time.time() - t_start))
    return D_t
def Lasso_fit(alpha, x, y):
    """Fit an intercept-free LassoLars model and report its support.

    Returns:
        tuple: ``(idxs, c_cal)`` where ``idxs`` is the boolean array
        ``coef_ != 0`` and ``c_cal`` is the number of True entries.
    """
    # The constructor already stores alpha, so no separate assignment is needed.
    solver = LassoLars(alpha=alpha, fit_intercept=False, max_iter=3000)
    solver.fit(x, y)

    support = solver.coef_ != 0.
    return support, sum(support)
def __init__(
    self,
    propensity_learner=None,
    learner=None,
    learner_c=None,
    learner_t=None,
    delta=0.001,
):
    """Setup a DoubleRobustEstimator

    Args:
        propensity_learner: a classifier model with probability estimation
            method `predict_proba` like a sklearn LogisticRegression
        learner: generic outcome regression model for both outcomes; when
            given, each arm receives its own deep copy and `learner_c` /
            `learner_t` are ignored
        learner_c: specific control outcome regression model
        learner_t: specific treatment outcome regression model
        delta: stored on the instance as `self.delta`; not used here

    When none of `learner`, `learner_c` and `learner_t` is given, both arms
    default to a fresh `LassoLars()`.
    """
    self.propensity_learner = propensity_learner

    if learner is not None:
        # One independent copy per arm so the two fits do not share state.
        self.learner_c = copy.deepcopy(learner)
        self.learner_t = copy.deepcopy(learner)
    elif learner_c is None and learner_t is None:
        self.learner_c = LassoLars()
        self.learner_t = LassoLars()
    else:
        # At least one arm-specific learner was supplied: use both as given.
        self.learner_c = learner_c
        self.learner_t = learner_t

    self.delta = delta
def LARS_EN(Y, X, reg_param, reg_param1):
    '''
    Elastic net solved as a lasso problem on an augmented dataset.

    function takes
    - Y: n x 1 target variable (must be a column: it is stacked on top of a
      p x 1 block of zeros)
    - X: n x p dataset
    - reg_param: regularization parameter for l2-norm
    - reg_param1: regularization parameter for l1-norm

    function returns
    - beta: coefficients found by LassoLars on the augmented data, divided
      by sqrt(1 + reg_param)
    '''
    # Number of features
    p = X.shape[1]
    root = np.sqrt(1 + reg_param)

    # Artificial dataset for the naive elastic net: append sqrt(l2) * I to X
    # and p zeros to Y, then rescale X.
    augmented = np.vstack((X, np.sqrt(reg_param) * np.identity(p)))
    X = np.power(1 + reg_param, -0.5) * augmented
    Y = np.vstack((Y, np.zeros(shape=(p, 1))))
    gamma = reg_param1 / root

    # Center X (column means removed, scale untouched)
    X = StandardScaler(with_std=False).fit_transform(X)

    # Use the LARS (Efron 2004) algorithm to solve this lasso regression
    lasso = LassoLars(alpha=gamma, fit_intercept=False, max_iter=1000)
    lasso.fit(X, Y)

    # Transform the found coefficients in the elastic net coefficients
    return lasso.coef_ / root
def dataPreprocess():
    """
    Description: compress the feature set with the least-angle-regression
        Lasso (LassoLars). The first 13 columns of data/data1.csv are
        regressed on column 'y'; columns with a non-zero coefficient are
        kept together with column 13, a 'year' column (1994..2013) is added
        and the result is written to tmp/newData.csv.
    Params:

    Return:

    Author:
        HY
    Modify:
        2019/6/21 16:37
    """
    inputFile = 'data/data1.csv'
    outputFile = 'tmp/newData.csv'

    data = pd.read_csv(inputFile)
    candidates = data.columns[0:13]

    model = LassoLars(alpha=4, max_iter=1000)
    model.fit(data.iloc[:, 0:13], data['y'])
    coefs = model.coef_
    print(coefs)

    # Keep every candidate column whose coefficient is non-zero, plus column 13.
    newColumns = [column for index, column in enumerate(candidates)
                  if coefs[index] != 0]
    newColumns.append(data.columns[13])

    # Wrapped in a new DataFrame; the original note says this is meant to
    # avoid chained-assignment problems.
    newData = pd.DataFrame(data[newColumns])
    newData['year'] = list(range(1994, 2014, 1))
    newData.to_csv(outputFile, index=False)
def train(self):
    """Fit a LassoLars model on the outlier-truncated training data.

    Rows of ``self.TrainData`` whose ``logerror`` lies strictly between
    ``self._low`` and ``self._up`` are kept, the longitude/latitude columns
    are offset, the columns in ``self._l_drop_cols`` are dropped, and the
    remaining columns are used as float32 features.

    Side effects:
        Sets ``self._l_train_columns``, ``self._model`` and
        ``self._f_eval_train_model`` (the latter is only a path string; the
        model is not written to disk here).  ``self.TrainData`` itself is
        left untouched.

    Previously the filtered frame was computed but discarded, so training
    used the unfiltered, un-offset data and the "after" size equalled the
    "before" size.
    """
    start = time.time()

    print('size before truncated outliers is %d ' % len(self.TrainData))
    in_range = ((self.TrainData['logerror'] > self._low)
                & (self.TrainData['logerror'] < self._up))
    # Explicit copy so the in-place offsets below act on our own frame.
    TrainData = self.TrainData[in_range].copy()
    print('size after truncated outliers is %d ' % len(TrainData))

    # NOTE(review): these offsets now affect the fitted model; confirm that
    # prediction code applies the same offsets to its inputs.
    TrainData['longitude'] -= -118600000
    TrainData['latitude'] -= 34220000

    X = TrainData.drop(self._l_drop_cols, axis=1)
    Y = TrainData['logerror']
    self._l_train_columns = X.columns
    X = X.values.astype(np.float32, copy=False)

    lr = LassoLars(alpha=self._lr_alpha, max_iter=self._lr_iter, verbose=True)
    self._model = lr.fit(X, Y)
    end = time.time()

    print('Training iterates %d, time consumed %d ' % (self._model.n_iter_, (end - start)))

    # Path reserved for a pickled model; nothing is dumped in this method.
    self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(
        self.OutputDir,
        self.__class__.__name__,
        datetime.now().strftime('%Y%m%d-%H:%M:%S'))

    return
def lasso_subproblem(self, Xt):
    '''
    function which performs:
    - 4: Sparse coding with LARS

    INPUTS:
    - self (reads self.alpha, self.components and self.verbose)
    - Xt, data array regressed on self.components

    OUTPUT:
    - coef, the LassoLars coefficients as a transposed numpy matrix
    '''
    # print() with a single argument produces the same output on Python 2
    # and Python 3, unlike the former print statement.
    print("inside lasso")

    # 4: Sparse coding with LARS
    from sklearn.linear_model import LassoLars

    lars = LassoLars(alpha=self.alpha, verbose=False)
    lars.fit(self.components, Xt)

    coef = (np.asmatrix(lars.coef_)).T

    # Dimension control
    if self.verbose > 20:
        print("coef shape : %s" % (coef.shape,))

    return coef
def OnceTest(dataMat, labelMat):
    """Fit five linear models and print each one's squared test error.

    Rows ``0:99`` are used for fitting and rows ``100:199`` for testing.
    For every model the sum of squared differences between predictions and
    test labels is printed next to a descriptive label.

    The LassoLars line used to be computed from the ElasticNet model
    (``clf4``); every model is now evaluated with its own predictions.
    """
    train_X, train_y = dataMat[0:99], labelMat[0:99]
    test_X, test_y = dataMat[100:199], labelMat[100:199]

    def report(label, clf):
        # Fit on the training slice, then print the sum of squared errors.
        clf.fit(train_X, train_y)
        predicted = clf.predict(test_X)
        print(label, ((predicted - test_y)**2).sum())

    report('default LinearRegression', LinearRegression())
    report('Ridge alhpa=1 max_iter=100 tol=0.001',
           Ridge(alpha=1, max_iter=100, tol=0.001))
    report('Lasso alhpa=1 max_iter=100 tol=0.001',
           Lasso(alpha=1, max_iter=100, tol=0.001))
    # NOTE(review): the label says tol=0.001 while the model uses tol=1e-4;
    # both are kept as they were.
    report('ElasticNet alhpa=1 max_iter=100 tol=0.001',
           ElasticNet(alpha=1, l1_ratio=0.5, max_iter=100, tol=1e-4))
    report('LassoLars alhpa=1 max_iter=100',
           LassoLars(alpha=1, max_iter=100))
def LassoRegression(X_train, X_test, y_train, y_test):
    """Fit LassoLars(alpha=0.1) on the training frame and predict the test frame.

    Prints the number of training columns and then the number of training
    rows before fitting.

    Args:
        X_train: object exposing ``.values.tolist()`` (e.g. a DataFrame).
        X_test: passed to ``predict`` as is.
        y_train: object exposing ``.values.tolist()``.
        y_test: unused.

    Returns:
        The predictions for ``X_test``.
    """
    regr = LassoLars(alpha=0.1)

    # Convert once instead of rebuilding the nested list for every use.
    rows = X_train.values.tolist()

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(len(rows[0]))
    print(len(rows))

    regr.fit(rows, y_train.values.tolist())
    predictions = regr.predict(X_test)
    return predictions
class LassoLarsPrim(primitive):
    """Regressor primitive wrapping ``LassoLars(alpha=0.1)``."""

    def __init__(self, random_state=0):
        """Register the primitive under the name 'LassoLars' and build its model.

        ``random_state`` is stored on the instance; it is not passed to the
        model.
        """
        super(LassoLarsPrim, self).__init__(name='LassoLars')
        self.type = 'Regressor'
        self.accept_type = 'c_r'
        self.description = "LassoLars is a lasso model implemented using the LARS algorithm, and unlike the implementation based on coordinate descent, this yields the exact solution, which is piecewise linear as a function of the norm of its coefficients."
        self.hyperparams = []
        self.hyperparams_run = {'default': True}
        self.random_state = random_state
        self.model = LassoLars(alpha=0.1)

    def can_accept(self, data):
        """Delegate to ``can_accept_c`` with the 'Regression' task."""
        return self.can_accept_c(data, 'Regression')

    def is_needed(self, data):
        """Always report the primitive as needed."""
        return True

    def fit(self, data):
        """Fit the model on the 'X' and 'Y' entries of the handled data."""
        prepared = handle_data(data)
        self.model.fit(prepared['X'], prepared['Y'])

    def produce(self, data):
        """Predict on 'X' and replace 'X' by a one-column frame of predictions.

        Returns:
            dict: ``{0: output}`` where ``output`` is the handled data with a
            'predictions' entry and 'X' replaced by a DataFrame whose single
            column is named ``self.name + "Pred"``.
        """
        output = handle_data(data)
        output['predictions'] = self.model.predict(output['X'])
        prediction_frame = pd.DataFrame(output['predictions'],
                                        columns=[self.name+"Pred"])
        output['X'] = prediction_frame
        return {0: output}
def trainAlgo(self):
    """Build a LassoLars model from ``self.param`` and fit it.

    The hyper-parameters 'alpha', 'normalize', 'fit_intercept', 'max_iter'
    and 'positive' are read from ``self.param``; the model is stored in
    ``self.model`` and fitted on ``self.inputData['X']`` /
    ``self.outputData['Y']``.
    """
    hyper_names = ('alpha', 'normalize', 'fit_intercept', 'max_iter', 'positive')
    hyper = {name: self.param[name] for name in hyper_names}

    self.model = LassoLars(**hyper)
    self.model.fit(self.inputData['X'], self.outputData['Y'])
def RunLARSScikit(q):
    """Time a LassoLars fit configured from the ``options`` string.

    Reads ``self.dataset[0]`` (inputs) and ``self.dataset[1]`` (responses) as
    CSV and parses three optional settings out of ``options``:

    * ``-l <number>``: ``alpha`` (default 1.0)
    * ``--max_iter <integer>``: ``max_iter`` (default 500)
    * ``--eps <number>``: ``eps`` (default ``np.finfo(float).eps``)

    ``<number>`` accepts plain integers, decimals and exponent notation.
    The former pattern ``(\\d+)`` kept only the leading integer digits, so
    e.g. ``--eps 0.001`` was read as ``0.0``.

    Args:
        q: object with a ``put`` method; receives the elapsed time, or ``-1``
            when fitting raised.

    Returns:
        The value that was put on ``q``.
    """
    totalTimer = Timer()

    # Load input dataset.
    Log.Info("Loading dataset", self.verbose)
    inputData = np.genfromtxt(self.dataset[0], delimiter=',')
    responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

    # Integer, decimal or exponent notation (raw string: no invalid escapes).
    number = r"(\d*\.?\d+(?:[eE][-+]?\d+)?)"

    lambda1 = re.search(r"-l " + number, options)
    lambda1 = 1.0 if not lambda1 else float(lambda1.group(1))

    max_iter1 = re.search(r"--max_iter (\d+)", options)
    max_iter1 = 500 if not max_iter1 else int(max_iter1.group(1))

    eps1 = re.search(r"--eps " + number, options)
    eps1 = np.finfo(float).eps if not eps1 else float(eps1.group(1))

    try:
        with totalTimer:
            # Perform LARS.
            model = LassoLars(alpha=lambda1, max_iter=max_iter1, eps=eps1)
            model.fit(inputData, responsesData)
    except Exception:
        # Best-effort benchmark: any failure is reported as -1.
        q.put(-1)
        return -1

    elapsed = totalTimer.ElapsedTime()
    q.put(elapsed)
    return elapsed
def Lasso(x_train, y_train, x_test, y_test):
    """Fit a default LassoLars model and print its test MSE and R2.

    The scores are computed with the ``mse`` and ``r2`` callables available
    in the enclosing scope; nothing is returned.
    """
    fitted = LassoLars().fit(x_train, y_train)
    y_pred = fitted.predict(x_test)

    print("mse_score: " + str(mse(y_test, y_pred)))
    print("r2_score: " + str(r2(y_test, y_pred)))
def predict_LarsLasso(X, y, train, test, alpha=0.1):
    """Fit LassoLars on the ``train`` rows and predict the ``test`` rows.

    Args:
        X: feature frame, indexed positionally with ``.iloc``.
        y: target series, indexed positionally with ``.iloc``.
        train: positional indexer of the training rows.
        test: positional indexer of the rows to predict.
        alpha: LassoLars regularisation strength.

    Returns:
        Predictions for ``X.iloc[test]``.
    """
    # Fit
    fitted = LassoLars(alpha=alpha).fit(X.iloc[train], y.iloc[train])

    # Predict
    return fitted.predict(X.iloc[test])
def update_spatial_perpx(y, alpha, sub, C):
    """Non-negative LassoLars fit of ``y`` on the columns of ``C`` selected by ``sub``.

    Args:
        y: target vector; its dtype is used for the result.
        alpha: LassoLars regularisation strength.
        sub: mask selecting the columns of ``C`` that take part in the fit.
        C: design matrix with one column per entry of ``sub``.

    Returns:
        Array shaped like ``sub`` with dtype ``y.dtype``: the fitted
        coefficients at the selected positions and zeros elsewhere.  When
        nothing is selected the all-zero array is returned without fitting.
    """
    footprint = np.zeros_like(sub, dtype=y.dtype)

    # Nothing selected: no regression to run.
    if not (np.sum(sub) > 0):
        return footprint

    selected_columns = C[:, sub]
    fitted = LassoLars(alpha=alpha, positive=True).fit(selected_columns, y)
    footprint[np.nonzero(sub)[0]] = fitted.coef_
    return footprint
def select(self,X,y,weight,alpha=0.01):
    """Select feature indices from the LassoLars regularisation path.

    The last step of ``coef_path_`` that has at most ``self.n_features``
    non-zero coefficients is taken, and the indices of its non-zero
    coefficients are returned.  If that step has no non-zero coefficient the
    selection is retried with ``alpha`` multiplied by 0.01.

    Args:
        X: design matrix.
        y: target vector.
        weight: not used by this method; forwarded unchanged on retry.
        alpha: LassoLars regularisation strength.

    Returns:
        1-D array of selected column indices.
    """
    lars = LassoLars(normalize=False,alpha=alpha)
    lars.fit(X,y)

    # Number of active coefficients at every step of the path.
    n_active = (lars.coef_path_ != 0).sum(axis=0)
    path_idx = np.argwhere(n_active <= self.n_features)[-1,0]

    coef = lars.coef_path_[:,path_idx]
    f_indices = np.flatnonzero(coef != 0)

    if len(f_indices) == 0:
        # The retry used to omit the required `weight` argument, which made
        # this branch raise TypeError instead of relaxing alpha.
        f_indices = self.select(X,y,weight,alpha=alpha * 0.01)

    return f_indices
def lasso_lars(X, y):
    """Return the training RMSE of LassoLars(alpha=0.1) fitted on (X, y)."""
    # train model
    model = LassoLars(alpha=0.1)
    model.fit(X, y)

    # score on the same data it was trained on
    train_mse = mean_squared_error(y, model.predict(X))
    return sqrt(train_mse)
def __init__(self, random_state=0):
    """Register the primitive under the name 'LassoLars' and build its model.

    ``random_state`` is stored on the instance; it is not passed to the
    ``LassoLars(alpha=0.1)`` model created here.
    """
    super(LassoLarsPrim, self).__init__(name='LassoLars')

    # Descriptive metadata.
    self.type = 'Regressor'
    self.accept_type = 'c_r'
    self.description = "LassoLars is a lasso model implemented using the LARS algorithm, and unlike the implementation based on coordinate descent, this yields the exact solution, which is piecewise linear as a function of the norm of its coefficients."

    # Hyper-parameter bookkeeping.
    self.hyperparams = []
    self.hyperparams_run = {'default': True}
    self.random_state = random_state

    self.model = LassoLars(alpha=0.1)
def LassoLarsTest(dataMat, labelMat):
    """Print the squared test error of LassoLars and LassoLarsCV.

    Both models are fitted on rows ``0:99`` and evaluated on rows
    ``100:199``; the sum of squared differences between predictions and test
    labels is printed for each.
    """
    def report(tag, clf):
        # Fit on the training slice, then print the sum of squared errors.
        clf.fit(dataMat[0:99], labelMat[0:99])
        predicted = clf.predict(dataMat[100:199])
        print(tag, ((predicted - labelMat[100:199])**2).sum())

    report('LassoLars ', LassoLars(alpha=1, max_iter=100))
    report('LassoLarsCV', LassoLarsCV(max_n_alphas=10, max_iter=100))
def Lars_Lasso(kf,data,label,k):
    """Mean test MSE of LassoLars(alpha=.1) over the folds in ``kf``.

    Args:
        kf: iterable of ``(train, test)`` index pairs.
        data: 2-D array indexed as ``data[rows, :]``.
        label: 1-D array indexed as ``label[rows]``.
        k: kept for interface compatibility; the average is taken over the
            folds actually iterated.

    Returns:
        The mean of the per-fold mean squared errors, or ``0.0`` when ``kf``
        yields no fold.  The sum used to be divided by a hard-coded 3, which
        is only correct for exactly three folds.
    """
    total = 0
    n_folds = 0
    for train, test in kf:
        X_train, X_test = data[train,:], data[test,:]
        y_train, y_test = label[train], label[test]

        model = LassoLars(alpha=.1).fit(X_train,y_train)
        y_pred = model.predict(X_test)

        total += metrics.mean_squared_error(y_test, y_pred)
        n_folds += 1

    return total / n_folds if n_folds else 0.0
def ll_validate_test(X, y, X_vt, y_vt):
    """Fit LassoLars(alpha=0.1) on (X, y) and return its RMSE on (X_vt, y_vt)."""
    # train model
    model = LassoLars(alpha=0.1)
    model.fit(X, y)

    # validate model
    held_out_mse = mean_squared_error(y_vt, model.predict(X_vt))
    return sqrt(held_out_mse)
def _load_model(cls, fh):
    """Rebuild a LassoLars model from the stream ``fh``.

    The stream is expected to hold, in order, three literals read with
    ``_parse_literal`` (constructor parameters, ``active_`` and the shape of
    ``coef_``) followed by the raw coefficient bytes, 8 bytes per value.

    Args:
        fh: binary file-like object positioned at the start of the model.

    Returns:
        A ``LassoLars`` instance with ``coef_``, ``active_`` and
        ``intercept_`` (always 0.0) set.

    Raises:
        ValueError: if fewer coefficient bytes are available than the shape
            requires.
    """
    params = _parse_literal(fh)
    active = _parse_literal(fh)
    coef_shape = _parse_literal(fh)

    m = LassoLars().set_params(**params)
    m.intercept_ = 0.0

    n = int(np.prod(coef_shape)) * 8  # 8 bytes per float64 value
    raw = fh.read(n)
    if len(raw) != n:
        raise ValueError(
            "expected %d bytes of coefficients, got %d" % (n, len(raw)))

    # np.fromstring in binary mode is deprecated; np.frombuffer is the
    # replacement.  It returns a read-only view of `raw`, so copy to get a
    # writable array like np.fromstring used to return.
    m.coef_ = np.frombuffer(raw, dtype=np.float64).reshape(coef_shape).copy()
    m.active_ = active
    return m
def scaledlasso(self, X, y, intercept, lam0=None, sigma=None):
    """Estimate regression coefficients and a noise level from a LassoLars fit.

    Args:
        X: design matrix; ``X.shape`` is unpacked into ``(n, p)``.
        y: response vector.
        intercept: not used in this method.
        lam0: penalty level, or one of the strings 'univ' / 'universal' /
            'quantile'.  When None, 'univ' is chosen for p > 10**6 and
            'quantile' otherwise.
        sigma: only compared with None; when None the returned noise level
            is recomputed with a degrees-of-freedom correction.

    Returns:
        tuple: ``(hbeta, sigmahat)`` where ``hbeta`` is the ``coef_`` of the
        LassoLars model fitted on ``(X, y)`` and ``sigmahat`` the noise
        estimate.

    NOTE(review): several statements below look like an unfinished port and
    are flagged inline; the code itself is left untouched.
    """
    n, p = X.shape
    # Pick the default penalty rule from the number of columns.
    if lam0 == None:
        if p > pow(10, 6):
            lam0 = 'univ'
        else:
            lam0 = 'quantile'
    if lam0 == 'univ' or lam0 == 'universal':
        # NOTE(review): uses the base-10 logarithm; confirm that the natural
        # logarithm was not intended.
        lam0 = np.sqrt(2 * np.log10(p) / n)
    if lam0 == 'quantile':
        # Fixed-point iteration on L, stopped once successive values differ
        # by at most 0.001.
        L = 0.1
        Lold = 0
        while (np.abs(L - Lold) > 0.001):
            k = (L**4 + 2 * L**2)
            Lold = L
            # NOTE(review): the second positional argument of np.min is the
            # axis, so this presumably was meant to be min(k / p, 0.99);
            # confirm.
            L = -norm.ppf(np.min(k/p,0.99))
            L = (L + Lold) / 2
        if (p == 1):
            L = 0.5
        lam0 = np.sqrt(2 / n) * L
    sigmaint = 0.1
    sigmanew = 5
    flag = 0
    # The LassoLars path is fitted once, before the loop.
    objlasso = LassoLars(fit_intercept=False,eps=0.001,fit_path=True)
    objlasso.fit(X,y)
    # Iterate until the noise estimate changes by at most 0.0001, with flag
    # counting the passes (the loop is entered only while flag <= 100).
    while abs(sigmaint - sigmanew) > 0.0001 and flag <= 100:
        flag = flag + 1
        sigmaint = np.copy(sigmanew)
        lam = lam0 * sigmaint
        s = lam * n
        lams = objlasso.alphas_
        # NOTE(review): s looks like a scalar when lam0 is a scalar, yet it
        # is indexed like an array on the next lines; confirm the intended
        # type of s.
        s[np.where(s>np.max(lams))[0]]=np.max(lams)
        s[np.where(s<0)[0]]=0
        # NOTE(review): sfrac and the rescaled s are not read again in this
        # method.
        sfrac = (s-s[0])/(s[p-1]-s[0])
        s = (s-s[0])/(s[p-1]-s[0])
        # hbeta is read from the model fitted before the loop; the model is
        # not refitted per iteration.
        hbeta = objlasso.coef_
        hy = np.dot(X,hbeta)
        # Root mean squared residual.
        sigmanew = np.sqrt(np.mean(np.square(y - hy)))
    sigmahat = sigmanew
    hlam = lam  # not used afterwards
    if sigma == None:
        # Residual sum of squares divided by (n - number of non-zero
        # coefficients).
        sigmahat = np.sqrt(np.sum(np.square(y - hy)) / (n - np.sum(hbeta != 0)))
    return hbeta, sigmahat
class in_lassoLars(regression):
    """LassoLars regression configured from ``self.param``."""

    def trainAlgo(self):
        """Build a LassoLars model from ``self.param`` and fit it.

        The hyper-parameters 'alpha', 'normalize', 'fit_intercept',
        'max_iter' and 'positive' are read from ``self.param``; the model is
        fitted on ``self.inputData['X']`` / ``self.outputData['Y']``.
        """
        hyper_names = ('alpha', 'normalize', 'fit_intercept', 'max_iter', 'positive')
        hyper = {name: self.param[name] for name in hyper_names}

        self.model = LassoLars(**hyper)
        self.model.fit(self.inputData['X'], self.outputData['Y'])

    def predictAlgo(self):
        """Store the model's predictions for ``self.inputData['X']`` in ``self.result['Y']``."""
        predicted = self.model.predict(self.inputData['X'])
        self.result['Y'] = predicted
def metric(self):
    """Time a LassoLars fit on ``self.data`` and report the runtime.

    The model is built from ``self.build_opts`` and fitted on
    ``self.data[0]`` (inputs) and ``self.data[1]`` (responses) inside the
    timer.

    Returns:
        dict: ``{"runtime": <elapsed time reported by the timer>}``.
    """
    totalTimer = Timer()

    with totalTimer:
        fitted = LassoLars(**self.build_opts)
        fitted.fit(self.data[0], self.data[1])
        # The coefficients are read inside the timed region, as before.
        _ = fitted.coef_

    return {"runtime": totalTimer.ElapsedTime()}
def adaptiveLasso():
    '''
    Adaptive-Lasso variable selection model.

    Reads data/data1.csv, fits a default LassoLars model of column 'y' on
    the first 13 columns and prints the fitted coefficients.
    :return:
    '''
    frame = pd.read_csv('data/data1.csv')

    # Original note: import the AdaptiveLasso algorithm, which is only
    # available in newer Scikit-Learn versions (the class used is LassoLars).
    from sklearn.linear_model import LassoLars

    estimator = LassoLars().fit(frame.iloc[:, 0:13], frame['y'])
    print(estimator.coef_)
def fit_model_11(self,toWrite=False):
    """Fit LassoLars(alpha=1) on every split in ``self.cv_data`` and print its score.

    Each entry of ``self.cv_data`` is unpacked as
    ``(X_train, X_test, Y_train, Y_test)``.  The same model object is refitted
    for every split, so it ends up holding the fit of the last split.

    Args:
        toWrite: when true, pickle the model to 'model11/model.pkl'.
    """
    model = LassoLars(alpha=1,max_iter=5000)

    for X_train, X_test, Y_train, Y_test in self.cv_data:
        model.fit(X_train,Y_train)
        pred = model.predict(X_test)
        print("Model 11 score %f" % (logloss(Y_test,pred),))

    if toWrite:
        # pickle writes bytes: the file must be opened in binary mode, and the
        # context manager closes it even if dumping fails.
        with open('model11/model.pkl','wb') as f2:
            pickle.dump(model,f2)
def lasso_subproblem(self, Xt, comp):
    '''
    Sparse coding step: regress Xt on the components with LassoLars.

    INPUTS:
    - self (reads self.alpha and self.verbose)
    - Xt, data array used as regression target
    - comp, components used as design matrix

    OUTPUT:
    - coef, the LassoLars coefficients as a transposed numpy matrix
    '''
    # print() with a single argument produces the same output on Python 2
    # and Python 3, unlike the former print statement.
    print("inside lasso")

    # 4: Sparse coding with LARS
    lars = LassoLars(alpha=self.alpha, verbose=False)
    lars.fit(comp, Xt)

    coef = (np.asmatrix(lars.coef_)).T

    # Dimension control
    if self.verbose > 20:
        print("coef shape : %s" % (coef.shape,))

    return coef
def lasso_sklearn(dict, target, gamma):
    """
    Computes Lasso optimization

    Each column of ``target`` is regressed on the columns of ``dict`` with
    LassoLars (no intercept, no normalisation), using ``gamma`` divided by
    the number of target columns as regularisation strength.

    :param dict: dictionnary, one atom per column
    :type dict: np.array
    :param target: image, one sample per column
    :type target: np.array
    :param gamma: regularization factor
    :type gamma: float
    :return: coefficients of shape (dic_size, num_samples); column j holds
        the code of ``target[:, j]``
    :rtype: np.array
    """
    num_samples = target.shape[1]
    dic_size = dict.shape[1]

    # Rebind instead of dividing in place so a caller's array is not modified.
    gamma = gamma / num_samples

    ll = LassoLars(alpha=gamma, fit_intercept=False,
                   normalize=False, fit_path=False)
    ll.fit(dict, target)

    # scikit-learn lays coef_ out as (n_targets, n_features), or
    # (n_features,) for a single target.  Normalise to
    # (num_samples, dic_size) and transpose; the former plain reshape to
    # (dic_size, num_samples) mixed up coefficients of different samples
    # whenever both dimensions were larger than 1.
    alpha = np.asarray(ll.coef_).reshape(num_samples, dic_size).T
    return alpha
# LassoLars Regression
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LassoLars

# load the diabetes dataset
dataset = datasets.load_diabetes()

# fit a LASSO using LARS model to the data (fit returns the estimator itself)
model = LassoLars(alpha=0.1).fit(dataset.data, dataset.target)
print(model)

# make predictions on the same data the model was fitted on
expected = dataset.target
predicted = model.predict(dataset.data)

# summarize the fit of the model: mean squared error, then the model's score
residual = predicted - expected
mse = np.mean(residual**2)
print(mse)
print(model.score(dataset.data, dataset.target))
def ProcessData(df,vect1,vect2,builder):
    """Turn a job-advert frame into ``(y, x)`` for regression.

    Args:
        df: frame with 'titledescription', 'locationfull' and
            'SalaryNormalized' columns.
        vect1: fitted transformer applied to the stringified
            'titledescription' values.
        vect2: fitted transformer applied to the stringified 'locationfull'
            values.
        builder: unused; the design-matrix step that consumed it is disabled.

    Returns:
        tuple: ``(y, x)`` where ``y`` is the salary array and ``x`` a COO
        sparse matrix holding both transformed blocks side by side.
    """
    descriptionmatrix = vect1.transform([str(x) for x in df['titledescription'].values])
    locationmatrix = vect2.transform([str(x) for x in df['locationfull'].values])

    y = df['SalaryNormalized'].values

    x_combo = np.hstack([descriptionmatrix.toarray(),locationmatrix.toarray()])
    return (np.asarray(y), sparse.coo_matrix(x_combo))


train = PreProcess(pd.read_csv('train.csv'))
(vect1,vect2,builder) = InitializeTransformers(train)
(y, x) = ProcessData(train, vect1, vect2,builder)
(y_test, x_test) = ProcessData(PreProcess(pd.read_csv('solution.csv')),vect1,vect2,builder)

# Coordinate-descent Lasso, fitted on the sparse matrix.
lasso = Lasso()
lasso.fit(x,y)
y_pred = lasso.predict(x_test)

# LassoLars is fitted on the densified training matrix.
lassolars = LassoLars(alpha=2)
lassolars.fit(x.toarray(),y)
lars_pred = lassolars.predict(x_test)

# Single-argument print() behaves the same on Python 2 and Python 3.
print(np.sqrt(mean_squared_error(y_test, y_pred)))
print(r2_score(y_test,y_pred))
print(np.sqrt(mean_squared_error(y_test,lars_pred)))
print(r2_score(y_test,lars_pred))