예제 #1
0
    def explain_node(self, node_idx, x, edge_index, **kwargs):
        """Score input features for one node with a positive LassoLars fit.

        Predictions are obtained from ``self.__init_predict__`` and then,
        together with ``x``, restricted by ``self.__subgraph__``.  Kernels of
        the restricted features and predictions are passed through
        ``self.__compute_gram_matrix__``, flattened, and regressed against
        each other.

        Args:
            node_idx: Node identifier forwarded to ``self.__subgraph__``.
            x: Feature tensor (must support ``detach().cpu().numpy()``).
            edge_index: Graph connectivity forwarded to the helpers.
            **kwargs: Extra arguments forwarded to the helpers.

        Returns:
            The ``coef_`` attribute of the fitted ``LassoLars`` solver.
        """
        initial_probas = self.__init_predict__(x, edge_index, **kwargs)

        sub_x, sub_probas, _, _, _, _ = self.__subgraph__(
            node_idx, x, initial_probas, edge_index, **kwargs)

        # Shape annotations below are carried over from the original code.
        features = sub_x.detach().cpu().numpy()  # (n, d)
        targets = sub_probas.detach().cpu().numpy()  # (n, classes)
        n, d = features.shape

        feature_kernel = self.__compute_kernel__(features, reduce=False)  # (n, n, d)
        target_kernel = self.__compute_kernel__(targets, reduce=True)  # (n, n, 1)

        K_bar = self.__compute_gram_matrix__(feature_kernel)  # (n, n, d)
        L_bar = self.__compute_gram_matrix__(target_kernel)  # (n, n, 1)

        # Flatten the pairwise axes so each node pair becomes one sample.
        design = K_bar.reshape(n**2, d)  # (n ** 2, d)
        response = L_bar.reshape(n**2, )  # (n ** 2,)

        solver = LassoLars(self.rho,
                           fit_intercept=False,
                           normalize=False,
                           positive=True)
        solver.fit(design * n, response * n)

        return solver.coef_
예제 #2
0
        def RunLARSScikit():
            """Time a LassoLars fit configured from the enclosing ``options`` dict.

            Recognised options are popped from ``options`` and translated to
            LassoLars keyword arguments; anything left over is reported via
            ``Log.Fatal`` and raises ``Exception``.

            Returns:
                The elapsed time reported by the timer, or -1 if building or
                fitting the model raised.
            """
            totalTimer = Timer()

            # Load input dataset.
            Log.Info("Loading dataset", self.verbose)
            inputData = np.genfromtxt(self.dataset[0], delimiter=',')
            responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

            # (benchmark option name, LassoLars keyword, converter)
            translations = (
                ("lambda1", "alpha", float),
                ("max_iterations", "max_iter", int),
                ("epsilon", "eps", float),
            )
            opts = {}
            for option_name, keyword, convert in translations:
                if option_name in options:
                    opts[keyword] = convert(options.pop(option_name))

            if len(options) != 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            try:
                with totalTimer:
                    # Perform LARS.
                    model = LassoLars(**opts)
                    model.fit(inputData, responsesData)
                    out = model.coef_
            except Exception:
                return -1

            return totalTimer.ElapsedTime()
예제 #3
0
파일: lars.py 프로젝트: rancho93/benchmarks
    def RunLARSScikit(q):
      """Time a LassoLars fit and report the result through ``q``.

      The regularisation strength is read from the ``-l <number>`` token of
      the enclosing ``options`` string (default 0.0).  Fractional and
      exponent notation are accepted; previously only the leading integer
      digits were used, so ``-l 0.5`` silently became ``alpha=0``.

      Args:
        q: Queue-like object; receives the elapsed time, or -1 on failure.

      Returns:
        The elapsed time reported by the timer, or -1 if parsing or fitting
        raised.
      """
      totalTimer = Timer()

      # Load input dataset.
      Log.Info("Loading dataset", self.verbose)
      inputData = np.genfromtxt(self.dataset[0], delimiter=',')
      responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

      try:
        with totalTimer:
          # Get all the parameters.  Raw string: "\d" in a plain literal is
          # an invalid escape sequence on modern Python.
          lambda1 = re.search(
              r"-l ((?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)", options)
          lambda1 = 0.0 if not lambda1 else float(lambda1.group(1))

          # Perform LARS.
          model = LassoLars(alpha=lambda1)
          model.fit(inputData, responsesData)
          out = model.coef_
      except Exception:
        q.put(-1)
        return -1

      elapsed = totalTimer.ElapsedTime()
      q.put(elapsed)
      return elapsed
예제 #4
0
def cv_train_lasso_lars_with_sparse_refit(x_train,
                                          y_train,
                                          pval_cutoff=0.001,
                                          do_sparse_refit=True):
    """Fit LassoLarsCV and optionally refit at a later alpha on the CV path.

    After cross-validation, the alphas following the selected one in
    ``cv_alphas_`` are scanned.  For each, the per-fold errors in
    ``mse_path_`` are compared with those of the selected alpha using
    ``ttest_ind``; scanning stops at the first alpha whose p-value drops
    below ``pval_cutoff`` and the alpha just before it is used.  If no alpha
    crosses the cutoff, the last alpha in ``cv_alphas_`` is used.

    Args:
        x_train: Training design matrix; ``shape[0]`` caps the fold count at 10.
        y_train: Training targets.
        pval_cutoff: Significance threshold for the fold-error t-test.
        do_sparse_refit: When False, return the cross-validated model as is.

    Returns:
        A fitted ``LassoLars`` (refit) or the fitted ``LassoLarsCV`` model.
    """
    model = LassoLarsCV(n_jobs=-1, cv=min(x_train.shape[0], 10))
    model.fit(x_train, y_train)

    if not do_sparse_refit:
        return model

    cv_alphas = np.asarray(model.cv_alphas_)
    # Nearest match instead of int(np.argwhere(a == b)): the latter converts
    # a 2-D array to a scalar (deprecated in recent NumPy) and breaks when
    # exact float equality yields zero or several hits.
    best_alpha_idx = int(np.argmin(np.abs(cv_alphas - model.alpha_)))

    # Default: take the last alpha on the path (the sparsest solution).
    sparse_alpha_idx = len(cv_alphas) - 1
    for i in range(best_alpha_idx + 1, len(cv_alphas)):
        pval = ttest_ind(model.mse_path_[best_alpha_idx],
                         model.mse_path_[i]).pvalue
        if pval < pval_cutoff:
            # Keep the last alpha that was not significantly worse.
            sparse_alpha_idx = i - 1
            break

    model_sparse = LassoLars(alpha=cv_alphas[sparse_alpha_idx])
    model_sparse.fit(x_train, y_train)
    return model_sparse
예제 #5
0
def LassoLars_score(X,y,**l1_parameters):
    """
    Score predictors with a scikit-learn LassoLars regression.

    Args:
        X (pandas.DataFrame): Transcription factor gene expressions where rows
            are experimental conditions and columns are transcription factors
        y (pandas.Series): Target gene expression vector where rows are
            experimental conditions
        **l1_parameters: Named parameters forwarded to the LassoLars
            constructor

    Returns:
        numpy.array: co-regulation scores.

        The i-th element of the score array is the absolute value of the
        LassoLars coefficient assigned to transcription factor i for the
        target gene.

    Examples:
        >>> import pandas as pd
        >>> import numpy as np
        >>> np.random.seed(0)
        >>> tfs = pd.DataFrame(np.random.randn(5,3),
        ...                    index =["c1","c2","c3","c4","c5"],
        ...                    columns=["tf1","tf2","tf3"])
        >>> tg = pd.Series(np.random.randn(5),index=["c1","c2","c3","c4","c5"])
        >>> scores = LassoLars_score(tfs,tg, alpha=0.01)
        >>> scores
        array([0.12179406, 0.92205553, 0.15503451])
    """
    fitted = LassoLars(**l1_parameters)
    fitted.fit(X, y)
    # Only the magnitude of each coefficient is used as the score.
    return np.abs(fitted.coef_)
예제 #6
0
def lassolarsdimension(data, label):
    """Keep only the columns of ``data`` with a non-zero LassoLars coefficient.

    The regularisation strength is chosen by 5-fold ``LassoLarsCV`` and then
    used for a plain ``LassoLars`` fit.

    Returns:
        tuple: ``(new_data, mask)`` where ``mask`` is the boolean array of
        retained columns and ``new_data`` is ``data[:, mask]``.
    """
    cv_model = LassoLarsCV(cv=5, max_iter=400).fit(data, label)
    # Build the LassoLars object with the cross-validated alpha.
    selector = LassoLars(alpha=cv_model.alpha_)
    fitted = selector.fit(data, label)
    mask = fitted.coef_ != 0
    return data[:, mask], mask
예제 #7
0
def online_dict_learning(X, lmda, D_0, T, k_cluster, eps, _NF=200):
    '''
    Online dictionary learning loop ("algo 1 in the paper").

    Each of the T iterations draws one random row of X, sparse-codes it
    against the current dictionary with LassoLars, accumulates the outer
    products into A_t / B_t and refreshes the dictionary via dict_update.

    X: R^(n * m), one sample per row
    D_0: R^(m * k), initial dictionary
    lmda: LassoLars alpha
    eps, _NF: forwarded to dict_update
    Returns the final dictionary D_t.
    '''
    n_dim, m_dim = X.shape
    A_t = np.zeros((k_cluster, k_cluster))
    B_t = np.zeros((m_dim, k_cluster))
    D_t = D_0

    t_start = time.time()
    for _ in range(T):
        x_sample = X[np.random.randint(0, n_dim), :]

        # Sparse code of the sample w.r.t. the current dictionary.
        coder = LassoLars(alpha=lmda)
        coder.fit(D_t, x_sample)
        alpha_t = coder.coef_

        code_col = alpha_t.reshape(k_cluster, 1)
        code_row = alpha_t.reshape(1, k_cluster)
        sample_col = x_sample.reshape(m_dim, 1)

        A_t += np.matmul(code_col, code_row)
        B_t += np.matmul(sample_col, code_row)

        D_t = dict_update(D_t, A_t, B_t, eps=eps, _NF=_NF)

    elapsed = time.time() - t_start
    print('Dcitionary update done! Time elapse {:.04f}s'.format(elapsed))
    return D_t
예제 #8
0
파일: lars.py 프로젝트: rcurtin/benchmarks
    def RunLARSScikit():
      """Time a LassoLars fit configured from the enclosing ``options`` dict.

      Recognised options are popped from ``options`` and translated to
      LassoLars keyword arguments; anything left over is reported via
      ``Log.Fatal`` and raises ``Exception``.

      Returns:
        The elapsed time reported by the timer, or -1 if building or fitting
        the model raised.
      """
      totalTimer = Timer()

      # Load input dataset.
      Log.Info("Loading dataset", self.verbose)
      inputData = np.genfromtxt(self.dataset[0], delimiter=',')
      responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

      # (benchmark option name, LassoLars keyword, converter)
      translations = (
        ("lambda1", "alpha", float),
        ("max_iterations", "max_iter", int),
        ("epsilon", "eps", float),
      )
      opts = {}
      for option_name, keyword, convert in translations:
        if option_name in options:
          opts[keyword] = convert(options.pop(option_name))

      if len(options) != 0:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

      try:
        with totalTimer:
          # Perform LARS.
          model = LassoLars(**opts)
          model.fit(inputData, responsesData)
          out = model.coef_
      except Exception:
        return -1

      return totalTimer.ElapsedTime()
예제 #9
0
def Lasso_fit(alpha, x, y):
    """Fit an intercept-free LassoLars and report which coefficients are non-zero.

    Returns:
        tuple: ``(idxs, c_cal)`` where ``idxs`` is the boolean array
        ``coef_ != 0`` and ``c_cal`` is the builtin ``sum`` over it.
    """
    solver = LassoLars(alpha=alpha, fit_intercept=False, max_iter=3000)
    solver.alpha = alpha
    nonzero = solver.fit(x, y).coef_ != 0.
    return nonzero, sum(nonzero)
예제 #10
0
    def __init__(
        self,
        propensity_learner=None,
        learner=None,
        learner_c=None,
        learner_t=None,
        delta=0.001,
    ):
        """Set up a DoubleRobustEstimator.

        Args:
            propensity_learner: a classifier model with probability estimation
                method `predict_proba` like a sklearn LogisticRegression
            learner: generic outcome regression model; when given, an
                independent deep copy is used for each of the two outcomes
                and `learner_c` / `learner_t` are ignored
            learner_c: specific control outcome regression model
            learner_t: specific treatment outcome regression model
            delta: stored unchanged on the instance as `self.delta`

        When `learner`, `learner_c` and `learner_t` are all None, both
        outcome models default to a fresh `LassoLars()`.  If only one of
        `learner_c` / `learner_t` is given, the other stays None.
        """
        self.propensity_learner = propensity_learner

        if learner is not None:
            # Separate copies so the two outcome models never share state.
            self.learner_c = copy.deepcopy(learner)
            self.learner_t = copy.deepcopy(learner)
        elif learner_c is None and learner_t is None:
            self.learner_c = LassoLars()
            self.learner_t = LassoLars()
        else:
            self.learner_c = learner_c
            self.learner_t = learner_t

        self.delta = delta
예제 #11
0
def LARS_EN(Y, X, reg_param, reg_param1):
    '''
    Elastic net solved as a lasso on an augmented dataset.

    function takes
    - Y: target variable as a 2-D column (it is stacked on top of a
      (p, 1) block of zeros)
    - X: n x p dataset
    - reg_param: regularization parameter for l2-norm
    - reg_param1: regularization parameter for l1-norm

    function returns
    - beta: the LassoLars coefficients divided by sqrt(1 + reg_param)
    '''
    n_features = X.shape[1]
    scale = np.sqrt(1 + reg_param)

    # Augmented ("artificial") dataset of the naive elastic net: append
    # sqrt(reg_param) * I rows to X and matching zeros to Y.
    ridge_rows = np.sqrt(reg_param) * np.identity(n_features)
    X_aug = np.power(1 + reg_param, -0.5) * np.vstack((X, ridge_rows))
    Y_aug = np.vstack((Y, np.zeros(shape=(n_features, 1))))
    gamma = reg_param1 / scale

    # Center the augmented design (mean removal only, no rescaling).
    X_aug = StandardScaler(with_std=False).fit_transform(X_aug)

    # LARS (Efron 2004) solves the resulting lasso problem.
    lasso = LassoLars(alpha=gamma, fit_intercept=False, max_iter=1000)
    lasso.fit(X_aug, Y_aug)

    # Map the lasso coefficients back to elastic net coefficients.
    return lasso.coef_ / scale
예제 #12
0
        def RunLARSScikit(q):
            """Time a LassoLars fit and report the result through ``q``.

            The regularisation strength is read from the ``-l <number>``
            token of the enclosing ``options`` string (default 0.0).
            Fractional and exponent notation are accepted; previously only
            the leading integer digits were used, so ``-l 0.5`` silently
            became ``alpha=0``.

            Args:
                q: Queue-like object; receives the elapsed time, or -1 on
                    failure.

            Returns:
                The elapsed time reported by the timer, or -1 if parsing or
                fitting raised.
            """
            totalTimer = Timer()

            # Load input dataset.
            Log.Info("Loading dataset", self.verbose)
            inputData = np.genfromtxt(self.dataset[0], delimiter=',')
            responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

            try:
                with totalTimer:
                    # Get all the parameters.  Raw string: "\d" in a plain
                    # literal is an invalid escape sequence on modern Python.
                    lambda1 = re.search(
                        r"-l ((?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)", options)
                    lambda1 = 0.0 if not lambda1 else float(lambda1.group(1))

                    # Perform LARS.
                    model = LassoLars(alpha=lambda1)
                    model.fit(inputData, responsesData)
                    out = model.coef_
            except Exception:
                q.put(-1)
                return -1

            elapsed = totalTimer.ElapsedTime()
            q.put(elapsed)
            return elapsed
예제 #13
0
def dataPreprocess():
    """
        Description: compress the feature set with the least-angle-regression
                     Lasso algorithm (LassoLars).

                     Reads 'data/data1.csv', fits LassoLars on the first 13
                     columns against column 'y', keeps the columns whose
                     coefficient is non-zero plus the 14th column, adds a
                     'year' column (1994..2013) and writes the result to
                     'tmp/newData.csv'.
        Params:

        Return:

        Author:
                HY
        Modify:
                2019/6/21 16:37
    """
    inputFile = 'data/data1.csv'
    outputFile = 'tmp/newData.csv'
    data = pd.read_csv(inputFile)

    model = LassoLars(alpha=4, max_iter=1000)
    model.fit(data.iloc[:, 0:13], data['y'])
    coefs = model.coef_
    print(coefs)

    # Keep every candidate column that LassoLars did not shrink to zero.
    newColumns = [column
                  for index, column in enumerate(data.columns[0:13])
                  if coefs[index] != 0]
    newColumns.append(data.columns[13])

    # Wrapped in a new DataFrame before adding a column (the original note
    # says this is to avoid chained-assignment problems).
    newData = pd.DataFrame(data[newColumns])
    newData['year'] = list(range(1994, 2014, 1))
    newData.to_csv(outputFile, index=False)
예제 #14
0
    def train(self):
        """Fit a LassoLars model on the outlier-truncated training data.

        Rows whose 'logerror' lies outside the open interval
        (self._low, self._up) are dropped and the longitude / latitude
        columns are offset before fitting.  Previously this filtered frame
        was built and then ignored: the model was trained on the unfiltered
        ``self.TrainData`` and the "size after" message printed the
        unfiltered size.  ``self.TrainData`` itself is left unmodified.

        Side effects: sets ``self._l_train_columns``, ``self._model`` and
        ``self._f_eval_train_model`` and prints progress information.
        """
        start = time.time()

        print('size before truncated outliers is %d ' % len(self.TrainData))
        in_range = (self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)
        # Explicit copy so the in-place shifts below act on an independent
        # frame rather than on a slice of self.TrainData.
        TrainData = self.TrainData[in_range].copy()
        print('size after truncated outliers is %d ' % len(TrainData))

        TrainData['longitude'] -= -118600000
        TrainData['latitude'] -= 34220000

        X = TrainData.drop(self._l_drop_cols, axis=1)
        Y = TrainData['logerror']
        self._l_train_columns = X.columns
        X = X.values.astype(np.float32, copy=False)

        lr = LassoLars(alpha= self._lr_alpha, max_iter= self._lr_iter, verbose= True)
        self._model = lr.fit(X, Y)
        end = time.time()

        print('Training iterates %d, time consumed %d ' % (self._model.n_iter_, (end - start)))

        # Only the target path is recorded; writing the model to disk is
        # currently disabled.
        self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,
                                                            datetime.now().strftime('%Y%m%d-%H:%M:%S'))

        return
    def lasso_subproblem(self, Xt):
        '''
        function which performs:
        - 4: Sparse coding with LARS

        INPUTS:
        - self
        - Xt, data array
        - A, matrix
        - B, matrix
        - t, iter number

        OUTPUT:
        - coef
        '''
        print "inside lasso"
        # 4: Sparse coding with LARS
        from sklearn.linear_model import LassoLars
        lars = LassoLars(alpha=self.alpha, verbose=False)

        # self.components = np.matrix([[8,2,3,4],[1,6,1,99]])
        # Xt = np.matrix([[3,1],[6,7]])
        # Xt[1,1] = 9999
        lars.fit(self.components, Xt)
        coef = lars.coef_
        # print coef
        coef = (np.asmatrix(coef)).T

        # Dimension control
        if self.verbose > 20:
            print "coef shape :", coef.shape

        return coef
예제 #16
0
def OnceTest(dataMat, labelMat):
    """Fit five linear models and print each one's squared test error.

    Every model is trained on ``dataMat[0:99]`` / ``labelMat[0:99]`` and
    evaluated on rows ``[100:199]``; the printed number is the sum of
    squared residuals on the evaluation rows.

    The LassoLars result was previously predicted with the ElasticNet model
    (``clf4``), so its reported error was wrong; it now uses ``clf5``.
    """
    clf1 = LinearRegression()
    clf1.fit(dataMat[0:99], labelMat[0:99])
    labelTest1 = clf1.predict(dataMat[100:199])
    print('default LinearRegression',
          ((labelTest1 - labelMat[100:199])**2).sum())
    clf2 = Ridge(alpha=1, max_iter=100, tol=0.001)
    clf2.fit(dataMat[0:99], labelMat[0:99])
    labelTest2 = clf2.predict(dataMat[100:199])
    print('Ridge alhpa=1 max_iter=100 tol=0.001',
          ((labelTest2 - labelMat[100:199])**2).sum())
    clf3 = Lasso(alpha=1, max_iter=100, tol=0.001)
    clf3.fit(dataMat[0:99], labelMat[0:99])
    labelTest3 = clf3.predict(dataMat[100:199])
    print('Lasso alhpa=1 max_iter=100 tol=0.001',
          ((labelTest3 - labelMat[100:199])**2).sum())
    # NOTE(review): the label below says tol=0.001 but the model uses
    # tol=1e-4 -- confirm which one is intended.
    clf4 = ElasticNet(alpha=1, l1_ratio=0.5, max_iter=100, tol=1e-4)
    clf4.fit(dataMat[0:99], labelMat[0:99])
    labelTest4 = clf4.predict(dataMat[100:199])
    print('ElasticNet alhpa=1 max_iter=100 tol=0.001',
          ((labelTest4 - labelMat[100:199])**2).sum())
    clf5 = LassoLars(alpha=1, max_iter=100)
    clf5.fit(dataMat[0:99], labelMat[0:99])
    # Predict with the LassoLars model itself, not the ElasticNet above.
    labelTest5 = clf5.predict(dataMat[100:199])
    print('LassoLars alhpa=1 max_iter=100',
          ((labelTest5 - labelMat[100:199])**2).sum())
예제 #17
0
def LassoRegression(X_train, X_test, y_train, y_test):
    regr = LassoLars(alpha=0.1)
    print len(X_train.values.tolist()[0])
    print len(X_train.values.tolist())
    regr.fit(X_train.values.tolist(), y_train.values.tolist())
    predictions = regr.predict(X_test)
    return predictions
예제 #18
0
class LassoLarsPrim(primitive):
    """Regressor primitive wrapping ``LassoLars(alpha=0.1)``."""

    def __init__(self, random_state=0):
        super(LassoLarsPrim, self).__init__(name='LassoLars')
        self.type = 'Regressor'
        self.accept_type = 'c_r'
        self.hyperparams = []
        self.hyperparams_run = {'default': True}
        self.description = (
            "LassoLars is a lasso model implemented using the LARS algorithm, "
            "and unlike the implementation based on coordinate descent, "
            "this yields the exact solution, which is piecewise linear "
            "as a function of the norm of its coefficients."
        )
        self.random_state = random_state
        self.model = LassoLars(alpha=0.1)

    def can_accept(self, data):
        """Delegate the acceptance check to ``can_accept_c`` for 'Regression'."""
        return self.can_accept_c(data, 'Regression')

    def is_needed(self, data):
        """Always report the primitive as needed."""
        return True

    def fit(self, data):
        """Fit the wrapped model on the 'X' / 'Y' entries of the handled data."""
        prepared = handle_data(data)
        self.model.fit(prepared['X'], prepared['Y'])

    def produce(self, data):
        """Predict on 'X' and return ``{0: output}``.

        The predictions are stored under 'predictions' and also replace 'X'
        as a one-column DataFrame named ``<name>Pred``.
        """
        output = handle_data(data)
        output['predictions'] = self.model.predict(output['X'])
        prediction_column = [self.name + "Pred"]
        output['X'] = pd.DataFrame(output['predictions'], columns=prediction_column)
        return {0: output}
예제 #19
0
 def trainAlgo(self):
     """Build a LassoLars model from ``self.param`` and fit it.

     The model is stored in ``self.model`` and trained on
     ``self.inputData['X']`` against ``self.outputData['Y']``.
     """
     option_names = ('alpha', 'normalize', 'fit_intercept', 'max_iter',
                     'positive')
     model_kwargs = {name: self.param[name] for name in option_names}
     self.model = LassoLars(**model_kwargs)
     self.model.fit(self.inputData['X'], self.outputData['Y'])
예제 #20
0
파일: lars.py 프로젝트: micmn/benchmarks
        def RunLARSScikit(q):
            """Time a LassoLars fit and report the result through ``q``.

            Parameters are read from the enclosing ``options`` string:
            ``-l <number>`` (alpha, default 1.0), ``--max_iter <int>``
            (default 500) and ``--eps <number>`` (default machine epsilon).
            Numbers may use fractional or exponent notation; previously only
            leading integer digits were captured, so values such as
            ``-l 0.5`` or ``--eps 1e-8`` were misread.

            Args:
                q: Queue-like object; receives the elapsed time, or -1 on
                    failure.

            Returns:
                The elapsed time reported by the timer, or -1 if fitting
                raised.
            """
            totalTimer = Timer()

            # Load input dataset.
            Log.Info("Loading dataset", self.verbose)
            inputData = np.genfromtxt(self.dataset[0], delimiter=',')
            responsesData = np.genfromtxt(self.dataset[1], delimiter=',')

            # Unsigned decimal with optional fraction and exponent.  Raw
            # strings: "\d" in a plain literal is an invalid escape sequence.
            number = r"((?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)"
            lambda1 = re.search(r"-l " + number, options)
            lambda1 = 1.0 if not lambda1 else float(lambda1.group(1))
            max_iter1 = re.search(r"--max_iter (\d+)", options)
            max_iter1 = 500 if not max_iter1 else int(max_iter1.group(1))
            eps1 = re.search(r"--eps " + number, options)
            eps1 = np.finfo(float).eps if not eps1 else float(eps1.group(1))
            try:
                with totalTimer:
                    # Perform LARS.
                    model = LassoLars(alpha=lambda1,
                                      max_iter=max_iter1,
                                      eps=eps1)
                    model.fit(inputData, responsesData)
                    out = model.coef_
            except Exception:
                q.put(-1)
                return -1

            elapsed = totalTimer.ElapsedTime()
            q.put(elapsed)
            return elapsed
예제 #21
0
def Lasso(x_train, y_train, x_test, y_test):
    """Fit a default LassoLars and print its test MSE and R2 scores.

    The scores come from the ``mse`` and ``r2`` callables available in the
    enclosing module.  Nothing is returned.
    """
    predictions = LassoLars().fit(x_train, y_train).predict(x_test)

    test_mse = mse(y_test, predictions)
    print("mse_score: " + str(test_mse))

    test_r2 = r2(y_test, predictions)
    print("r2_score: " + str(test_r2))
예제 #22
0
def predict_LarsLasso(X, y, train, test, alpha=0.1):
    """Fit LassoLars on the ``train`` rows and predict the ``test`` rows.

    ``X`` and ``y`` are indexed positionally with ``.iloc``.

    Returns:
        The model predictions for ``X.iloc[test]``.
    """
    model = LassoLars(alpha)
    model.fit(X.iloc[train], y.iloc[train])
    return model.predict(X.iloc[test])
예제 #23
0
def update_spatial_perpx(y, alpha, sub, C):
    """Fit non-negative LassoLars coefficients for the columns selected by ``sub``.

    Args:
        y: Target vector; its dtype is used for the result.
        alpha: LassoLars regularisation strength.
        sub: Mask over the columns of ``C``; the result has its shape.
        C: Design matrix; only ``C[:, sub]`` enters the fit.

    Returns:
        Array shaped like ``sub`` holding the fitted coefficients at the
        selected positions and zeros elsewhere (all zeros when nothing is
        selected).
    """
    out = np.zeros_like(sub, dtype=y.dtype)
    if not np.sum(sub) > 0:
        # Nothing selected: skip the fit entirely.
        return out

    selected = C[:, sub]
    model = LassoLars(alpha=alpha, positive=True)
    model.fit(selected, y)
    out[np.nonzero(sub)[0]] = model.coef_
    return out
예제 #24
0
 def select(self,X,y,weight,alpha=0.01):
     """Pick feature indices from the LassoLars path.

     The last point on the coefficient path that uses at most
     ``self.n_features`` non-zero coefficients is taken, and the indices of
     its non-zero coefficients are returned.  If that point has no non-zero
     coefficient, the selection is retried with ``alpha * 0.01``.

     Args:
         X: Design matrix.
         y: Target vector.
         weight: Not used by the fit itself; passed through on retry.
         alpha: LassoLars regularisation strength.

     Returns:
         Array of selected feature indices.
     """
     lars = LassoLars(normalize=False,alpha=alpha)
     lars.fit(X,y)
     active_counts = (lars.coef_path_ != 0).sum(axis=0)
     path_idx = np.argwhere(active_counts <= self.n_features)[-1,0]
     coef = lars.coef_path_[:,path_idx]
     f_indices = np.argwhere(coef != 0).T[0]
     if len(f_indices) == 0:
         # The retry previously omitted the required ``weight`` argument and
         # therefore raised TypeError instead of retrying.
         f_indices = self.select(X,y,weight,alpha=alpha * 0.01)
     return f_indices
예제 #25
0
def lasso_lars(X, y):
    """Return the training RMSE of a LassoLars(alpha=0.1) fit on ``X``, ``y``."""
    # Train the model.
    model = LassoLars(alpha=0.1)
    model.fit(X, y)

    # Evaluate on the same data it was trained on.
    fitted_values = model.predict(X)
    return sqrt(mean_squared_error(y, fitted_values))
예제 #26
0
 def __init__(self, random_state=0):
     """Register the primitive as 'LassoLars' and create its model.

     Args:
         random_state: Stored on the instance as ``self.random_state``.
     """
     super(LassoLarsPrim, self).__init__(name='LassoLars')
     self.type = 'Regressor'
     self.accept_type = 'c_r'
     self.hyperparams = []
     self.hyperparams_run = {'default': True}
     self.description = (
         "LassoLars is a lasso model implemented using the LARS algorithm, "
         "and unlike the implementation based on coordinate descent, "
         "this yields the exact solution, which is piecewise linear "
         "as a function of the norm of its coefficients."
     )
     self.random_state = random_state
     self.model = LassoLars(alpha=0.1)
예제 #27
0
def LassoLarsTest(dataMat, labelMat):
    """Print the squared test error of LassoLars and of LassoLarsCV.

    Both models are trained on rows ``[0:99]`` and evaluated on rows
    ``[100:199]``; the printed value is the sum of squared residuals.
    """
    train_X, train_y = dataMat[0:99], labelMat[0:99]
    test_X, test_y = dataMat[100:199], labelMat[100:199]

    candidates = (
        ('LassoLars ', LassoLars, {'alpha': 1, 'max_iter': 100}),
        ('LassoLarsCV', LassoLarsCV, {'max_n_alphas': 10, 'max_iter': 100}),
    )
    for title, estimator_cls, params in candidates:
        clf = estimator_cls(**params)
        clf.fit(train_X, train_y)
        predicted = clf.predict(test_X)
        print(title, ((predicted - test_y)**2).sum())
def Lars_Lasso(kf, data, label, k):
    """Return the mean test MSE of LassoLars(alpha=.1) over the folds in ``kf``.

    Args:
        kf: Iterable of ``(train_indices, test_indices)`` pairs.
        data: 2-D array indexed as ``data[indices, :]``.
        label: 1-D array indexed as ``label[indices]``.
        k: Unused; kept for interface compatibility.

    Returns:
        The average of the per-fold mean squared errors, or 0 when ``kf``
        yields no folds.  The sum was previously always divided by 3, which
        is only correct for exactly three folds.
    """
    total_mse = 0
    n_folds = 0
    for train, test in kf:
        X_train, X_test = data[train, :], data[test, :]
        y_train, y_test = label[train], label[test]
        model = LassoLars(alpha=.1).fit(X_train, y_train)
        y_pred = model.predict(X_test)
        total_mse += metrics.mean_squared_error(y_test, y_pred)
        n_folds += 1
    if n_folds == 0:
        return 0
    return total_mse / n_folds
예제 #29
0
def ll_validate_test(X, y, X_vt, y_vt):
    """Train LassoLars(alpha=0.1) on ``X``, ``y`` and return its RMSE on ``X_vt``, ``y_vt``."""
    # Train the model.
    model = LassoLars(alpha=0.1)
    model.fit(X, y)

    # Validate the model on the held-out data.
    held_out_pred = model.predict(X_vt)
    return sqrt(mean_squared_error(y_vt, held_out_pred))
예제 #30
0
 def _load_model(cls, fh):
     """Rebuild a LassoLars model from the stream ``fh``.

     Three literals are read with ``_parse_literal`` (constructor params,
     the active set and the coefficient shape), followed by the raw
     coefficient bytes (8 bytes per value).  The intercept is set to 0.0.

     Args:
         fh: Readable binary stream positioned at the serialized model.

     Returns:
         A ``LassoLars`` instance with ``coef_``, ``intercept_`` and
         ``active_`` populated.
     """
     params = _parse_literal(fh)
     active = _parse_literal(fh)
     coef_shape = _parse_literal(fh)
     m = LassoLars().set_params(**params)
     m.intercept_ = 0.0
     n = int(np.prod(coef_shape)) * 8
     # np.fromstring's binary mode is deprecated; frombuffer is its
     # replacement.  The copy gives a writable array that owns its data,
     # as fromstring did.
     raw = np.frombuffer(fh.read(n), dtype=np.float64)
     m.coef_ = raw.reshape(coef_shape).copy()
     m.active_ = active
     return m
예제 #31
0
def scaledlasso(self, X, y, intercept, lam0=None, sigma=None):
        """Iteratively estimate coefficients and noise level with LassoLars.

        Args:
            X: Design matrix of shape (n, p).
            y: Response vector.
            intercept: Not referenced in this function body.
            lam0: Base penalty level; None, 'univ'/'universal', 'quantile',
                or (presumably) a numeric value used as given.
            sigma: Only compared against None to decide whether the final
                degrees-of-freedom corrected estimate is computed.

        Returns:
            tuple: ``(hbeta, sigmahat)`` -- the LassoLars coefficients and
            the noise-level estimate.

        NOTE(review): several statements below look inconsistent (flagged
        inline); this function appears to be an unfinished port and should
        be verified against its reference implementation before use.
        """
        n, p = X.shape
        # Pick the default penalty rule from the number of features.
        if lam0 == None:
            if p > pow(10, 6):
                lam0 = 'univ'
            else:
                lam0 = 'quantile'

        if lam0 == 'univ' or lam0 == 'universal':
            lam0 = np.sqrt(2 * np.log10(p) / n)

        if lam0 == 'quantile':
            # Fixed-point iteration for L, averaged with the previous value
            # at every step, until it changes by at most 0.001.
            L = 0.1
            Lold = 0
            while (np.abs(L - Lold) > 0.001):
                k = (L**4 + 2 * L**2)
                Lold = L
                # NOTE(review): np.min's second positional argument is
                # `axis`, so 0.99 is passed as an axis here; the builtin
                # min(k/p, 0.99) was probably intended -- confirm.
                L = -norm.ppf(np.min(k/p,0.99))
                L = (L + Lold) / 2
            if (p == 1):
                L = 0.5
            lam0 = np.sqrt(2 / n) * L

        sigmaint = 0.1
        sigmanew = 5
        flag = 0

        # The full LassoLars path is fitted once, outside the loop.
        objlasso = LassoLars(fit_intercept=False,eps=0.001,fit_path=True)
        objlasso.fit(X,y)

        # Iterate until the noise estimate stabilises (at most 101 rounds).
        while abs(sigmaint - sigmanew) > 0.0001 and flag <= 100:
            flag = flag + 1
            sigmaint = np.copy(sigmanew)
            lam = lam0 * sigmaint
            s = lam * n
            lams = objlasso.alphas_
            # NOTE(review): `s` is built from lam0, sigmaint and n, which
            # look like scalars here, yet it is indexed like an array below
            # -- verify that this can run as written.
            s[np.where(s>np.max(lams))[0]]=np.max(lams)
            s[np.where(s<0)[0]]=0

            # NOTE(review): sfrac is never read afterwards, and the
            # rescaled `s` is not used to select coefficients.
            sfrac = (s-s[0])/(s[p-1]-s[0])
            s = (s-s[0])/(s[p-1]-s[0])


            # NOTE(review): coef_ does not depend on `lam`, so hbeta is the
            # same in every iteration.
            hbeta = objlasso.coef_

            hy = np.dot(X,hbeta)
            sigmanew = np.sqrt(np.mean(np.square(y - hy)))

        sigmahat = sigmanew
        hlam = lam  # not read afterwards

        if sigma == None:
            # Residual scale corrected by the number of non-zero coefficients.
            sigmahat = np.sqrt(np.sum(np.square(y - hy)) / (n - np.sum(hbeta != 0)))

        return hbeta, sigmahat
예제 #32
0
class in_lassoLars(regression):
    """LassoLars-backed regression component configured through ``self.param``."""

    def trainAlgo(self):
        """Build the LassoLars model from ``self.param`` and fit it."""
        option_names = ('alpha', 'normalize', 'fit_intercept', 'max_iter',
                        'positive')
        model_kwargs = {name: self.param[name] for name in option_names}
        self.model = LassoLars(**model_kwargs)
        self.model.fit(self.inputData['X'], self.outputData['Y'])

    def predictAlgo(self):
        """Store the predictions for ``self.inputData['X']`` in ``self.result['Y']``."""
        predictions = self.model.predict(self.inputData['X'])
        self.result['Y'] = predictions
예제 #33
0
파일: lars.py 프로젝트: zoq/benchmarks
    def metric(self):
        """Time a LassoLars fit and return it as ``{"runtime": <elapsed>}``.

        The model is built from ``self.build_opts`` and fitted on
        ``self.data[0]`` (inputs) and ``self.data[1]`` (responses); the
        timed section also covers reading ``coef_``.
        """
        totalTimer = Timer()
        with totalTimer:
            model = LassoLars(**self.build_opts)
            model.fit(self.data[0], self.data[1])
            out = model.coef_

        return {"runtime": totalTimer.ElapsedTime()}
예제 #34
0
def adaptiveLasso():
    '''
    Adaptive-Lasso variable selection model.

    Reads 'data/data1.csv', fits a default LassoLars on the first 13
    columns against column 'y' and prints the coefficients.
    :return: None
    '''
    data = pd.read_csv('data/data1.csv')
    # Original note: the algorithm imported here is only available in
    # reasonably recent scikit-learn versions.
    from sklearn.linear_model import LassoLars
    predictors = data.iloc[:, 0:13]
    model = LassoLars()
    model.fit(predictors, data['y'])
    print(model.coef_)
예제 #35
0
    def fit_model_11(self,toWrite=False):
        """Fit LassoLars on every CV split and print its log-loss.

        The same model object is refitted on each
        ``(X_train, X_test, Y_train, Y_test)`` tuple in ``self.cv_data``, so
        the pickled model (if requested) is the fit from the last split.

        Args:
            toWrite: When true, pickle the model to 'model11/model.pkl'.
        """
        model = LassoLars(alpha=1,max_iter=5000)

        for data in self.cv_data:
            X_train, X_test, Y_train, Y_test = data
            model.fit(X_train,Y_train)
            pred = model.predict(X_test)
            print("Model 11 score %f" % (logloss(Y_test,pred),))

        if toWrite:
            # pickle writes bytes, so the file must be opened in binary
            # mode; the context manager closes it even if dump() raises.
            with open('model11/model.pkl','wb') as f2:
                pickle.dump(model,f2)
        def lasso_subproblem(self, Xt, comp):
                print "inside lasso"
                # 4: Sparse coding with LARS
                lars = LassoLars(alpha=self.alpha, verbose=False)

                lars.fit(comp, Xt)
                coef = lars.coef_
                # print coef
                coef = (np.asmatrix(coef)).T

                # Dimension control
                if self.verbose > 20:
                    print "coef shape :", coef.shape

                return coef
예제 #37
0
def lasso_sklearn(dict, target, gamma):
    """
    Computes Lasso optimization

    Each column of ``target`` is sparse-coded against the dictionary with
    LassoLars, using ``gamma / num_samples`` as the regularisation strength.

    :param dict: dictionnary, one atom per column
    :type dict: np.array
    :param target: image, one sample per column
    :type target: np.array
    :param gamma: regularization factor
    :type gamma: float
    :return: coefficients of shape (dict.shape[1], target.shape[1]); column
        j holds the code of ``target[:, j]``
    :rtype: np.array
    """
    num_samples = target.shape[1]
    dic_size = dict.shape[1]
    scaled_gamma = gamma / num_samples

    ll = LassoLars(alpha=scaled_gamma, fit_intercept=False, normalize=False,
                   fit_path=False)
    ll.fit(dict, target)

    # coef_ holds one row per target.  Reshaping it directly to
    # (dic_size, num_samples) would interleave codes of different samples,
    # so bring it to (num_samples, dic_size) first and then transpose.
    alpha = np.asarray(ll.coef_).reshape(num_samples, dic_size).T
    return alpha
예제 #38
0
# LassoLars Regression
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LassoLars
# Load the diabetes regression dataset.
dataset = datasets.load_diabetes()
expected = dataset.target
# Fit a LASSO model to the data using the LARS solver.
model = LassoLars(alpha=0.1).fit(dataset.data, expected)
print(model)
# Predict on the same data the model was trained on.
predicted = model.predict(dataset.data)
# Summarize the fit: mean squared error, then the model's own score.
mse = np.square(predicted - expected).mean()
print(mse)
print(model.score(dataset.data, expected))
예제 #39
0
def ProcessData(df,vect1,vect2,builder):
    """Build the target vector and the sparse feature matrix for ``df``.

    The 'titledescription' and 'locationfull' columns are converted to
    strings, transformed by ``vect1`` and ``vect2`` respectively, densified
    and stacked side by side.  ``builder`` is accepted but not used.

    Returns:
        tuple: ``(y, x)`` where ``y`` is the 'SalaryNormalized' column as an
        array and ``x`` is the stacked features as a COO sparse matrix.
    """
    title_docs = [str(value) for value in df['titledescription'].values]
    descriptionmatrix = vect1.transform(title_docs)
    location_docs = [str(value) for value in df['locationfull'].values]
    locationmatrix = vect2.transform(location_docs)

    y = df['SalaryNormalized'].values
    dense_blocks = [descriptionmatrix.toarray(), locationmatrix.toarray()]
    features = sparse.coo_matrix(np.hstack(dense_blocks))
    return (np.asarray(y), features)

# Build the training matrices; the transformers are initialised from the
# training frame and reused for the evaluation data below.
train = PreProcess(pd.read_csv('train.csv'))
(vect1,vect2,builder) = InitializeTransformers(train)
(y, x) = ProcessData(train, vect1, vect2,builder)

# Evaluation data goes through the same preprocessing and transformers.
(y_test, x_test) = ProcessData(PreProcess(pd.read_csv('solution.csv')),vect1,vect2,builder)

# Baseline: Lasso with default settings, fitted on the sparse matrix.
lasso = Lasso()
lasso.fit(x,y)
y_pred = lasso.predict(x_test)

# LassoLars is fitted on the densified training matrix.
lassolars = LassoLars(alpha=2)
lassolars.fit(x.toarray(),y)
lars_pred = lassolars.predict(x_test)

# Report RMSE and R2 for Lasso, then for LassoLars.
print np.sqrt(mean_squared_error(y_test, y_pred))

print r2_score(y_test,y_pred)

print np.sqrt(mean_squared_error(y_test,lars_pred))

print r2_score(y_test,lars_pred)