Code Example #1
    def fit(self, X, y):
        assert y is not None, f'y:{y}'
        k = X.shape[1]
        self.k_ = k
        if self.max_k is None:
            if self.k_share is None:
                self.max_k = 500
            else:
                self.max_k = int(k * self.k_share)

        if self.selector is None:
            self.selector = 'Lars'
        if self.selector == 'Lars':
            selector = Lars(fit_intercept=True, normalize=True, n_nonzero_coefs=self.max_k)
        elif self.selector == 'elastic-net':
            selector = ElasticNet(fit_intercept=True, selection='random', tol=0.001, max_iter=5000, warm_start=True,
                                  random_state=0)
        else:
            selector = self.selector

        selector.fit(X, y)
        self.col_select_ = np.arange(k)[np.abs(selector.coef_) > 0.0001]
        if self.col_select_.size < 1:
            self.col_select_ = np.arange(1)
        return self
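The fitted `col_select_` attribute holds the indices of the columns Lars kept. A minimal companion sketch of how it would typically be consumed downstream; the `transform` method is an assumption for illustration, not part of the original class:

    def transform(self, X):
        # Hypothetical companion method: keep only the Lars-selected columns.
        return X[:, self.col_select_]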
Code Example #2
def create_model_LARS(state_matrix, transcription_factors):
    regulators = {}

    for i in range(len(transcription_factors)):
        #Declaration for training set for the Target Gene
        X = []
        y = []

        for j in range(1, len(state_matrix)):
            X.append(state_matrix[j - 1].tolist())
            y.append(state_matrix[j][i] - state_matrix[j - 1][i])

        #Initialise the LARS Model
        lars = Lars()

        #Fit the training data into the Model
        lars.fit(X, y)

        #Extract the important features corresponding to a particular gene
        coefficients = lars.coef_

        #Add to the dictionary
        regulators[transcription_factors[i]] = coefficients

    return regulators
Code Example #3
File: regression_test.py  Project: YichaoOU/HemTools
def Lars_reg(par=None):
    est = Lars()
    if par:
        est = Lars(**par)
    myDict = {}
    myDict['n_nonzero_coefs'] = [1, 5, 10, 50, 100, 200, 300]
    return est, myDict
Code Example #4
    def __init__(self):
        # Algorithm name
        self._name = 'lars'

        # Base path
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         os.pardir))

        # Suppress warning messages
        warnings.filterwarnings('ignore')

        # Load the raw data
        data = pd.read_csv(self._f_path +
                           "/regression/resource/regression_sample.csv",
                           sep=",",
                           encoding="utf-8")

        # Masks for splitting into training and test data
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)

        # Training data split
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Test data split
        self._x_test, self._y_test = self.preprocessing(data[self._y])

        # Declare the model
        self._model = Lars(normalize=False)

        # Train the model
        self._model.fit(self._x_train, self._y_train)
Code Example #5
File: models.py  Project: somebodyyuan/superman-web
 def _load_model(cls, fh):
     params = _parse_literal(fh)
     active = _parse_literal(fh)
     coef_shape = _parse_literal(fh)
     m = Lars().set_params(**params)
     m.intercept_ = 0.0
     n = int(np.prod(coef_shape)) * 8
     # np.frombuffer replaces the deprecated np.fromstring for binary input.
     m.coef_ = np.frombuffer(fh.read(n)).reshape(coef_shape)
     m.active_ = active
     return m
Code Example #6
File: MCFS.py  Project: lindlind/ITMO_FS
    def run(self, X, y=None):
        """
            Fits filter

            Parameters
            ----------
            X : numpy array, shape (n_samples, n_features)
                The training input samples.
            y : numpy array, optional
                The target values (ignored).

            Returns
            ----------
            W : array-like, shape (n_features, k)
                Feature weight matrix.

            See Also
            --------

            examples
            --------
            from ITMO_FS.filters.sparse import MCFS
            from sklearn.datasets import make_classification
            import numpy as np

            dataset = make_classification(n_samples=100, n_features=20, n_informative=4, n_redundant=0, shuffle=False)
            data, target = np.array(dataset[0]), np.array(dataset[1])
            model = MCFS(d=5, k=2, scheme='heat')
            weights = model.run(data, target)
            print(model.feature_ranking(weights))

        """
        n_samples, n_features = X.shape
        graph = NearestNeighbors(n_neighbors=self.p + 1, algorithm='ball_tree').fit(X).kneighbors_graph(X).toarray()
        graph = graph + graph.T

        indices = [[(i, j) for j in range(n_samples)] for i in range(n_samples)]
        # Each (i, j) pair has two elements, so the vectorized core dimension is 2.
        func = np.vectorize(lambda xy: graph[xy[0]][xy[1]] * self.scheme(X[xy[0]], X[xy[1]]), signature='(2)->()')
        W = func(indices)

        D = np.diag(W.sum(axis=0))
        L = D - W
        eigvals, Y = eigh(type=1, a=L, b=D, eigvals=(0, self.k - 1))

        weights = np.zeros((n_features, self.k))
        for i in range(self.k):
            clf = Lars(n_nonzero_coefs=self.d)
            clf.fit(X, Y[:, i])
            weights[:, i] = clf.coef_

        return weights
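The docstring example ends with `model.feature_ranking(weights)`. A plausible sketch of that ranking step, following the common MCFS convention of scoring each feature by its largest absolute coefficient across the k regressions; this is an assumption about the ITMO_FS internals, not code from the project:

import numpy as np

def feature_ranking(W):
    # MCFS score: the maximum absolute weight a feature receives
    # across the k eigenvector regressions.
    mcfs_score = np.max(np.abs(W), axis=1)
    # Feature indices ordered from most to least important.
    return np.argsort(mcfs_score)[::-1]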
Code Example #7
File: lars.py  Project: vaisaxena/lale
class _LarsImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
Code Example #8
def define_clfs_params():

    clfs = {
        'RIDGE': Ridge(),
        'LINEAR': LinearRegression(),
        'LASSO': Lasso(),
        'ELNET': ElasticNet(),
        'LARS': Lars(),
        'LLARS': LassoLars(),
        'BRIDGE': BayesianRidge(),
        'PER': Perceptron()
    }

    grid = {
        'RIDGE': {
            'alpha': [0.01, 0.1, 1, 10]
        },
        'LINEAR': {},
        'LASSO': {
            'alpha': [0.01, 0.1, 1, 10]
        },
        'ELNET': {
            'alpha': [0.01, 0.1, 1, 10],
            'l1_ratio': [0.25, 0.5, 0.75]
        },
        'LARS': {},
        'LLARS': {},
        'BRIDGE': {},
        'PER': {}
    }
    return clfs, grid
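A minimal sketch of how these paired dictionaries would be consumed; `X` and `y` are placeholders, and the target must suit every estimator (note that Perceptron is a classifier, so it needs class labels):

from sklearn.model_selection import GridSearchCV

clfs, grid = define_clfs_params()
for name, clf in clfs.items():
    # Models with an empty grid are simply fit with their defaults.
    gs = GridSearchCV(clf, param_grid=grid[name], cv=5)
    gs.fit(X, y)
    print(name, gs.best_score_, gs.best_params_)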
Code Example #9
    def connectWidgets(self):
        # LARS / LARSCV

        lars = Lars()
        self.fit_interceptCheckBox.setChecked(lars.fit_intercept)
        self.normalizeCheckBox.setChecked(lars.normalize)
        self.n_nonzero_coefsSpinBox.setValue(lars.n_nonzero_coefs)
Code Example #10
File: MCFS.py  Project: olegtaratuhin/afsfc
 def _create_regressor(self):
     if self.mode == 'default':
         return Lars()
     if self.mode == 'lasso':
         return LassoLars(alpha=self.alpha)
     raise ValueError('Unexpected mode ' + self.mode +
                      '. Expected "default" or "lasso"')
Code Example #11
def get_regressors_variable(nmodels='all'):
    """
		Returns one of or all variable selection regressors
	"""

    # 1. Elastic net
    lr1 = ElasticNet()

    # 2. Least Angle Regression (LARS)
    lr2 = Lars()

    # 3. Lasso
    lr3 = Lasso()

    # 4. LassoLars
    lr4 = LassoLars()

    # 5. OrthogonalMatchingPursuit
    lr5 = OrthogonalMatchingPursuit()

    if nmodels == 'all':
        models = [lr1, lr2, lr3, lr4, lr5]
    else:
        # Index into the list rather than building the string 'lr<n>'.
        models = [[lr1, lr2, lr3, lr4, lr5][int(nmodels) - 1]]

    return models
Code Example #12
 def fit(self, X, y):
     assert y is not None, f'y:{y}'
     k = X.shape[1]
     self.k_ = k
     if self.max_k is None:
         if self.k_share is None:
             self.max_k = k+1
         else:
             self.max_k = int(k * self.k_share)
     steps = [('scaler', StandardScaler())]
     if self.selector is None:
         self.selector = 'Lars'
     if self.selector == 'Lars':
         steps.append(('selector', Lars(fit_intercept=True, normalize=False, n_nonzero_coefs=self.max_k)))
     elif self.selector == 'elastic-net':
         steps.append(('selector', ElasticNet(fit_intercept=True, selection='random', tol=0.01, max_iter=500,
                                              warm_start=False, random_state=0, normalize=False)))
     else:
         steps.append(('selector', self.selector))
     kshrinker = Pipeline(steps=steps)
     kshrinker.fit(X, y)
     coefs = kshrinker['selector'].coef_
     self.col_select_ = np.arange(k)[np.abs(coefs) > 0.0001]
     if self.col_select_.size < 1:
         self.col_select_ = np.arange(1)
     return self
Code Example #13
def runmodel_sklearn(chromosome, train, test, modelname, feature, label):
    model = {
        'GBRT': GradientBoostingRegressor(max_depth=7, loss='huber'),
        #'xgb': xgb.XGBRegressor(nthread = 10,objective='reg:linear', n_estimators = 10, max_depth = 3),
        'SVR': SVR(),
        'Lasso': Lasso(),
        'Linear': LinearRegression(),
        'DecisionTree': DecisionTreeRegressor(max_depth=6),
        'RandomForest': RandomForestRegressor(random_state=1, n_jobs=12),
        'Ridge': Ridge(),
        'AdaBoost': AdaBoostRegressor(),
        'BayesianRidge': BayesianRidge(compute_score=True),
        'KNN': KNeighborsRegressor(n_neighbors=12),
        'ExtraTrees': ExtraTreesRegressor(random_state=1, n_jobs=12),
        'SGD': SGDRegressor(loss='huber', penalty='elasticnet',
                            random_state=1),
        'PassiveAggressive': PassiveAggressiveRegressor(),
        'ElasticNet': ElasticNet(),
        'Lars': Lars(),
        #'lgm': lgb.LGBMRegressor(objective='regression',num_leaves=40, learning_rate=0.1,n_estimators=20, num_threads = 10),
        #'xgb_parallel': xgb.XGBRegressor(objective='reg:linear', n_estimators = 10, max_depth = 3, nthread = 4)
    }

    newtrain = make_dataframe(chromosome, train)
    if len(newtrain) == 0:
        return 1000000000
    estimator = model[modelname]
    #return pearsonr(estimator.fit(newtrain[feature], newtrain[label]).predict(test[feature]), test[label])[0]
    estimator.fit(newtrain[feature], newtrain[label])
    return np.sqrt(
        np.power(estimator.predict(test[feature]) - test[label],
                 2).mean()) / np.sqrt(np.power(test[label], 2).mean())
Code Example #14
File: lars.py  Project: toandaominh1997/automlkiller
    def __init__(self, **kwargs):
        super().__init__()

        tune_grid = {
            "fit_intercept": [True, False],
            "normalize": [True, False],
            "eps": [
                0.00001,
                0.0001,
                0.001,
                0.01,
                0.05,
                0.0005,
                0.005,
                0.00005,
                0.02,
                0.007,
                0.1,
            ],
        }
        tune_distributions = {
            "eps": UniformDistribution(0.00001, 0.1),
        }

        self.tune_grid = tune_grid
        self.tune_distributions = tune_distributions
        self.estimator = Lars(**kwargs)
Code Example #15
File: lars.py  Project: andresdigiovanni/adnlearn
def get_hyperparameters_model():
    param_dist = {}

    clf = Lars()

    model = {'lars': {'model': clf, 'param_distributions': param_dist}}
    return model
Code Example #16
 def getModel(self, _params):
   return Lars(
     fit_intercept=_params['fit_intercept'],
     normalize=_params['normalize'],
     eps=_params['eps'],
     copy_X=_params['copy_X'],
   )
Code Example #17
def get_classifier_scores(pred_labels, mixed_data, target_nmf_pp_data, target_data, use_lasso=True, n_nonzeros=10):
    aris = np.zeros(5)
    active_genes = range(mixed_data.shape[0])
    include_inds = []
    pred_lbls = np.unique(pred_labels)
    for p in pred_lbls:
        inds = np.where(pred_labels == p)[0]
        if inds.size >= 2:
            include_inds.extend(inds)
    if len(include_inds) > 2:
        if use_lasso:
            cls = OneVsRestClassifier(Lars(fit_intercept=True, normalize=True, copy_X=True,
                                           n_nonzero_coefs=50)).fit(mixed_data[:, include_inds].T.copy(),
                                                                    pred_labels[include_inds].copy())
            # collect active indices
            active_genes = []
            for e in range(len(cls.estimators_)):
                active_genes.extend(cls.estimators_[e].active_)
            active_genes = np.unique(active_genes)
            print(active_genes)
            print(active_genes.shape)
        else:
            cls = OneVsRestClassifier(LinearRegression(fit_intercept=True, normalize=False, copy_X=True, n_jobs=1)).fit(
                mixed_data[:, include_inds].T.copy(), pred_labels[include_inds].copy())

        ret = cls.predict(target_data[:, include_inds].T.copy())
        aris[4] = metrics.adjusted_rand_score(ret, pred_labels[include_inds].copy())

    aris[0] = unsupervised_acc_kta(target_data[active_genes, :].copy(), pred_labels.copy(), kernel='linear')
    aris[1] = unsupervised_acc_silhouette(target_data[active_genes, :].copy(), pred_labels.copy(), metric='euclidean')
    aris[2] = unsupervised_acc_silhouette(target_data[active_genes, :].copy(), pred_labels.copy(), metric='pearson')
    aris[3] = unsupervised_acc_silhouette(target_data[active_genes, :].copy(), pred_labels.copy(), metric='spearman')
    return aris
Code Example #18
def ml_lars(X_train, y_train, cv=5, beta=0.75, params=None):
    model = Lars()

    if not params:
        params = {'n_nonzero_coefs': [n for n in range(30, 150, 20)]}

    max_score = 0
    best_t = 0
    best_model = ""
    best_grid = ""

    for t in [0, 0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6]:
        scorer = make_scorer(new_scorer, threshold=t, beta=beta)
        model_grid = GridSearchCV(model,
                                  param_grid=params,
                                  scoring=scorer,
                                  cv=cv,
                                  verbose=0,
                                  n_jobs=-1)
        model_grid.fit(X_train, y_train)

        if max_score < model_grid.best_score_:
            max_score = model_grid.best_score_
            best_model = model_grid.best_estimator_
            best_t = t
            best_grid = model_grid

    model_grid = best_grid
    best_model = best_grid.best_estimator_

    print("Best Score : {}".format(model_grid.best_score_))
    print("Threshold :", best_t)
    print("Best Params : {}".format(model_grid.best_params_))
Code Example #19
 def __init__(self, X, y, lar_params, nfolds=3, n_jobs=1, scoring=None, verbose=True):
     
     self._code="lar"
     
     if verbose:
         print("Constructed Lars: " + self._code)
     
     AbstractRegressorPredictiveModel.__init__(self, "regressor", X, y, lar_params, nfolds, n_jobs, scoring, verbose)
     self._model = self.constructRegressor(Lars())
Code Example #20
 def connectWidgets(self):
     lars = Lars()
     self.fit_intercept_listWidget.setCurrentItem(
         self.fit_intercept_listWidget.findItems(str(lars.fit_intercept),
                                                 QtCore.Qt.MatchExactly)[0])
     self.normalize_list.setCurrentItem(
         self.normalize_list.findItems(str(lars.normalize),
                                       QtCore.Qt.MatchExactly)[0])
     self.n_nonzero_coefsLineEdit.setText(str(lars.n_nonzero_coefs))
Code Example #21
File: LAR.py  Project: vishalbelsare/networks
def perform_LARS(normalized_matrix, genes):
    # Number of genes
    no_genes = len(genes)

    # Dictionary of top regulators for each gene
    regulators = {}

    for i in range(no_genes):
        # Current gene for which the top regulators are being found
        current_y = normalized_matrix[:, i]

        # Create a copy of the matrix
        temp_matrix = normalized_matrix.copy()

        # Remove the current feature
        temp_matrix = np.delete(temp_matrix, i, axis=1)

        # Train a Least Angle Regression model to compute the coefficients
        coefficients = Lars()

        # Fit the model
        coefficients.fit(temp_matrix, current_y)

        # Coefficient values
        coeff_values = coefficients.coef_

        # Copy the genes into a temporary list
        gene_copy = list(genes)

        # Remove the gene to create the appropriate indexes
        gene_copy.remove(genes[i])

        # Perform stability selection to get an effective rank of the top regulators
        rank_dict_score = stability_selection(temp_matrix, genes, 2000, current_y, gene_copy)

        # Top regulators
        top_regulators = find_top_regulators(rank_dict_score)

        # Append to regulators
        regulators[genes[i]] = top_regulators

    return regulators
Code Example #22
def run_Lars(single_time):
    save_folder = 'Lars'
    model = Lars()
    if single_time:
        comparison_algorithm.training_test_with_sklearnmodel(
            save_folder=save_folder, model=model)
    else:
        save_folder += '10t'
        comparison_algorithm.training_test_10times_sklearnmodel(
            save_folder=save_folder, model=model)
Code Example #23
 def run(self):
     params = {
         'fit_intercept': self.fit_interceptCheckBox.isChecked(),
         'verbose': False,
         'normalize': self.normalizeCheckBox.isChecked(),
         'precompute': 'auto',
         'n_nonzero_coefs': self.n_nonzero_coefsSpinBox.value(),
         'copy_X': True,
         'fit_path': True
     }
     return params, self.getChangedValues(params, Lars())
Code Example #24
def LarsRegressorGS(X_train, X_test, y_train, y_test):
    reg = Lars()
    grid_values = {
        'n_nonzero_coefs': list(range(100, 500, 100)),
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="LarsRegressorGS", best_params=best_params)
    logSave(nameOfModel="LarsRegressorGS",
            reg=reg,
            metrics=metrics,
            val_metrics=val_metrics)
Code Example #25
def load_Least_Angle_Regression():
    '''
    Load Least Angle Regression and give a name for the output files.

    Parameters : None

    Returns    : model_name : (str) Name of the model for the output file.
                       regr : (Regressor) Longitude and latitude regressor
    '''
    model_name = "Least Angle Regression"
    regr = Lars()

    return model_name, regr
Code Example #26
def get_models(models=None):
    # Avoid a mutable default argument.
    if models is None:
        models = dict()
    models['lr'] = LinearRegression()
    models['lasso'] = Lasso()
    models['ridge'] = Ridge()
    models['en'] = ElasticNet()
    models['huber'] = HuberRegressor()
    models['lars'] = Lars()
    models['llars'] = LassoLars()
    models['pa'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    models['ransac'] = RANSACRegressor()
    models['sgd'] = SGDRegressor(max_iter=1000, tol=1e-3)
    print('Defined %d models' % len(models))
    return models
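A quick spot-check loop over these models; a sketch in which `X` and `y` are placeholder arrays and the metric is an arbitrary choice:

from sklearn.model_selection import cross_val_score

models = get_models()
for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=10, scoring='neg_mean_absolute_error')
    print('%s: %.3f (std %.3f)' % (name, scores.mean(), scores.std()))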
Code Example #27
def choose_regression_algorithm(method="LR"):

    if method == "LR":
        regression_algorithm = LinearRegression()

    elif method == "Lasso":
        regression_algorithm = Lasso()

    elif method == "Ridge":
        regression_algorithm = Ridge()

    elif method == "HR":
        regression_algorithm = HuberRegressor()        
        
    elif method == "SVR":
        regression_algorithm = SVR()
        
    elif method == "LL":
        regression_algorithm = LassoLars()

    elif method == "ARDR":
        regression_algorithm = ARDRegression()

    elif method == "BR":
        regression_algorithm = BayesianRidge()

    elif method == "ElasticNet":
        regression_algorithm = ElasticNet()

    elif method == "Lars":
        regression_algorithm = Lars()

    elif method == "PA":
        regression_algorithm = PassiveAggressiveRegressor()

    elif method == "RANSAC":
        regression_algorithm = RANSACRegressor()

    elif method == "TS":
        regression_algorithm = TheilSenRegressor()

    elif method == "LP":
        regression_algorithm = lars_path()

    elif method == "RR":
        regression_algorithm = ridge_regression()
        
    else:
        print("You haven't chosen a valide classifier!!!")
    print("method used:\t", method)   
    return regression_algorithm
Code Example #28
    def runLarsRegressor(self):
        lm = Lars(fit_intercept=True, normalize=True)

        print("Lars Regressor\n")
        lm.fit(self.m_X_train, self.m_y_train)
        predictY = lm.predict(self.m_X_test)
        score = lm.score(self.m_X_test, self.m_y_test)
        predictTraingY = lm.predict(self.m_X_train)

        self.displayPredictPlot(predictY)
        self.displayResidualPlot(predictY, predictTraingY)
        self.dispalyModelResult(lm, predictY, score)
Code Example #29
def create_model_LARS(state_matrix, transcription_factors):
    regulators = {}

    for i in range(0, len(transcription_factors)):
        #Create the training set
        X = []
        y = []
        for j in range(1, len(state_matrix)):
            #Append the expression level of the previous step
            X.append(state_matrix[j - 1].tolist())

            #The output value is the difference / rate of change of expression
            y.append(state_matrix[j][i] - state_matrix[j - 1][i])

        #Copy the list of Transcription Factors
        tf_copy = list(transcription_factors)

        #Remove the current transcription factor (by index, not by value)
        del tf_copy[i]

        #Remove the corresponding column from every training row (by index, not by value)
        for expression in X:
            del expression[i]
        """ Feature Selection using Least Angle Regression """

        #Initialise the model using Least Angle Regression
        lars = Lars()

        #Fit the training data into the Model
        lars.fit(X, y)

        #Extract the important features corresponding to a particular gene
        coefficients = lars.coef_

        #Regulators for the Network
        regulators[transcription_factors[i]] = coefficients

    return regulators
Code Example #30
def main():
    from_root = "~/Documents/School/ComputerScience/ahcompsci/Scikit-Learning-StanleyWei/scikit-utkproject/dataset/fiftytwo"
    path = "dataset/whitemensmall/"
    dirs = os.listdir(path)
    main_df = add_images_from_dirs(dirs, path)

    # Split the frame itself so train_df/test_df keep both images and labels.
    train_df, test_df = train_test_split(main_df)

    # train_df = train_df.loc[train_df['ethnicity'] == "0"]
    # test_df = test_df.loc[test_df['ethnicity'] == "0"]
    train_x = flatten_image_df(train_df.loc[:, "image"])
    test_x = flatten_image_df(test_df.loc[:, "image"])

    clf = Lars()
    # train_x = np.array(train_df.loc[:, "image"])
    # x_train = train_x.flatten().reshape(len(train_df), -1)
    clf.fit(train_x, train_df.loc[:, "age"].to_numpy())

    coefficients = clf.coef_
    # print(coefficients)
    coefficients_array = np.array(coefficients).reshape(
        len(train_df.image[0]), -1)
    # print(coefficients_array)
    # heatmap = plt.imshow(coefficients_array, cmap = "hot", interpolation = "nearest")
    # Take absolute values without mutating clf.coef_ in place.
    coefficients_abs = np.abs(coefficients)
    coefficients_array_abs = coefficients_abs.reshape(
        len(train_df.image[0]), -1)
    heatmap = plt.imshow(coefficients_array_abs,
                         cmap="hot",
                         interpolation="nearest")
    # heatmap_extremes = plt.imshow(coefficients_array_abs, vmax = 0.025, cmap = "hot", interpolation = "nearest")
    plt.colorbar(heatmap)
    # plt.colorbar(heatmap_extremes)
    plt.show()
Code Example #31
File: blend.py  Project: bjcohen/kaggle
                    # ('ppru', 'ppr_submission_user.csv', 'ppr_fitted_user.csv'),
                    # ('pprg', 'ppr_submission_global.csv', 'ppr_fitted_global.csv'),
                    ]

    fitted = pd.DataFrame(index=review_data.index)
    submission = pd.DataFrame(index=review_data_final.index)
    for name, sub_name, fit_name in blend_inputs:
        f_df = pd.read_csv(os.path.join('..', fit_name))
        f_df.index = review_data.index
        fitted[name] = f_df['stars']
        s_df = pd.read_csv(os.path.join('..', sub_name))
        s_df.index = review_data_final.index
        submission[name] = s_df['stars']

    gbr = GradientBoostingRegressor(max_depth=3,verbose=2)
    gbr.fit(fitted, review_data['stars'])
    pred = gbr.predict(submission)
    pd.DataFrame({'review_id' : submission.index, 'stars' : np.maximum(1, np.minimum(5, pred))}).to_csv('../gbr_submission.csv', index=False)

    lar = Lars(fit_intercept=True, verbose=2, normalize=True, fit_path=True)
    lar.fit(fitted, review_data['stars'])
    pred = lar.predict(submission)
    pd.DataFrame({'review_id' : submission.index, 'stars' : np.maximum(1, np.minimum(5, pred))}).to_csv('../lar_submission.csv', index=False)

    ridge = Ridge()
    ridge.fit(fitted, review_data['stars'])
    pred = ridge.predict(submission)
    pd.DataFrame({'review_id' : submission.index, 'stars' : np.maximum(1, np.minimum(5, pred))}).to_csv('../ridge_submission.csv', index=False)
    
    ## TODO: blend based on size of rating neighborhood
Code Example #32
 def larsLearn(kap):
     lars = Lars(n_nonzero_coefs=kap, fit_intercept=False)
     lars.fit(X_train, y_train)
     return lars
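A hedged usage sketch for `larsLearn`: pick the sparsity budget with the lowest validation MSE. It assumes `X_val` and `y_val` exist alongside the training arrays, and the candidate list is arbitrary:

import numpy as np

errors = {k: np.mean((y_val - larsLearn(k).predict(X_val)) ** 2)
          for k in [2, 4, 8, 16, 32]}
best_kappa = min(errors, key=errors.get)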
Code Example #33
    lasso_beta = np.array([lasso.coef_]).T
    lasso_gamma = np.array([[0. if abs(x) < 1e-100 else 1. for x in lasso.coef_]]).T
    # P = lambda X: lasso.predict(X)
    lasso_predictor = PredictorWrapper.PredictorWrapper(lasso_beta,lasso_gamma,lasso.predict)
    dill.dump(lasso_predictor,open('%sLASSO.p' % logDir,'wb'))
    with open(logFile,'a') as f:
        f.write('Lasso c: %15.10f        alpha: %15.10f\n' % (1./(2.* X_tr.shape[0]), optLam))



    ##############
    ## LARS_SET ##
    ##############
    kappa = [2,4,10]
    for k in kappa:
        lars = Lars(n_nonzero_coefs=k, fit_intercept=False)
        lars.fit(X_tr, y_tr)
        lars_beta = np.array([lars.coef_]).T
        lars_gamma = np.zeros((X_tr.shape[1], 1))
        lars_gamma[lars.active_] = 1.
        lars_predictor = PredictorWrapper.PredictorWrapper(lars_beta, lars_gamma, lars.predict)
        dill.dump(lars_predictor, open('%sLARS_%02d.p' % (logDir, k), 'wb'))

    ##############
    ## LARS_OPT ##
    ##############
    larsKappas = np.linspace(0,40,41,dtype=int)

    def larsEval(learned):
        learned_yhat = np.array([learned.predict(X_val)]).T
        learned_mse = sum((y_val - learned_yhat) ** 2)[0]
Code Example #34
model = sm.OLS(housing['labels'], housing['data'])

results = model.fit()

print(results.summary())

# Part B
preds_train = lin.predict(housing['data'])
preds_test = lin.predict(housing['testdata'])

ave_sq_loss_train = ((housing['labels'] - preds_train) ** 2).sum()/len(housing['data'][:,1])

ave_sq_loss_test = ((housing['testlabels'] - preds_test) ** 2).sum()/len(housing['testdata'][:,1])

print(ave_sq_loss_train)
print(ave_sq_loss_test)

# Part C
housing['data'] = housing['data'][:,1:14]
housing['testdata'] = housing['testdata'][:,1:14]

from sklearn.linear_model import Lars

reduced = Lars(fit_intercept=True, n_nonzero_coefs=3)

reduced.fit(housing['data'], housing['labels'])

print(reduced.intercept_)
print(reduced.coef_)
Code Example #35
File: Linear Models.py  Project: chenzhongtao/source
new_reg_data = reg_data[:, mask]
print(new_reg_data.shape)
#(200, 11)

#Taking a more fundamental approach to regularization with LARS

#Least-angle regression (LARS) is a regression technique that is well suited for 
#high-dimensional problems, that is, p >> n, where p denotes the columns or features 
#and n is the number of samples.

from sklearn.datasets import make_regression
reg_data, reg_target = make_regression(n_samples=200,
                                           n_features=500, n_informative=10, noise=2)
                                           
from sklearn.linear_model import Lars
lars = Lars(n_nonzero_coefs=10)
lars.fit(reg_data, reg_target)
print(np.sum(lars.coef_ != 0))
#10

train_n = 100
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])
lars_500 = Lars()  # n_nonzero_coefs is 500 by default
lars_500.fit(reg_data[:train_n], reg_target[:train_n])
#Now, to see how well each model fits the unknown data, do the following:
print(np.mean(np.power(reg_target[train_n:] - lars_12.predict(reg_data[train_n:]), 2)))
#31.527714163321001
print(np.mean(np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2)))
#9.6198147535136237e+30
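The gap between `lars_12` and `lars_500` shows how sensitive LARS is to its sparsity budget. One way to choose it from the data is cross-validation along the LARS path with scikit-learn's `LarsCV`; a sketch, not part of the original recipe:

from sklearn.linear_model import LarsCV

lars_cv = LarsCV(cv=5)
lars_cv.fit(reg_data[:train_n], reg_target[:train_n])
print(np.mean(np.power(reg_target[train_n:] - lars_cv.predict(reg_data[train_n:]), 2)))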
Code Example #36
File: LARS.py  Project: tobrund/Engelhardt_DPP
    print "<<<<< N = %d >>>>>" % n

    cdg = CDG.CollinearDataGenerator(p = 20,sparsity=.8)
    X = cdg.getX(n)
    p = X.shape[1]
    y = cdg.getY(X)

    print(cdg.gamma)

    val_size = int(0.1 * X.shape[0])
    X_val = X[0:val_size,:]
    y_val = y[0:val_size,:]
    X_train = X[val_size:,:]
    y_train = y[val_size:,:]

    lars = Lars(n_nonzero_coefs=2)
    lars.fit(X, y)
    # print(lars.coef_)

    alphas, order, coefs = lars_path(X, y.T[0], verbose=True)
    # print(alphas)
    print(order)
    magnitudes = sorted(list(enumerate(coefs[:, -1])), key=lambda x: x[1])
    # map() returns an iterator in Python 3, so build a list for printing.
    magnitudes = [x[0] for x in magnitudes]
    print(magnitudes)
    # print(coefs)
    quantities = coefs[:, -1]
    quantities = np.array([quantities[i] for i in order])
    # print(quantities)
    total = sum(abs(quantities))
    # # print(total)
Code Example #37
# LARS Regression
import numpy as np
from sklearn import datasets
from sklearn.linear_model import Lars
# load the diabetes datasets
dataset = datasets.load_diabetes()
# fit a LARS model to the data
model = Lars()
model.fit(dataset.data, dataset.target)
print(model)
# make predictions
expected = dataset.target
predicted = model.predict(dataset.data)
# summarize the fit of the model
mse = np.mean((predicted-expected)**2)
print(mse)
print(model.score(dataset.data, dataset.target))
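The scores above are computed on the training data itself. A sketch of the same experiment with a held-out split; the 0.25 test fraction and the seed are arbitrary choices:

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    dataset.data, dataset.target, test_size=0.25, random_state=0)
model = Lars()
model.fit(X_train, y_train)
print(np.mean((model.predict(X_test) - y_test) ** 2))
print(model.score(X_test, y_test))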