Example no. 1
def GLS(X,y,model = "ols",**kwargs):
    """
    model = "ols","ridge","lasso","lar"
    """
    if model == "ols":
        md1 = linear_model.LinearRegression(fit_intercept=True).fit(X,y)
        md0 = linear_model.LinearRegression(fit_intercept=False).fit(X,y)
    if model == "ridge":
        alpha = get_alpha(kwargs,default=10**0.5)
        md1 = linear_model.Ridge(alpha=alpha,fit_intercept=True).fit(X,y)
        md0 = linear_model.Ridge(alpha=alpha, fit_intercept=False).fit(X, y)
    if model == 'lasso':
        alpha = get_alpha(kwargs, default=0.1)
        md1 = linear_model.Lasso(alpha=alpha,fit_intercept=True).fit(X,y)
        md0 = linear_model.Lasso(alpha=alpha, fit_intercept=False).fit(X, y)
    if model == 'lar':
        """
        TO DO
        """
        md1 = linear_model.Lars(fit_intercept=True).fit(X,y)
        md0 = linear_model.Lars(fit_intercept=False).fit(X,y)
    if model == 'kernel':
        alpha, kernel, gamma, degree, coef0 = get_kernel_coef(kwargs["alpha"])
        md1 = kernel_ridge.KernelRidge(alpha=alpha,kernel=kernel,gamma=gamma,degree=degree,coef0=coef0).fit(X,y)
        md0 = md1
    if model == 'xgb':
        md1 = xgb.XGBRegressor().fit(X,y)
        md0 = md1
    return {"1":md1,
            "-1":md0,
            "type":'GLS'}
Example no. 2
def test_multitarget():
    # Assure that estimators receiving multidimensional y do the right thing
    X = diabetes.data
    Y = np.vstack([diabetes.target, diabetes.target**2]).T
    n_targets = Y.shape[1]
    estimators = [
        linear_model.LassoLars(),
        linear_model.Lars(),
        # regression test for gh-1615
        linear_model.LassoLars(fit_intercept=False),
        linear_model.Lars(fit_intercept=False),
    ]

    for estimator in estimators:
        estimator.fit(X, Y)
        Y_pred = estimator.predict(X)
        alphas, active, coef, path = (estimator.alphas_, estimator.active_,
                                      estimator.coef_, estimator.coef_path_)
        for k in range(n_targets):
            estimator.fit(X, Y[:, k])
            y_pred = estimator.predict(X)
            assert_array_almost_equal(alphas[k], estimator.alphas_)
            assert_array_almost_equal(active[k], estimator.active_)
            assert_array_almost_equal(coef[k], estimator.coef_)
            assert_array_almost_equal(path[k], estimator.coef_path_)
            assert_array_almost_equal(Y_pred[:, k], y_pred)
Example no. 3
def main():

    X_train, y_train, X_test = load_data('v2')

    # tuning
    model = linear_model.Lars()

    # dict with tuning parameters
    param_grid = {'n_nonzero_coefs': range(50, 70, 1)}

    kfold = KFold(n_splits=folds, random_state=seed)

    scorer = make_scorer(rmse, greater_is_better=False)
    grid_search = GridSearchCV(model,
                               param_grid,
                               n_jobs=-1,
                               cv=kfold,
                               verbose=1,
                               scoring=scorer)
    grid_result = grid_search.fit(X_train, y_train)

    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']

    for mean, stdev, param in zip(means, stds, params):
        #print("%f (%f) with: %r" % (mean, stdev, param))
        print("{:06.5f} ({:06.5f}) with {}".format(mean, stdev, param))

    # summarize results
    print("Best: %f using %s" %
          (grid_result.best_score_, grid_result.best_params_))
Example no. 4
    def train(self, dataset):
        model = linear_model.Lars()
        independent_data = self.get_independent_variable_data(dataset)
        dependent_data = self.get_dependent_variable_data(dataset)
        trained_regression = model.fit(independent_data, dependent_data)

        return TrainedSklearnModel(self, trained_regression)
Example no. 5
def boston_lars(x=datasets.load_boston()['data'], y=datasets.load_boston()['target']):
    print('Least Angle Regression\n')
    model = linear_model.Lars()
    num_train = 500
    num_val = 6
    x_train_ones = numpy.ones((num_train, 1))
    x_train = numpy.column_stack((x_train_ones, numpy.array(x[0:num_train])))
    y_train = y[0:num_train]
    x_val_ones = numpy.ones((num_val, 1))
    x_val = numpy.column_stack((x_val_ones, numpy.array(x[num_train:num_train+num_val])))
    y_val = y[num_train:num_train+num_val]
    print('Number of training data: %i' % len(x_train))
    print('Number of validation data: %i' % len(x_val))

    coef = model.fit(x_train, y_train).coef_
    variance = 0
    print('Coefficient: %s' % coef)

    for i in range(0, num_val):
        x = x_val[i]
        y = y_val[i]
        hypo = model.predict([x])[0]
        print('\n%i.' % (i + 1))
        print('X: %s\nHypothesis: %s \ny: %s\nVariance: %s' % (x, hypo, y, (hypo - y) ** 2))
        variance += (hypo - y_val[i]) ** 2
        plt.scatter(i + 1, hypo, c='b')
        plt.scatter(i + 1, y, c='g')

    mean_variance = variance/num_val
    print('\nMean Variance: %s\n' % mean_variance)
    plt.show()
Example no. 6
    def scikit_Lars(self):
        """Function to generate a multiple regression model sklearn.linear_model.Lars"""

        # import libraries
        from sklearn import linear_model
        import numpy as np

        # Get a model
        columns = len(self.x_in[0])
        model = linear_model.Lars(n_nonzero_coefs=columns, eps=1e-17)
        model.fit(self.x_in, self.y_in)

        # Generate string with regression equation
        self.reg_model = str("y = %8.3f" % float(model.intercept_))
        for i in range(columns):
            aux_string = str(" + %8.3f" % float(model.coef_[i]) + "x" +
                             str(i + 1))
            self.reg_model += aux_string

        # Get Multiple model
        alphas = []
        alphas.append(float(model.intercept_))
        for i in range(columns):
            alphas.append(model.coef_[i])

        # Set up array with coefficients
        self.b = np.array(alphas)
Example no. 7
def test_lars_add_features(verbose=False):
    """
    assure that at least some features get added if necessary

    test for 6d2b4c
    """
    linear_model.Lars(verbose=verbose, fit_intercept=True).fit(
       np.array(
       [[0.02863763,   0.88144085, -0.02052429,
           -0.10648066, -0.06396584, -0.18338974],
        [0.02038287,   0.51463335, -0.31734681,
            -0.12830467,  0.16870657,  0.02169503],
        [0.14411476,   0.37666599,  0.2764702,
            0.0723859, -0.03812009,  0.03663579],
        [-0.29411448,  0.33321005,  0.09429278,
            -0.10635334,  0.02827505, -0.07307312],
        [-0.40929514,  0.57692643, -0.12559217,
            0.19001991, 0.07381565, -0.0072319],
        [-0.01763028,  1.,  0.04437242,
            0.11870747,  0.1235008, -0.27375014],
        [-0.06482493,  0.1233536,   0.15686536,
            0.02059646, -0.31723546,  0.42050836],
        [-0.18806577,  0.01970053,  0.02258482,
            -0.03216307,  0.17196751,  0.34123213],
        [0.11277307,  0.15590351,  0.11231502,
            0.22009306,  0.1811108,  0.51456405],
        [0.03228484, -0.12317732, -0.34223564,
            0.08323492, -0.15770904,  0.39392212],
        [-0.00586796,  0.04902901,  0.18020746,
            0.04370165, -0.06686751,  0.50099547],
        [-0.12951744,  0.21978613, -0.04762174,
            -0.27227304, -0.02722684,  0.57449581]]),
    np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]))
Example no. 8
def main(predictions=False):

    train = pd.read_csv("./data/X_train_v1.csv")

    y_train = train['SalePrice']
    X_train = train.loc[:, 'MSSubClass':'SaleCondition_Partial']

    test = pd.read_csv("./data/X_test_v1.csv")
    X_test = test.loc[:, 'MSSubClass':'SaleCondition_Partial']

    # build model
    model = linear_model.Lars(n_nonzero_coefs=64)

    # fit model
    model.fit(X_train, y_train)

    # evaluate model
    results = rmse_cv(model, X_train, y_train)
    print("RMSE-{}-CV({})={:06.5f}+-{:06.5f}".format(model_label, folds,
                                                     results.mean(),
                                                     results.std()))

    # # predict
    if predictions:
        y_test_pred_log = model.predict(X_test)
        y_test_pred = np.expm1(y_test_pred_log)
        submission = pd.DataFrame({'Id': test['Id'], 'SalePrice': y_test_pred})

        subFileName = "./submissions/sub-" + model_label + "-" + time.strftime(
            "%Y%m%d-%H%M%S") + ".csv"
        print("saving to file: " + subFileName)
        submission.to_csv(subFileName, index=False)
Example no. 9
    def run(self, x: np.ndarray, y: np.ndarray, design_matrix: np.ndarray):
        """
        Implements the LAR method to compute the polynomial_chaos coefficients. 
        Recommended only for model_selection algorithm.

        :param x: :class:`numpy.ndarray` containing the training points (samples).
        :param y: :class:`numpy.ndarray` containing the model evaluations (labels) at the training points.
        :param design_matrix: matrix containing the evaluation of the polynomials at the input points **x**.
        :return: Beta (polynomial_chaos coefficients)
        """
        polynomialbasis = design_matrix
        P = polynomialbasis.shape[1]
        n_samples, inputs_number = x.shape

        reg = regresion.Lars(fit_intercept=self.fit_intercept, verbose=self.verbose,
                             n_nonzero_coefs=self.n_nonzero_coefs, normalize=self.normalize)
        reg.fit(design_matrix, y)

        # LarsBeta = reg.coef_path_
        c_ = reg.coef_

        self.Beta_path = reg.coef_path_

        if c_.ndim == 1:
            c_ = c_.reshape(-1, 1)

        return c_, None, np.shape(c_)[1]
Example no. 10
def get_regression_models():
    models = [('LR', linear_model.LinearRegression()),
              ('R', linear_model.Ridge()),
              ('Lo', linear_model.Lasso(alpha=.015)),
              ('La', linear_model.Lars(positive=True)),
              ('OMP', linear_model.OrthogonalMatchingPursuit()),
              ('BR', linear_model.BayesianRidge()),
              ('GB',
               ensemble.GradientBoostingRegressor(alpha=0.9,
                                                  criterion='friedman_mse',
                                                  init=None,
                                                  learning_rate=0.1,
                                                  loss='ls',
                                                  max_depth=5,
                                                  max_features=None,
                                                  min_samples_leaf=1,
                                                  min_samples_split=2,
                                                  min_weight_fraction_leaf=.0,
                                                  n_estimators=400,
                                                  presort='auto',
                                                  random_state=None,
                                                  subsample=1.0,
                                                  verbose=0,
                                                  warm_start=False)),
              ('RF', ensemble.RandomForestRegressor()),
              ('AB', ensemble.AdaBoostRegressor())]
    return models
Example no. 11
def linear_regression(df, id, column_to_predict, n):
    df = df[df.id == id]
    x = df.drop(['id', 'timestamp', column_to_predict],
                axis=1).fillna(0).values
    y = df[[column_to_predict]].fillna(0).values[:, 0]

    pca = PCA(n_components=n, whiten=True)
    x = pca.fit_transform(x)

    alphas_lars, _, coef_path_lars = linear_model.lars_path(x,
                                                            y,
                                                            method='lars')
    # coef_path_cont_lars = interpolate.interp1d(alphas_lars[::-1], coef_path_lars[:, ::-1])
    xx = np.sum(np.abs(coef_path_lars.T), axis=1)
    xx /= xx[-1]
    plt.plot(xx, coef_path_lars.T)
    ymin, ymax = plt.ylim()
    plt.vlines(xx, ymin, ymax, linestyle='dashed')
    plt.xlabel('|coef|/max|coef|')
    plt.ylabel('Coefficients')
    plt.axis('tight')
    plt.show()

    res = linear_model.Lars(n_nonzero_coefs=n,
                            fit_intercept=True,
                            normalize=True)
    res.fit(x, y)
    print(res.score(x, y))
Example no. 12
def test_all():
    dp = DataPreprocessor()
    dp.read_all()

    models = [
        GradientBoostingRegressor(),
        MLPRegressor(),
        DecisionTreeRegressor(),
        GaussianProcessRegressor(),
        KNeighborsRegressor(),
        svm.SVR(),
        KernelRidge(),
        linear_model.HuberRegressor(),
        linear_model.BayesianRidge(),
        linear_model.LassoLars(alpha=.1),
        linear_model.Lars(n_nonzero_coefs=25),
        linear_model.ElasticNet(tol=1),
        linear_model.Lasso(alpha=0.1, tol=0.1),
        linear_model.Lasso(alpha=0.3, tol=1),
        LinearRegression(),
        linear_model.Ridge(),
    ]
    results = []
    print(0)
    for model in models:
        res = cross_val_score(model,
                              dp.train_inputs,
                              dp.train_outputs,
                              cv=KFold(n_splits=20))
        results.append([res.mean(), res.std(), model])
        print(1)
    for r in sorted(results):
        print(r[2], '\nAccuracy (mean std): {:.4f} {:.4f}'.format(r[0], r[1]),
              '\n--------------')
Example no. 13
def test_lars_n_nonzero_coefs(verbose=False):
    lars = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose)
    lars.fit(X, y)
    assert_equal(len(lars.coef_.nonzero()[0]), 6)
    # The path should be of length 6 + 1 in a Lars going down to 6
    # non-zero coefs
    assert_equal(len(lars.alphas_), 7)
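
The same relationship can be checked outside the test harness; a minimal sketch on synthetic data, assuming a scikit-learn import as in the other examples:

import numpy as np
from sklearn import linear_model

rng = np.random.RandomState(0)
X = rng.randn(100, 20)
y = rng.randn(100)

lars = linear_model.Lars(n_nonzero_coefs=6).fit(X, y)
assert np.count_nonzero(lars.coef_) == 6   # exactly 6 active coefficients
assert len(lars.alphas_) == 7              # the path has 6 + 1 entries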
Example no. 14
def predict_author(X, matrix_ids, new_text_vect, columns, model):

	#Lasso
	if model == "lasso":
		model_reg = linear_model.Lasso(alpha = 1.0, fit_intercept=True, max_iter=10000, tol=0.0001)
	
	elif model == "lars":
		model_reg = linear_model.Lars(fit_intercept=True)
	
	model_reg.fit(X, new_text_vect)

	y_s = []

	#Calculate distances and predict author
	w_predicted = model_reg.coef_
	num_authors = X.shape[1]
	residuals = []
	for i in range(num_authors):
		w = np.array([0.0]*num_authors, dtype = float)
		w[i] = w_predicted[i]
		y_hat = np.dot(X,w)
		residuals.append((np.linalg.norm(y_hat-new_text_vect), matrix_ids[i], y_hat))
		y_s.append(y_hat)

	if columns > 1:
		return str(math.floor(int(min(residuals)[1])))
	else:
		return min(residuals), y_s
Example no. 15
def lars(signals, dictionary, n_nonzero=0, alpha=0, lars_params=None, **kwargs):
    """
        "Homotopy" algorithm for solving the Lasso

            argmin 0.5*||X - DA||_2^2 + r*||A||_1

        for all r.

        This algorithm is supposedly the most accurate for l1
        regularization.

        In practice this is terribly slow and not very accurate: roughly 20x
        slower than OMP, which is strange given that OMP solves an NP-hard
        problem while this one is convex.

        :param signals:
            Signals to encode. Shape (signal_size, n_signals) or (signal_size,)

        :param dictionary:
            Dictionary, shape (signal_size, n_atoms)

        :param n_nonzero:
            Number of nonzero coefficients to use

        :param alpha:
            Regularization parameter. Overwrites n_nonzero

        :param lars_params:
            See the sklearn.linear_model.LassoLars docs

        :param kwargs:
            Not used. Just to make calling API for all regularization algorithms the same

        :return:
            Sparse codes, shape (n_atoms, n_signals) or (n_atoms,)

    """
    params = {
        'precompute': True,
        'fit_path': False,
        'normalize': True
    }

    if n_nonzero > 0 and alpha == 0:
        params['n_nonzero_coefs'] = int(n_nonzero)
        model = linear_model.Lars()
    elif alpha > 0:
        params['alpha'] = alpha
        model = linear_model.LassoLars()
    else:
        raise ValueError('Need to specify either regularization '
                         'parameter alpha or number of nonzero '
                         'coefficients n_nonzero')

    if isinstance(lars_params, dict):
        params.update(lars_params)

    model.set_params(**params)
    model.fit(dictionary.copy(), signals)
    return model.coef_.T.copy()
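
A short usage sketch of the wrapper above on random data; it assumes an older scikit-learn release in which Lars/LassoLars still accept the normalize parameter set in params:

import numpy as np

rng = np.random.RandomState(0)
dictionary = rng.randn(64, 128)   # shape (signal_size, n_atoms)
signals = rng.randn(64, 10)       # shape (signal_size, n_signals)

# sparsity-constrained encoding: keep 5 nonzero coefficients per signal
codes = lars(signals, dictionary, n_nonzero=5)
print(codes.shape)                # (n_atoms, n_signals) == (128, 10)

# l1-regularized encoding via LassoLars instead
codes_l1 = lars(signals, dictionary, alpha=0.05)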
Example no. 16
 def test_model_lars(self):
     model, X = fit_regression_model(linear_model.Lars())
     model_onnx = convert_sklearn(
         model,
         "lars", [("input", FloatTensorType([None, X.shape[1]]))],
         target_opset=TARGET_OPSET)
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(X, model, model_onnx, basename="SklearnLars-Dec4")
Example no. 17
    def __init__(self):
        self.mortality_predictor = linear_model.ARDRegression()

        self.natality_predictor = linear_model.Lars()

        self.migration_predictor = linear_model.LinearRegression()

        self.mortality_offset = 1000000
Example no. 18
def train_models(x, y):
    model1 = linear_model.Lars(n_nonzero_coefs=1)
    model2 = linear_model.ElasticNetCV()
    model3 = linear_model.BayesianRidge()
    model1.fit(x, y)
    model2.fit(x, y)
    model3.fit(x, y)
    return [model1, model2, model3]
Example no. 19
def test_lars_add_features():
    # assure that at least some features get added if necessary
    # test for 6d2b4c
    # Hilbert matrix
    n = 5
    H = 1. / (np.arange(1, n + 1) + np.arange(n)[:, np.newaxis])
    clf = linear_model.Lars(fit_intercept=False).fit(H, np.arange(n))
    assert np.all(np.isfinite(clf.coef_))
Example no. 20
def test_sk_Lars():
    print("Testing sklearn, Lars...")
    mod = linear_model.Lars()
    X, y = iris_data
    mod.fit(X, y)
    docs = {'name': "Lars test"}
    fv = X[0, :]
    upload(mod, fv, docs)
Example no. 21
def get_all_regrs():
    regrs = {
        "Linear regression":
        linear_model.LinearRegression(),
        # "Perceptron": linear_model.Perceptron(),
        "Lars":
        linear_model.Lars(),
        "Lasso":
        linear_model.LassoCV(max_iter=5000),
        # "Passive Aggressive": linear_model.PassiveAggressiveRegressor(),
        "PLS":
        PLS(n_components=3),
        "Random Forest":
        ensemble.RandomForestRegressor(),
        "Gradient Boost":
        ensemble.GradientBoostingRegressor(),
        "Extra Trees":
        ensemble.ExtraTreesRegressor(max_depth=2),
        "Ada Boost":
        ensemble.AdaBoostRegressor(
            base_estimator=tree.DecisionTreeRegressor(max_depth=2),
            n_estimators=250),
        "Gaussian Process":
        gaussian_process.GaussianProcessRegressor(),
        # "Isotonic": isotonic.IsotonicRegression(),
        "Kernel Ridge":
        kernel_ridge.KernelRidge(),
        "Ridge CV":
        linear_model.RidgeCV(),
        # "Exp tranform": TransformedTargetRegressor(regressor=PLS(n_components=3),
        #                                            func=np.exp,
        #                                            inverse_func=np.log),
        # "Log tranform": TransformedTargetRegressor(regressor=PLS(n_components=3),
        #                                            func=np.log,
        #                                            inverse_func=np.exp),
        # "Inv tranform": TransformedTargetRegressor(regressor=PLS(n_components=3),
        #                                            func=invert,
        #                                            inverse_func=invert),
        # "Log regressor": linear_model.LogisticRegressionCV(),
        "ML Perceptron":
        neural_network.MLPRegressor(max_iter=50000, hidden_layer_sizes=(5, 5)),
        "Linear SVR":
        linear_svc,
        "RBF SVR":
        svm.SVR(kernel='rbf'),
        "Poly SVR":
        svm.SVR(kernel='poly'),
        # "Sigmoid SVR": svm.SVR(kernel='sigmoid'),
        "Bayesian Ridge":
        linear_model.BayesianRidge(),
        "Huber":
        linear_model.HuberRegressor(),
        # "Poisson": linear_model.PoissonRegressor(),
        "K-neighbors":
        neighbors.KNeighborsRegressor()
    }
    # "Radius Neighbors": neighbors.RadiusNeighborsRegressor()}
    return regrs
Example no. 22
def mcfs(X, n_selected_features, **kwargs):
    """
    This function implements unsupervised feature selection for multi-cluster data.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    n_selected_features: {int}
        number of features to select
    kwargs: {dictionary}
        W: {sparse matrix}, shape (n_samples, n_samples)
            affinity matrix
        n_clusters: {int}
            number of clusters (default is 5)

    Output
    ------
    W: {numpy array}, shape(n_features, n_clusters)
        feature weight matrix

    Reference
    ---------
    Cai, Deng et al. "Unsupervised Feature Selection for Multi-Cluster Data." KDD 2010.
    """

    # use the default affinity matrix
    if 'W' not in kwargs:
        W = construct_W(X)
    else:
        W = kwargs['W']
    # default number of clusters is 5
    if 'n_clusters' not in kwargs:
        n_clusters = 5
    else:
        n_clusters = kwargs['n_clusters']

    # solve the generalized eigen-decomposition problem and get the top K
    # eigen-vectors with respect to the smallest eigenvalues
    W = W.toarray()
    W = (W + W.T) / 2
    W_norm = np.diag(np.sqrt(1 / W.sum(1)))
    W = np.dot(W_norm, np.dot(W, W_norm))
    WT = W.T
    W[W < WT] = WT[W < WT]
    eigen_value, ul = scipy.linalg.eigh(a=W)
    Y = np.dot(W_norm, ul[:, -1 * n_clusters - 1:-1])

    # solve K L1-regularized regression problem using LARs algorithm with cardinality constraint being d
    n_sample, n_feature = X.shape
    W = np.zeros((n_feature, n_clusters))
    for i in range(n_clusters):
        clf = linear_model.Lars(n_nonzero_coefs=n_selected_features)
        clf.fit(X, Y[:, i])
        W[:, i] = clf.coef_
    return W
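
A brief usage sketch for mcfs on a random data matrix, assuming numpy, scipy, sklearn.linear_model, and the construct_W helper (e.g. from the skfeature package) are available in the module:

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(100, 20)   # 100 samples, 20 features

# feature weight matrix with one column per cluster
W = mcfs(X, n_selected_features=5, n_clusters=3)

# rank features by their largest absolute weight across clusters
score = np.max(np.abs(W), axis=1)
ranking = np.argsort(score)[::-1]
print(ranking[:5])      # indices of the five highest-scoring features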
Example no. 23
def run_specific_combination(test_frame, reg_type, column_list):
    target_feature = test_frame['Endurance_Score']
    test_df = test_frame.filter(column_list, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
                test_df, target_feature.values.reshape(-1,1),
                test_size=0.20, random_state=0)
    if reg_type == 'dt':
        regr = DecisionTreeRegressor(max_depth=2)
    elif reg_type == 'lin':
        regr = linear_model.LinearRegression()
    elif reg_type == 'ridge':
        regr = linear_model.Ridge(alpha=1500.0)
    elif reg_type == 'lasso':
        regr = linear_model.Lasso(alpha=10.0)
    elif reg_type == 'bayridge':
        regr = linear_model.BayesianRidge()
    elif reg_type == 'sgd':
        regr = linear_model.SGDRegressor(loss='huber')
    elif reg_type == 'lars':
        regr = linear_model.Lars(n_nonzero_coefs=np.inf)
    elif reg_type == 'pasagv':
        regr = linear_model.PassiveAggressiveRegressor(random_state=0)
    elif reg_type == 'kernelridge':
        regr = kernel_ridge.KernelRidge()
    elif reg_type == 'svr':
        regr = svm.SVR()
    elif reg_type == 'kneigh':
        regr = neighbors.KNeighborsRegressor(algorithm='kd_tree')
    elif reg_type == 'gauss':
        regr = gaussian_process.GaussianProcessRegressor()
    elif reg_type == 'gbr':
        params = {'n_estimators': 760, 'max_depth': 4, 'min_samples_split': 3, 'learning_rate': 0.026, 'loss': 'huber'}
        regr = GradientBoostingRegressor(**params)
    elif reg_type == 'ran':
        regr = RandomForestRegressor(n_estimators=300, max_depth=8)
    elif reg_type == 'et':
        regr = ExtraTreesRegressor()
    else:
        return
    x_train_frame = X_train.copy()
    del x_train_frame['Title']
    del x_train_frame['Artist']
    regr.fit(x_train_frame, y_train.ravel())
    x_test_frame = X_test.copy()
    del x_test_frame['Title']
    del x_test_frame['Artist']
    y_pred = regr.predict(x_test_frame)
    rmse = mean_squared_error(y_test, y_pred)
    score = r2_score(y_test, y_pred)
    print("R2-score: {}, RMSE: {}".format(score, math.sqrt(rmse)))
    result_df = pd.DataFrame(columns=['Song', 'Artist', 'Endurance_Score', 'Predicted_Endurance_Score'])
    result_df['Song'] = X_test['Title']
    result_df['Artist'] = X_test['Artist']
    result_df['Endurance_Score'] = y_test.ravel()
    result_df['Predicted_Endurance_Score'] = y_pred
    result_df.to_csv('{0}/{1}.csv'.format(path_final_csv, 'predicted_midtermdata'), index=False)
Example no. 24
    def _train(self):
        x = self._train_set.features
        y = self._train_set.outputs

        self._transform = preprocessing.PolynomialFeatures(1)

        clf = linear_model.Lars(n_nonzero_coefs=400, fit_intercept=True)
        clf.fit(self._transform.fit_transform(x, y), y)

        self._model = clf.predict
Example no. 25
 def leastAngleRegression(self):
     lar = linear_model.Lars()
     lar.fit(self.training_order_start_end_districts_and_time,
             self.training_number_of_orders)
     predicted_number_of_orders = lar.predict(
         self.testing_order_start_end_districts_and_time)
     current_ride_prediction_error = numpy.mean(
         (predicted_number_of_orders - self.testing_number_of_orders)**2)
     print(current_ride_prediction_error)
     print(lar.coef_)
Example no. 26
def train_lars_regressor(X, y, k_fold):
    ##  CROSS VALIDATION
    # Without evenly distributing classes
    kf = KFold(n_splits=k_fold, shuffle=True, random_state=None)
    kf.get_n_splits()

    print(kf)
    print(kf.get_n_splits())

    # Evenly Distributing Classes: Stratified K-fold
    # skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None)
    # skf.get_n_splits(X, y)
    # print(skf)

    # LARS regressor definition
    regressor = linear_model.Lars(fit_intercept=True,
                                  verbose=False,
                                  normalize=True,
                                  precompute='auto',
                                  n_nonzero_coefs=500,
                                  eps=2.2204460492503131e-16,
                                  copy_X=True,
                                  fit_path=True,
                                  positive=False)

    # k-Fold loop
    counter = 0
    model = []
    r2 = []
    print("\nShape")
    print(X.shape)
    print(y.shape)
    for train_index, test_index in kf.split(X, y=y):
        # print("TRAIN: ", train_index, " TEST: ", test_index)
        # get indices
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        ### LARS REGRESSOR ###################################
        # fit model
        model.append(regressor.fit(X_train, y_train))

        # predict
        y_pred = model[counter].predict(X_test)

        print("Coefficient of Determination for Regressor ", counter)
        r2.append(model[counter].score(X_test, y_test))
        print(r2[counter])

        counter += 1

    # choose the model with the highest r2
    idx = np.argmax(r2)
    return model[idx]
Example no. 27
def compare_regressors():
    """ Compare the performance of regressors. """

    regressors = {
        'Linear': linear_model.LinearRegression(),
        'Ridge': linear_model.Ridge(),
        'Lasso': linear_model.Lasso(),
        'LAR': linear_model.Lars(),
        'SGD': linear_model.SGDRegressor(),
        #'ARD': linear_model.ARDRegression() # takes a minute or two
        }
    _compare_estimators(regressors, classify=False)
Example no. 28
 def default_models_(self):
     return {
         'Tree': {'clf': tree.DecisionTreeRegressor(),
                  'param': {'max_depth': [3, 5, 7, 10, 20]
                            }},
         'GBDT': {'clf': ensemble.GradientBoostingRegressor(random_state=1),
                  'param': {
                      'n_estimators': [50, 100, 150, 200],
                      'learning_rate': [0.1],
                      'max_depth': [4, 6, 8],
                      'alpha': [0.7, 0.8, 0.9],
                      'max_leaf_nodes': [10, 20],
                      'min_samples_split': [2, 4, 7]
                  }},
         'Lin': {'clf': linear_model.LinearRegression(),
                 'param': {
                     'fit_intercept': [True, False],
                     'normalize': [True, False]
                 }},
         'Ridge': {'clf': linear_model.Ridge(),
                   'param': {}},
         'Lasso': {'clf': linear_model.Lasso(),
                   'param': {}},
         'ElasN': {'clf': linear_model.ElasticNet(),
                   'param': {}},
         'Lars': {'clf': linear_model.Lars(),
                  'param': {}},
         'Bayers': {'clf': linear_model.BayesianRidge(),
                    'param': {}},
         'Poly2': {'clf': Pipeline([('poly', PolynomialFeatures(degree=2)),
                                    ('std_scaler', StandardScaler()),
                                    ('line_reg', linear_model.LinearRegression())
                                    ]),
                   'param': {}},
         'SGD': {'clf': linear_model.SGDRegressor(),
                 'param': {}},
         'SVM': {'clf': svm.SVR(kernel='rbf', C=1.0, epsilon=1),
                 'param': {
                     'C': [1, 10, 100, 1000, 10000]
                 }},
         'Knn': {'clf': neighbors.KNeighborsRegressor(),
                 'param': {}},
         'RF': {'clf': ensemble.RandomForestRegressor(random_state=1),
                'param':
                    {'n_estimators': [10, 30, 50, 100, 150], }},
         'ADA': {'clf': ensemble.AdaBoostRegressor(n_estimators=100),
                 'param': {}},
         'BAG': {'clf': BaggingRegressor(bootstrap=True),
                 'param': {'n_estimators': [50, 100, 200]}},
         'ET': {'clf': tree.ExtraTreeRegressor(),
                'param': {}},
     }
Example no. 29
    def __init__(self, method, params, i=0):
        self.algorithm_list = [
            'PLS', 'GP', 'OLS', 'OMP', 'Lasso', 'Elastic Net', 'Ridge',
            'Bayesian Ridge', 'ARD', 'LARS', 'LASSO LARS', 'SVR', 'KRR', 'GBR'
        ]
        self.method = method
        self.outliers = None
        self.ransac = False

        #print(params)
        if self.method[i] == 'PLS':
            self.model = PLSRegression(**params[i])

        if self.method[i] == 'OLS':
            self.model = linear.LinearRegression(**params[i])

        if self.method[i] == 'OMP':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.OrthogonalMatchingPursuit(**params_temp)

        if self.method[i] == 'LASSO':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Lasso(**params_temp)

        if self.method[i] == 'Elastic Net':
            params_temp = copy.copy(params[i])
            self.model = linear.ElasticNet(**params_temp)

        if self.method[i] == 'Ridge':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Ridge(**params_temp)

        if self.method[i] == 'BRR':
            self.model = linear.BayesianRidge(**params[i])

        if self.method[i] == 'ARD':
            self.model = linear.ARDRegression(**params[i])

        if self.method[i] == 'LARS':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Lars(**params_temp)

        if self.method[i] == 'SVR':
            self.model = svm.SVR(**params[i])

        if self.method[i] == 'KRR':
            self.model = kernel_ridge.KernelRidge(**params[i])
Example no. 30
 def test_model_lars(self):
     model, X = _fit_model(linear_model.Lars())
     model_onnx = convert_sklearn(
         model, "lars", [("input", FloatTensorType([None, X.shape[1]]))])
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(
         X.astype(numpy.float32),
         model,
         model_onnx,
         basename="SklearnLars-Dec4",
         allow_failure="StrictVersion("
         "onnxruntime.__version__)"
         "<= StrictVersion('0.2.1')",
     )