Example 1
    def __init__(self, method, yrange, params, i=0, ransacparams={}):
        self.method = method
        self.outliers = None
        self.inliers = None
        self.ransac = False
        self.yrange = yrange[i]

        if self.method[i] == 'PLS':
            self.model = PLSRegression(**params[i])
        if self.method[i] == 'OLS':
            self.model = linear.LinearRegression(**params[i])
        if self.method[i] == 'OMP':
            #check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.OrthogonalMatchingPursuit(**params_temp)
            else:
                params_temp.pop('n_nonzero_coefs')
                self.model = linear.OrthogonalMatchingPursuitCV(**params_temp)

        if self.method[i] == 'Lasso':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.Lasso(**params_temp)
            else:
                params_temp.pop('alpha')
                self.model = linear.LassoCV(**params_temp)

        if self.method[i] == 'Elastic Net':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.ElasticNet(**params_temp)
            else:
                params_temp.pop('alpha')
                self.model = linear.ElasticNetCV(**params_temp)

        if self.method[i] == 'Ridge':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.Ridge(**params_temp)
            else:
                #Ridge requires a specific set of alphas to be provided... this needs more work to be implemented correctly
                self.model = linear.RidgeCV(**params_temp)

        if self.method[i] == 'Bayesian Ridge':
            self.model = linear.BayesianRidge(**params[i])
        if self.method[i] == 'ARD':
            self.model = linear.ARDRegression(**params[i])
        if self.method[i] == 'LARS':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.Lars(**params_temp)
            else:
                self.model = linear.LarsCV(**params_temp)

        if self.method[i] == 'Lasso LARS':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # check whether to do IC or not
            self.do_ic = params[i]['IC']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV and IC parameter
            params_temp.pop('CV')
            params_temp.pop('IC')
            if self.do_cv is False and self.do_ic is False:
                self.model = linear.LassoLars(**params_temp)
            if self.do_cv is True and self.do_ic is False:
                self.model = linear.LassoLarsCV(**params_temp)
            if self.do_cv is False and self.do_ic is True:
                self.model = linear.LassoLarsIC(**params_temp)
            if self.do_cv is True and self.do_ic is True:
                print(
                    "Can't use both cross validation AND information criterion to optimize!"
                )

        if self.method[i] == 'SVR':
            self.model = svm.SVR(**params[i])
        if self.method[i] == 'KRR':
            self.model = kernel_ridge.KernelRidge(**params[i])

        if self.method[i] == 'GP':
            #get the method for dimensionality reduction and the number of components
            self.reduce_dim = params[i]['reduce_dim']
            self.n_components = params[i]['n_components']
            #create a temporary set of parameters
            params_temp = copy.copy(params[i])
            #Remove parameters not accepted by Gaussian Process
            params_temp.pop('reduce_dim')
            params_temp.pop('n_components')
            self.model = GaussianProcess(**params_temp)
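A minimal usage sketch for this constructor, assuming the enclosing class is named regression, that linear aliases sklearn.linear_model, and that training arrays exist (all assumptions, since only the method body is shown):

# Hypothetical instantiation: 'Lasso' with cross-validated alpha.
# The 'alpha' key must be present because the CV branch pops it.
params = [{'CV': True, 'alpha': 1.0, 'fit_intercept': True}]
reg = regression(method=['Lasso'], yrange=[(0, 100)], params=params)
reg.model.fit(X_train, y_train)  # X_train, y_train assumed defined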
Example 2

r1 = linear_model.LinearRegression(normalize=True, n_jobs=29)
r2 = ensemble.RandomForestRegressor(max_depth=3,
                                    min_samples_split=2,
                                    random_state=0,
                                    n_estimators=700)
r3 = ensemble.AdaBoostRegressor(random_state=0,
                                loss='linear',
                                learning_rate=3.0,
                                n_estimators=700)
r4 = ensemble.GradientBoostingRegressor()
r5 = ensemble.BaggingRegressor()  # overfitting
r6 = ensemble.ExtraTreesRegressor()  # overfitting
r7 = linear_model.BayesianRidge(normalize=True)
r8 = linear_model.ARDRegression(normalize=True)
r9 = linear_model.HuberRegressor()
r10 = linear_model.Lasso(random_state=0, selection='cyclic', normalize=False)
r11 = svm.LinearSVR(random_state=0,
                    loss='squared_epsilon_insensitive',
                    dual=True)
r12 = gaussian_process.GaussianProcessRegressor()  # overfitting
r13 = linear_model.PassiveAggressiveRegressor()  # takes an acceptable amount of time
r14 = linear_model.RANSACRegressor()  # overfitting?
r15 = linear_model.SGDRegressor(shuffle=True,
                                penalty='l1',
                                loss='squared_epsilon_insensitive',
                                learning_rate='invscaling',
                                epsilon=0.1,
                                early_stopping=False,
                                average=True)
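One way to check the overfitting notes above is to score the estimators with k-fold cross-validation instead of on the training set. A minimal sketch, assuming a feature matrix X and target y are already defined:

from sklearn.model_selection import cross_val_score

# Compare a few of the regressors above on held-out folds (X, y assumed defined).
for name, reg in [('OLS', r1), ('RandomForest', r2), ('Bagging', r5), ('ExtraTrees', r6)]:
    scores = cross_val_score(reg, X, y, cv=5, scoring='r2')
    print('%-12s mean R^2 = %.3f' % (name, scores.mean()))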
Example 3
import matplotlib.pyplot as plt
import pandas

from sklearn import linear_model

dataset = pandas.read_csv('clean6001.csv')

array = dataset.values[:2000]
X = array[:, 3:5]
y = array[:, 6]

model_aic = linear_model.ARDRegression()
model_aic.fit(X, y)

y_aic = model_aic.predict(X)
Y_validation = y
plt.scatter(range(len(X))[0:2000], Y_validation[:2000], color='orange')
plt.plot(range(len(X))[0:2000], y_aic[:2000], color='red', linewidth=3)

plt.show()
Example 4
                              columns=cols_dynamicRes)  # training results

#X_train1_dynamic, X_test1_dynamic, y_train1_dynamic, y_test1_dynamic = train_test_split(dfArr1_dynamic, dfRes1_dynamic, test_size=0.2)

#print (X_train1_dynamic.shape, y_train1_dynamic.shape)
#print (X_test1_dynamic.shape, y_test1_dynamic.shape)

#feat_extr = SelectKBest(k=7)
#fitter = feat_extr.fit(dfArr1_dynamic, ravel(dfRes1_dynamic))

#scores1 = fitter.scores_

#scores = pd.DataFrame(fitter.scores_, index=cols_dynamicAttr)

#model = ExtraTreesClassifier()
#model = model.fit(dfArr1_dynamic, ravel(dfRes1_dynamic))

#model_scores = pd.DataFrame(model.feature_importances_, index=cols_dynamicAttr)

#rlasso = RandomizedLasso()
#lasso = rlasso.fit(dfArr1_dynamic, ravel(dfRes1_dynamic))

#lasso_scores = pd.DataFrame(lasso.scores_, index=cols_dynamicAttr)

ard = linear_model.ARDRegression(compute_score=True)
autorelevdet = ard.fit(dfArr1_dynamic, ravel(dfRes1_dynamic))

# Note: with compute_score=True, ARDRegression.scores_ holds one objective
# value per iteration, not one score per feature, so it cannot be indexed
# by attribute.
ard_scores = pd.DataFrame(autorelevdet.scores_)

ard_coef = pd.DataFrame(autorelevdet.coef_, index=cols_dynamicAttr)
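Since scores_ tracks the optimization objective rather than feature relevance, a per-feature ranking is more naturally read from the fitted coefficients; ARD drives irrelevant coefficients toward zero. A sketch under the same variable names:

import numpy as np

# Rank attributes by absolute coefficient magnitude.
ranking = pd.DataFrame({'coef': autorelevdet.coef_,
                        'abs_coef': np.abs(autorelevdet.coef_)},
                       index=cols_dynamicAttr).sort_values('abs_coef', ascending=False)
print(ranking.head())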
Example 5
def revenue_growth_model(ticker):
    financial_data = scraper.getFinancialData(ticker)

    revenue = financial_data["Revenue"]
    df = pd.DataFrame(list(revenue.items()))
    x = df[0].to_frame()  # x-values are the years
    y = df[1].to_frame()  # y-values are revenue values (given)

    ### linear modeling ###
    """ make the models """
    ols_reg = linear_model.LinearRegression()  #ordinary least squares
    ridge_reg = linear_model.Ridge()  #ridge regression
    lasso_reg = linear_model.Lasso()  #lasso regression
    LARS_reg = linear_model.LassoLars()  #least angle regression (on lasso)
    b_ridge_reg = linear_model.BayesianRidge()  #bayesian ridge regression
    ard_reg = linear_model.ARDRegression()  #bayesian ARD regression
    sgd_reg = linear_model.SGDRegressor(
    )  #stochastic gradient descent regression
    ransac_model = linear_model.RANSACRegressor(
        ols_reg)  #fit linear model with RANdom SAmple Consensus algorithm
    """ fit the models to a regression function based on data """
    ols_reg.fit(x, y)
    ridge_reg.fit(x, y)
    lasso_reg.fit(x, y)
    LARS_reg.fit(x, y)
    b_ridge_reg.fit(x, y)
    ard_reg.fit(x, y)
    sgd_reg.fit(x, y)
    ransac_model.fit(x, y)

    ### in-sample scoring (R^2 on the training data; see the k-fold sketch after this function) ###

    cv_scores = {
        'ols_scores': ols_reg.score(x, y),
        'ridge_scores': ridge_reg.score(x, y),
        'lasso_scores': lasso_reg.score(x, y),
        'LARS_scores': LARS_reg.score(x, y),
        'b_ridge_scores': b_ridge_reg.score(x, y),
        'ard_scores': ard_reg.score(x, y),
        'sgd_scores': sgd_reg.score(x, y),
        'ransac_scores': ransac_model.score(x, y)
    }
    vals = list(cv_scores.values())
    keys = list(cv_scores.keys())

    max_cv = keys[vals.index(max(vals))]
    print(vals)
    print(max_cv)

    predicted = []
    if max_cv == 'ols_scores':
        predicted = ols_reg.predict(x)
    elif max_cv == 'ridge_scores':
        predicted = ridge_reg.predict(x)
    elif max_cv == 'lasso_scores':
        predicted = lasso_reg.predict(x)
    elif max_cv == 'LARS_scores':
        predicted = LARS_reg.predict(x)
    elif max_cv == 'b_ridge_scores':
        predicted = b_ridge_reg.predict(x)
    elif max_cv == 'ard_scores':
        predicted = ard_reg.predict(x)
    elif max_cv == 'sgd_scores':
        predicted = sgd_reg.predict(x)
    else:
        predicted = ransac_model.predict(x)

    return {'x': x, 'y': y, 'max_cv': max_cv, 'predicted': predicted}
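The scores collected in cv_scores are R^2 values computed on the training data, so no data is actually held out. Genuine k-fold scoring would look like the sketch below (small k, since yearly revenue series are short):

from sklearn.model_selection import cross_val_score

# Hypothetical re-scoring with 3-fold CV on the same x, y built above.
for name, reg in [('ols', ols_reg), ('ridge', ridge_reg), ('lasso', lasso_reg)]:
    folds = cross_val_score(reg, x, y, cv=3, scoring='r2')
    print(name, folds.mean())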
Example 6
    def __init__(self, method, yrange, params, i=0):  #TODO: yrange doesn't currently do anything. Remove or do something with it!
        self.algorithm_list = ['PLS',
                               'GP',
                               'OLS',
                               'OMP',
                               'Lasso',
                               'Elastic Net',
                               'Ridge',
                               'Bayesian Ridge',
                               'ARD',
                               'LARS',
                               'LASSO LARS',
                               'SVR',
                               'KRR',
                               'GBR'
                               ]
        self.method = method
        self.outliers = None
        self.ransac = False

        #print(params)
        if self.method[i] == 'PLS':
            self.model = PLSRegression(**params[i])

        if self.method[i] == 'OLS':
            self.model = linear.LinearRegression(**params[i])

        if self.method[i] == 'OMP':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.OrthogonalMatchingPursuit(**params_temp)

        if self.method[i] == 'Lasso':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Lasso(**params_temp)

        if self.method[i] == 'Elastic Net':
            params_temp = copy.copy(params[i])
            self.model = linear.ElasticNet(**params_temp)

        if self.method[i] == 'Ridge':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Ridge(**params_temp)

        if self.method[i] == 'Bayesian Ridge':
            self.model = linear.BayesianRidge(**params[i])

        if self.method[i] == 'ARD':
            self.model = linear.ARDRegression(**params[i])

        if self.method[i] == 'LARS':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Lars(**params_temp)

        if self.method[i] == 'LASSO LARS':
            self.model = linear.LassoLars(**params[i])

        if self.method[i] == 'SVR':
            self.model = svm.SVR(**params[i])

        if self.method[i] == 'KRR':
            self.model = kernel_ridge.KernelRidge(**params[i])

        if self.method[i] == 'GP':
            # get the method for dimensionality reduction and the number of components
            self.reduce_dim = params[i]['reduce_dim']
            self.n_components = params[i]['n_components']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove parameters not accepted by Gaussian Process
            params_temp.pop('reduce_dim')
            params_temp.pop('n_components')
            self.model = GaussianProcessRegressor(**params_temp)

        if self.method[i] == 'GBR':
            self.model = GradientBoostingRegressor(**params[i])
Example 7
def compute_regression_model(y, xs, years, country_list, target, ks):
    countries_list_iso3 = [
        pycountry.countries.get(name=country).alpha_3
        for country in country_list
    ]

    idx = pd.MultiIndex.from_product([countries_list_iso3, years],
                                     names=["Country", "Year"])
    col = ["Predicted"]
    prediction_df = pd.DataFrame('-', idx, col)

    res = defaultdict(dict)
    for country in countries_list_iso3:
        #country = pycountry.countries.get(name=c).alpha_3
        '''temp = xs_additional.loc[(years, country), :]
        temp.index = temp.index.droplevel(1)
        temp = pd.concat([temp for i in range(len(xs.index.levels[0].tolist())) ], keys=xs.index.levels[0].tolist(), names=['Province'])

        xs_plus = xs.copy()
        xs_plus = pd.concat([xs_plus, temp], axis=1)

        df = bdf.filter_origin_country_dataset(y, country, years, [target], xs_plus, 2)'''

        df = bdf.filter_origin_country_dataset(y, country, years, [target], xs,
                                               2)
        df = df.reset_index(level=0, drop=True)

        X = df.drop(["y"], axis=1)
        y_temp = df["y"]

        f_regression_norm = normalize(
            (f_regression(X, y_temp)[0]).reshape(1, -1))[0]
        mutual_info_regression_norm = normalize(
            mutual_info_regression(X, y_temp).reshape(1, -1))[0]

        scorers_aggregation = sum(
            [f_regression_norm, mutual_info_regression_norm])

        scorers_aggregation_norm = normalize(scorers_aggregation.reshape(
            1, -1))[0]

        scorers_list = [
            "f_regression_norm", "mutual_info_regression_norm",
            "scorers_aggregation_norm"
        ]

        models_function = [
            linear_model.LinearRegression(normalize=True),
            linear_model.LassoCV(alphas=[0.01, 0.05, 0.1, 1], normalize=True),
            linear_model.RidgeCV(alphas=[0.01, 0.05, 0.1, 1], normalize=True),
            linear_model.BayesianRidge(normalize=True),
            linear_model.ARDRegression(normalize=True)
        ]

        model = []
        mse = []
        features = []
        for scorer in scorers_list:
            #print(scorer)
            model_temp_k = []
            mse_temp_k = []
            features_temp_k = []
            for k in ks:
                temp = mse_best_model(X, y_temp,
                                      vars()[scorer], k, models_function)
                model_temp_k.append(temp[0])
                mse_temp_k.append(temp[1])
                features_temp_k.append(temp[2])

            model.append(model_temp_k[mse_temp_k.index(min(mse_temp_k))])
            mse.append(min(mse_temp_k))
            features.append(features_temp_k[mse_temp_k.index(min(mse_temp_k))])

        model = model[mse.index(min(mse))]
        features = features[mse.index(min(mse))]

        clf = [
            reg for reg in models_function if model == str(reg).split("(")[0]
        ][0].fit(X[features], y_temp)
        prediction = clf.predict(X[features])

        prediction_df.loc[(country, years), "Predicted"] = prediction
        print(country)
        res[country]["features"] = features
        res[country]["coefficients"] = np.concatenate(
            (np.array([clf.intercept_]), clf.coef_))
        res[country]["model"] = model
    #prediction_df.index = years
    prediction_df = prediction_df.swaplevel()
    prediction_df = prediction_df.sort_index()

    return (prediction_df, res)
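The helper mse_best_model is not shown here; the underlying idea of keeping the k best-scored features can be sketched directly from one of the normalized scorer vectors (names reused from the loop above):

import numpy as np

# Hypothetical top-k selection using f_regression_norm as the score vector.
k = 5
top_idx = np.argsort(f_regression_norm)[::-1][:k]  # indices of the k highest scores
selected = X.columns[top_idx]
X_k = X[selected]  # reduced design matrix to fit the candidate models on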
Example 8
def training(request, model):
    acao = request.session['acao']
    bolsa = pd.read_csv("app/data/bolsa.csv",
                        index_col='Date').groupby('Codigo')
    dados = bolsa.get_group(acao)
    X = dados[['Open', 'High', 'Low', 'Close', 'Volume']]
    y = dados['High'].shift(-1).fillna(method='pad')
    Y = pd.DataFrame({
        'Alta_real': dados['High'].shift(-1).fillna(method='pad'),
        'Baixa_real': dados['Low'].shift(-1).fillna(method='pad')
    })
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.20,
                                                        shuffle=False,
                                                        random_state=0)
    X_train, X_test, Ytrain, Ytest = train_test_split(X,
                                                      Y,
                                                      test_size=0.20,
                                                      shuffle=False,
                                                      random_state=0)
    base = dados.to_html()

    #training
    regr = linear_model.BayesianRidge()
    regr.fit(X_train, y_train)

    #trainingmulti
    if (model == 'adr'):
        modelo = "Automatic Relevance Determination Regression"
        #regr_multi = MultiOutputRegressor(svm.SVR())
        regr_multi = MultiOutputRegressor(
            linear_model.ARDRegression(compute_score=True))
    elif (model == 'ada'):
        modelo = "Ada Regressor"
        regr_multi = MultiOutputRegressor(
            AdaBoostRegressor(random_state=0, n_estimators=100))
    elif (model == 'GB'):
        modelo = "GradientBoostingRegressor"
        regr_multi = MultiOutputRegressor(
            GradientBoostingRegressor(random_state=1, n_estimators=10))
    else:
        modelo = "LinerRegression com Bayesian Ridge"
        regr_multi = MultiOutputRegressor(linear_model.BayesianRidge())
    """
    # importing VotingRegressor does not work; sklearn needs to be updated
    elif (model == 'VR'):
        modelo = "Voting Regressor com GradientBoostingRegressor, RandomForestRegressor, LinearRegression"
        reg1 =  MultiOutputRegressor(GradientBoostingRegressor(random_state=1, n_estimators=10))
        reg2 =  MultiOutputRegressor(RandomForestRegressor(random_state=1, n_estimators=10))
        reg3 =  MultiOutputRegressor(LinearRegression())
        regr_multi = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2), ('lr', reg3)])
    """

    regr_multi.fit(X_train, Ytrain)
    Y_PRED = regr_multi.predict(X_test)
    real = pd.DataFrame(Ytest)
    previsto = pd.DataFrame(Y_PRED,
                            index=Ytest.index,
                            columns=['Alta_prevista', 'Baixa_prevista'])
    #real.rename(columns={"High": "real"})
    #previsto = previsto.set_index(real.index)
    data = pd.concat([real, previsto], axis=1)
    data['diferenca_alta'] = data['Alta_real'] - data['Alta_prevista']
    data['diferenca_baixa'] = data['Baixa_real'] - data['Baixa_prevista']
    erro = data['diferenca_alta']
    data = data.to_html()
    #data = previsto.head().to_html()
    """
    #forecast
    y_pred = regr.predict(X_test)
    real = pd.DataFrame(y_test)
    previsto = pd.DataFrame(y_pred, index=real.index, columns=['previsto'])
    #real.rename(columns={"High": "real"})
    #previsto = previsto.set_index(real.index)
    data = pd.concat([real,previsto],axis=1)
    data['diferenca'] = data['High']-data['previsto']
    erro = np.array(data['diferenca'])
    data = data.to_html()
    #data = previsto.head().to_html()
    """

    #metrics
    mae = mean_absolute_error(Ytest, Y_PRED)
    mse = mean_squared_error(Ytest, Y_PRED)
    ev = explained_variance_score(Ytest, Y_PRED, multioutput='uniform_average')
    r2 = r2_score(Ytest, Y_PRED)

    #chart

    plt.figure(figsize=(5, 5))
    plt.xlabel("Data")
    plt.ylabel("High")
    plt.title(acao)
    #plt.plot(y_train)
    plt.plot(Ytest['Alta_real'])
    plt.plot(previsto['Alta_prevista'])
    #plt.grid(True)
    plt.savefig("media/forecast_reg.png")

    plt.figure(figsize=(5, 5))
    plt.title('Erro Alta (real - prevista)')
    plt.grid(True)
    plt.hist(erro, bins=5)
    plt.savefig("media/hist_reg.png")

    #params
    params = regr.get_params()

    #persistence
    if (model == 'VR'):
        dump(regr_multi, 'app/learners/' + acao + '_VR.joblib')
    elif (model == 'GB'):
        dump(regr_multi, 'app/learners/' + acao + '_GB.joblib')
    elif (model == 'adr'):
        dump(regr_multi, 'app/learners/' + acao + '_ADR.joblib')
    elif (model == 'ada'):
        dump(regr_multi, 'app/learners/' + acao + '_ADAR.joblib')
    else:
        dump(regr_multi, 'app/learners/' + acao + '_NBR.joblib')

    context = {
        'title': 'Treino Regressão',
        'mae': mae,
        'mse': mse,
        'ev': ev,
        'r2': r2,
        'base': base,
        'data': data,
        'acao': acao,
        'modelo': modelo,
        'params': params,
        'multi': Y_PRED[0]
    }
    return render(request, 'app/training.html', context)
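A self-contained illustration of the MultiOutputRegressor wrapper used above, on synthetic data (all names here are illustrative):

from sklearn.datasets import make_regression
from sklearn.linear_model import ARDRegression
from sklearn.multioutput import MultiOutputRegressor

# Two targets, analogous to Alta_real/Baixa_real above.
X_demo, Y_demo = make_regression(n_samples=200, n_features=5, n_targets=2,
                                 noise=0.1, random_state=0)
multi = MultiOutputRegressor(ARDRegression(compute_score=True))
multi.fit(X_demo, Y_demo)
print(multi.predict(X_demo[:3]))  # one row per sample, one column per target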
Example 9
        poly2coefs = poly.polyfit(x, y, 2)
        poly2fit = poly.polyval(x_new, poly2coefs)
        fit_dic['poly2'] = poly2fit
    if 'poly3' in fits:
        poly3coefs = poly.polyfit(x, y, 3)
        poly3fit = poly.polyval(x_new, poly3coefs)
        fit_dic['poly3'] = poly3fit
    if 'spline' in fits:
        spline_params = splrep(x, y, s=s, k=3)
        splinefit = splev(x_new, spline_params)
        fit_dic['spline'] = splinefit
    return fit_dic


modeldict = {
    'ardregression': lm.ARDRegression(),
    'bayesianridge': lm.BayesianRidge(),
    'elasticnet': lm.ElasticNet(),
    'elasticnetcv': lm.ElasticNetCV(),
    'huberregression': lm.HuberRegressor(),
    'lars': lm.Lars(),
    'larscv': lm.LarsCV(),
    'lasso': lm.Lasso(),
    'lassocv': lm.LassoCV(),
    'lassolars': lm.LassoLars(),
    'lassolarscv': lm.LassoLarsCV(),
    'lassolarsic': lm.LassoLarsIC(),
    'linearregression': lm.LinearRegression(),
    'orthogonalmatchingpursuit': lm.OrthogonalMatchingPursuit(),
    'orthogonalmatchingpursuitcv': lm.OrthogonalMatchingPursuitCV(),
    'passiveaggressiveregressor': lm.PassiveAggressiveRegressor(),
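The listing is truncated above; a sketch of how such a registry might be consumed (X_train and y_train assumed supplied by the caller):

# Hypothetical lookup-and-fit against the registry.
model = modeldict['ardregression']
model.fit(X_train, y_train)
print(model.score(X_train, y_train))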
Example 10
def fit_regression(P, x, u, rule="LS", retall=False, **kws):
    """
    Fit a polynomial chaos expansion using linear regression.

    Args:
        P (Poly) : Polynomial expansion with `P.shape=(M,)` and `P.dim=D`.
        x (array_like) : Collocation nodes with `x.shape=(D,K)`.
        u (array_like) : Model evaluations with `len(u)=K`.
        retall (bool) : If True return Fourier coefficients in addition to R.
        rule (str) : Regression method used.

    Returns:
        (Poly, np.ndarray) : Fitted polynomial with `R.shape=u.shape[1:]` and
                `R.dim=D`. The Fourier coefficients in the estimation.

    Examples:
        >>> x, y = cp.variable(2)
        >>> P = cp.Poly([1, x, y])
        >>> s = [[-1,-1,1,1], [-1,1,-1,1]]
        >>> u = [0,1,1,2]
        >>> print(cp.around(fit_regression(P, s, u), 14))
        0.5q0+0.5q1+1.0
    """
    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(1, *x.shape)
    u = np.array(u)

    Q = P(*x).T
    shape = u.shape[1:]
    u = u.reshape(u.shape[0], int(np.prod(u.shape[1:])))

    rule = rule.upper()

    # Local rules
    if rule == "LS":
        uhat = linalg.lstsq(Q, u)[0].T

    elif rule == "T":
        uhat, alphas = rlstsq(Q, u, kws.get("order", 0),
                              kws.get("alpha", None), False, True)
        uhat = uhat.T

    elif rule == "TC":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), True)
        uhat = uhat.T

    else:

        # Scikit-learn wrapper
        try:
            _ = linear_model
        except NameError:
            raise NotImplementedError("sklearn not installed")

        if rule == "BARD":
            solver = linear_model.ARDRegression(fit_intercept=False,
                                                copy_X=False,
                                                **kws)

        elif rule == "BR":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.BayesianRidge(**kws)

        elif rule == "EN":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.ElasticNet(**kws)

        elif rule == "ENC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.ElasticNetCV(**kws)

        elif rule == "LA":  # success
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.Lars(**kws)

        elif rule == "LAC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LarsCV(**kws)

        elif rule == "LAS":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.Lasso(**kws)

        elif rule == "LASC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoCV(**kws)

        elif rule == "LL":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLars(**kws)

        elif rule == "LLC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLarsCV(**kws)

        elif rule == "LLIC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLarsIC(**kws)

        elif rule == "OMP":
            solver = linear_model.OrthogonalMatchingPursuit(**kws)

        uhat = solver.fit(Q, u).coef_

    u = u.reshape(u.shape[0], *shape)

    R = cp.poly.sum((P * uhat), -1)
    R = cp.poly.reshape(R, shape)

    if retall == 1:
        return R, uhat

    elif retall == 2:
        if rule == "T":
            return R, uhat, Q, alphas
        return R, uhat, Q

    return R
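Beyond the default least-squares rule exercised in the docstring, the sklearn-backed rules are selected by name, with extra keyword arguments forwarded to the solver. For instance, reusing the docstring's P, s and u:

# Hypothetical call using the Bayesian ARD backend; n_iter is forwarded
# to linear_model.ARDRegression via **kws.
R = fit_regression(P, s, u, rule="BARD", n_iter=500)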
Example 11
def fit_regression(P, x, u, rule="LS", retall=False, **kws):
    """
Fit a polynomial chaos expansion using linear regression.

Parameters
----------
P : Poly
    Polynomial chaos expansion with `P.shape=(M,)` and `P.dim=D`.
x : array_like
    Collocation nodes with `x.shape=(D,K)`.
u : array_like
    Model evaluations with `len(u)=K`.
retall : bool
    If True return uhat in addition to R
rule : str
    Regression method used.

    The following methods use scikit-learn as a backend.
    See `sklearn.linear_model` for more details.

    Key     Scikit-learn    Description
    ---     ------------    -----------
        Parameters      Description
        ----------      -----------

    "BARD"  ARDRegression   Bayesian ARD Regression
        n_iter=300      Maximum iterations
        tol=1e-3        Optimization tolerance
        alpha_1=1e-6    Gamma scale parameter
        alpha_2=1e-6    Gamma inverse scale parameter
        lambda_1=1e-6   Gamma shape parameter
        lambda_2=1e-6   Gamma inverse scale parameter
        threshold_lambda=1e-4   Upper pruning threshold

    "BR"    BayesianRidge   Bayesian Ridge Regression
        n_iter=300      Maximum iterations
        tol=1e-3        Optimization tolerance
        alpha_1=1e-6    Gamma scale parameter
        alpha_2=1e-6    Gamma inverse scale parameter
        lambda_1=1e-6   Gamma shape parameter
        lambda_2=1e-6   Gamma inverse scale parameter

    "EN"    ElastiNet       Elastic Net
        alpha=1.0       Dampening parameter
        rho             Mixing parameter in [0,1]
        max_iter=300    Maximum iterations
        tol             Optimization tolerance

    "ENC"   ElasticNetCV    EN w/Cross Validation
        rho             Dampening parameter(s)
        eps=1e-3        min(alpha)/max(alpha)
        n_alphas        Number of alphas
        alphas          List of alphas
        max_iter        Maximum iterations
        tol             Optimization tolerance
        cv=3            Cross validation folds

    "LA"    Lars            Least Angle Regression
        n_nonzero_coefs Number of non-zero coefficients
        eps             Cholesky regularization

    "LAC"   LarsCV          LAR w/Cross Validation
        max_iter        Maximum iterations
        cv=5            Cross validation folds
        max_n_alphas    Max points for residuals in cv

    "LAS"   Lasso           Least Absolute Shrinkage and
                            Selection Operator
        alpha=1.0       Dampening parameter
        max_iter        Maximum iterations
        tol             Optimization tolerance

    "LASC"  LassoCV         LAS w/Cross Validation
        eps=1e-3        min(alpha)/max(alpha)
        n_alphas        Number of alphas
        alphas          List of alphas
        max_iter        Maximum iterations
        tol             Optimization tolerance
        cv=3            Cross validation folds

    "LL"    LassoLars       Lasso and Lars model
        max_iter        Maximum iterations
        eps             Cholesky regularization

    "LLC"   LassoLarsCV     LL w/Cross Validation
        max_iter        Maximum iterations
        cv=5            Cross validation folds
        max_n_alphas    Max points for residuals in cv
        eps             Cholesky regularization

    "LLIC"  LassoLarsIC     LL w/AIC or BIC
        criterion       "AIC" or "BIC" criterion
        max_iter        Maximum iterations
        eps             Cholesky regularization

    "OMP"   OrthogonalMatchingPursuit
        n_nonzero_coefs Number of non-zero coefficients
        tol             Max residual norm (instead of non-zero coef)

    Local methods

    Key     Description
    ---     -----------
    "LS"    Ordenary Least Squares

    "T"     Ridge Regression/Tikhonov Regularization
        order           Order of regularization (or custom matrix)
        alpha           Dampening parameter (else estimated from gcv)

    "TC"    T w/Cross Validation
        order           Order of regularization (or custom matrix)
        alpha           Dampening parameter (else estimated from gcv)


Returns
-------
R[, uhat]

R : Poly
    Fitted polynomial with `R.shape=u.shape[1:]` and `R.dim=D`.
uhat : np.ndarray
    The Fourier coefficients in the estimation.

Examples
--------
>>> x, y = cp.variable(2)
>>> P = cp.Poly([1, x, y])
>>> s = [[-1,-1,1,1], [-1,1,-1,1]]
>>> u = [0,1,1,2]
>>> print(fit_regression(P, s, u))
0.5q1+0.5q0+1.0

    """

    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(1, *x.shape)
    u = np.array(u)

    Q = P(*x).T
    shape = u.shape[1:]
    u = u.reshape(u.shape[0], np.prod(u.shape[1:]))

    rule = rule.upper()

    # Local rules
    if rule == "LS":
        uhat = la.lstsq(Q, u)[0]

    elif rule == "T":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), False)

    elif rule == "TC":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), True)

    else:

        # Scikit-learn wrapper
        try:
            _ = lm
        except NameError:
            raise NotImplementedError("sklearn not installed")

        if rule == "BARD":
            solver = lm.ARDRegression(fit_intercept=False, copy_X=False, **kws)

        elif rule == "BR":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.BayesianRidge(**kws)

        elif rule == "EN":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.ElasticNet(**kws)

        elif rule == "ENC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.ElasticNetCV(**kws)

        elif rule == "LA":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.Lars(**kws)

        elif rule == "LAC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LarsCV(**kws)

        elif rule == "LAS":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.Lasso(**kws)

        elif rule == "LASC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoCV(**kws)

        elif rule == "LL":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLars(**kws)

        elif rule == "LLC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLarsCV(**kws)

        elif rule == "LLIC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLarsIC(**kws)

        elif rule == "OMP":
            solver = lm.OrthogonalMatchingPursuit(**kws)

        uhat = solver.fit(Q, u).coef_

    u = u.reshape(u.shape[0], *shape)

    R = po.sum((P * uhat.T), -1)
    R = po.reshape(R, shape)

    if retall == 1:
        return R, uhat
    elif retall == 2:
        return R, uhat, Q
    return R
Example 12
def regress_sys(folder,
                all_videos,
                yfit,
                training_size,
                randselect=True,
                trainingdata=[],
                frame=0,
                have_output=True,
                download=True,
                bucket_name='ccurtis.data'):
    """Uses regression based on image intensities to select tracking parameters.

    This function uses regression methods from the scikit-learn module to
    predict the lower quality cutoff values for particle filtering in TrackMate
    based on the intensity distributions of input images. Currently only uses
    the first frame of videos for analysis, and is limited to predicting
    quality values.

    In practice, users will run regress_sys twice in different modes to build
    a regression system. First, set have_output to False. Function will return
    list of randomly selected videos to include in the training dataset. The
    user should then manually track particles using the Trackmate GUI, and enter
    these values in during the next round as the input yfit variable.

    Parameters
    ----------
    folder : str
        S3 directory containing video files specified in all_videos.
    all_videos: list of str
        Contains prefixes of video filenames of entire video set to be
        tracked.  Training dataset will be some subset of these videos.
    yfit: numpy.ndarray
        Contains manually acquired quality levels using Trackmate for the
        files contained in the training dataset.
    training_size : int
        Number of files in training dataset.
    randselect : bool
        If True, will randomly select training videos from all_videos.
        If False, will use trainingdata as input training dataset.
    trainingdata : list of str
        Optional manually selected prefixes of video filenames to be
        used as training dataset.
    have_output: bool
        If you have already acquired the quality values (yfit) for the
        training dataset, set to True.  If False, it will output the files
        the user will need to acquire quality values for.
    bucket_name : str
        S3 bucket containing videos to be downloaded for regression
        calculations.

    Returns
    -------
    regress_object : list of sklearn regression objects
        Contains list of regression objects assembled from the training
        datasets.  Uses the mean, 10th percentile, 90th percentile, and
        standard deviation intensities to predict the quality parameter
        in Trackmate.
    tprefix : list of str
        Contains randomly selected images from all_videos to be included in
        training dataset.

    """

    if randselect:
        tprefix = []
        for i in range(0, training_size):
            random.seed(i + 1)
            tprefix.append(all_videos[random.randint(0, len(all_videos) - 1)])
            if have_output is False:
                print("Get parameters for: {}".format(tprefix[i]))
    else:
        tprefix = trainingdata

    if have_output is True:
        # Define descriptors
        descriptors = np.zeros((training_size, 4))
        counter = 0
        for name in tprefix:
            local_im = name + '.tif'
            remote_im = "{}/{}".format(folder, local_im)
            if download:
                aws.download_s3(remote_im, local_im, bucket_name=bucket_name)
            test_image = sio.imread(local_im)
            descriptors[counter, 0] = np.mean(test_image[frame, :, :])
            descriptors[counter, 1] = np.std(test_image[frame, :, :])
            descriptors[counter, 2] = np.percentile(test_image[frame, :, :],
                                                    10)
            descriptors[counter, 3] = np.percentile(test_image[frame, :, :],
                                                    90)
            counter = counter + 1

        # Define regression techniques
        xfit = descriptors
        classifiers = [
            svm.SVR(),
            linear_model.SGDRegressor(),
            linear_model.BayesianRidge(),
            linear_model.LassoLars(),
            linear_model.ARDRegression(),
            linear_model.PassiveAggressiveRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.LinearRegression()
        ]

        regress_object = []
        for item in classifiers:
            clf = item
            regress_object.append(clf.fit(xfit, yfit))

        return regress_object

    else:
        return tprefix
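Once trained, every entry in regress_object maps the same four intensity descriptors to a predicted quality value. A sketch for a new image (new_image assumed loaded with sio.imread, frame as in the function arguments):

new_desc = np.array([[np.mean(new_image[frame, :, :]),
                      np.std(new_image[frame, :, :]),
                      np.percentile(new_image[frame, :, :], 10),
                      np.percentile(new_image[frame, :, :], 90)]])
quality_estimates = [reg.predict(new_desc)[0] for reg in regress_object]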
Example 13
#Choose predictors
predictors = features

#Clean the data
tr_data = CleanHousingData(tr_data)

#Split the data with the folds
kf = KFold(n_splits=3, random_state=1, shuffle=True)
# Note: only the split from the final fold survives this loop.
for train_index, test_index in kf.split(tr_data):
    trainsplit = tr_data.iloc[train_index, :]
    testsplit = tr_data.iloc[test_index, :]

#Finding out which algorithm adjusts better to the data
#Create the algorithm dictionary
ARD = linear_model.ARDRegression()
LinRe = linear_model.LinearRegression()
SGD = linear_model.SGDRegressor()
BR = linear_model.BayesianRidge()
Lars = linear_model.Lars()
Lasso = linear_model.Lasso()
PA = linear_model.PassiveAggressiveRegressor()
RANSAC = linear_model.RANSACRegressor()
Theil = linear_model.TheilSenRegressor()
Gboost = ensemble.GradientBoostingRegressor()
algorithms = {
    'Linear Regression': LinRe,
    'Bayesian ARD regression': ARD,
    'BayesianRidge': BR,
    'Lars': Lars,
    'Lasso': Lasso,
Example 14
def run_simple_model(train_x, train_y, dev_x, dev_y, test_x, test_y, model_type, out_dir=None, class_weight=None):
    from sklearn import datasets, neighbors, linear_model, svm

    totalTime = 0

    startTrainTime = time()
    logger.info("Start training...")
    if model_type == 'ARDRegression':
        model = linear_model.ARDRegression().fit(train_x, train_y)
    elif model_type == 'BayesianRidge':
        model = linear_model.BayesianRidge().fit(train_x, train_y)
    elif model_type == 'ElasticNet':
        model = linear_model.ElasticNet().fit(train_x, train_y)
    elif model_type == 'ElasticNetCV':
        model = linear_model.ElasticNetCV().fit(train_x, train_y)
    elif model_type == 'HuberRegressor':
        model = linear_model.HuberRegressor().fit(train_x, train_y)
    elif model_type == 'Lars':
        model = linear_model.Lars().fit(train_x, train_y)
    elif model_type == 'LarsCV':
        model = linear_model.LarsCV().fit(train_x, train_y)
    elif model_type == 'Lasso':
        model = linear_model.Lasso().fit(train_x, train_y)
    elif model_type == 'LassoCV':
        model = linear_model.LassoCV().fit(train_x, train_y)
    elif model_type == 'LassoLars':
        model = linear_model.LassoLars().fit(train_x, train_y)
    elif model_type == 'LassoLarsCV':
        model = linear_model.LassoLarsCV().fit(train_x, train_y)
    elif model_type == 'LassoLarsIC':
        model = linear_model.LassoLarsIC().fit(train_x, train_y)
    elif model_type == 'LinearRegression':
        model = linear_model.LinearRegression().fit(train_x, train_y)
    elif model_type == 'LogisticRegression':
        model = linear_model.LogisticRegression(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'LogisticRegressionCV':
        model = linear_model.LogisticRegressionCV(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'MultiTaskLasso':
        model = linear_model.MultiTaskLasso().fit(train_x, train_y)
    elif model_type == 'MultiTaskElasticNet':
        model = linear_model.MultiTaskElasticNet().fit(train_x, train_y)
    elif model_type == 'MultiTaskLassoCV':
        model = linear_model.MultiTaskLassoCV().fit(train_x, train_y)
    elif model_type == 'MultiTaskElasticNetCV':
        model = linear_model.MultiTaskElasticNetCV().fit(train_x, train_y)
    elif model_type == 'OrthogonalMatchingPursuit':
        model = linear_model.OrthogonalMatchingPursuit().fit(train_x, train_y)
    elif model_type == 'OrthogonalMatchingPursuitCV':
        model = linear_model.OrthogonalMatchingPursuitCV().fit(train_x, train_y)
    elif model_type == 'PassiveAggressiveClassifier':
        model = linear_model.PassiveAggressiveClassifier(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'PassiveAggressiveRegressor':
        model = linear_model.PassiveAggressiveRegressor().fit(train_x, train_y)
    elif model_type == 'Perceptron':
        model = linear_model.Perceptron(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'RandomizedLasso':
        model = linear_model.RandomizedLasso().fit(train_x, train_y)
    elif model_type == 'RandomizedLogisticRegression':
        model = linear_model.RandomizedLogisticRegression().fit(train_x, train_y)
    elif model_type == 'RANSACRegressor':
        model = linear_model.RANSACRegressor().fit(train_x, train_y)
    elif model_type == 'Ridge':
        model = linear_model.Ridge().fit(train_x, train_y)
    elif model_type == 'RidgeClassifier':
        model = linear_model.RidgeClassifier(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'RidgeClassifierCV':
        model = linear_model.RidgeClassifierCV(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'RidgeCV':
        model = linear_model.RidgeCV().fit(train_x, train_y)
    elif model_type == 'SGDClassifier':
        model = linear_model.SGDClassifier(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'SGDRegressor':
        model = linear_model.SGDRegressor().fit(train_x, train_y)
    elif model_type == 'TheilSenRegressor':
        model = linear_model.TheilSenRegressor().fit(train_x, train_y)
    # Note: lars_path, lasso_path, orthogonal_mp, etc. are plain functions,
    # not estimators; these branches would fail because their return values
    # have no .fit method.
    elif model_type == 'lars_path':
        model = linear_model.lars_path().fit(train_x, train_y)
    elif model_type == 'lasso_path':
        model = linear_model.lasso_path().fit(train_x, train_y)
    elif model_type == 'lasso_stability_path':
        model = linear_model.lasso_stability_path().fit(train_x, train_y)
    elif model_type == 'logistic_regression_path':
        model = linear_model.logistic_regression_path(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'orthogonal_mp':
        model = linear_model.orthogonal_mp().fit(train_x, train_y)
    elif model_type == 'orthogonal_mp_gram':
        model = linear_model.orthogonal_mp_gram().fit(train_x, train_y)
    elif model_type == 'LinearSVC':
        model = svm.LinearSVC(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'SVC':
        model = svm.SVC(class_weight=class_weight, degree=3).fit(train_x, train_y)
    else:
        raise NotImplementedError('Model not implemented')

        
    logger.info("Finished training.")
    endTrainTime = time()
    trainTime = endTrainTime - startTrainTime
    logger.info("Training time : %d seconds" % trainTime)


    logger.info("Start predicting train set...")
    train_pred_y = model.predict(train_x)
    logger.info("Finished predicting train set.")
    logger.info("Start predicting test set...")
    test_pred_y = model.predict(test_x)
    logger.info("Finished predicting test set.")
    endTestTime = time()
    testTime = endTestTime - endTrainTime
    logger.info("Testing time : %d seconds" % testTime)
    totalTime += trainTime + testTime

    train_pred_y = np.round(train_pred_y)
    test_pred_y = np.round(test_pred_y)

    np.savetxt(out_dir + '/preds/best_test_pred' + '.txt', test_pred_y, fmt='%i')

    logger.info('[TRAIN] Acc: %.3f' % (accuracy_score(train_y, train_pred_y)))
    logger.info('[TEST]  Acc: %.3f' % (accuracy_score(test_y, test_pred_y)))

    return accuracy_score(test_y, test_pred_y)
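A sketch of invoking this dispatcher (the array shapes and output layout are assumptions; out_dir must contain a preds/ subdirectory for the np.savetxt call to succeed):

# Hypothetical call; train/dev/test arrays assumed prepared elsewhere.
acc = run_simple_model(train_x, train_y, dev_x, dev_y, test_x, test_y,
                       model_type='ARDRegression', out_dir='results')
print('test accuracy:', acc)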
Example 15
# quantile_transformer = preprocessing.QuantileTransformer(
#     output_distribution='normal', random_state=42, n_quantiles=73)

# X_trans = quantile_transformer.fit_transform(X)

# plt.hist(z)



X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = 5)

lr = linear_model.LinearRegression()
lasso = linear_model.Lasso()
ridge = linear_model.RidgeCV()
bayard = linear_model.ARDRegression()
# bayridge = linear_model.BayesianRidge()


models = [lr, lasso, ridge, bayard]

for model in models: 
    print(model)
    model.fit(X_train, y_train)
    print(model.score(X_test, y_test))
    print(model.intercept_)
    print(model.coef_)
 


# efs = EFS(lr, ### best subset index given 1, 2, 4,6,9,10,11,13,14 -> 'eFG%', 'OppeFG%' ,'ORB%','OppDRB%', 'DRB%', 'TOV%', 'OppTOV%', 'STL%', 'OppPF' , second run through can drop OppDRB% as its the inverse of ORB% 
Example 16

    # ## Bayesian Ridge Regression
    Bayesreg = linear_model.BayesianRidge()
    Bayesreg_model_fit = model_fit(Bayesreg, 'Bayesian_Ridge_Regression',
                                   X_train, y_train, X_cv, y_cv, X_test,
                                   y_test, features_name, train, cv, test)
    coef = pd.DataFrame(Bayesreg.coef_,
                        index=features_name,
                        columns=['features_importance'])
    coef.sort_index(ascending=False, inplace=True)
    print(coef.head(10).round(6))
    coef.to_csv(para.path_results + "features_importance_Bayesreg.csv")

    # ## ARD Regression

    ardreg = linear_model.ARDRegression()
    ardreg_model_fit = model_fit(ardreg, 'ARD_Regression', X_train, y_train,
                                 X_cv, y_cv, X_test, y_test, features_name,
                                 train, cv, test)

    coef = pd.DataFrame(ardreg.coef_,
                        index=features_name,
                        columns=['features_importance'])
    coef.sort_index(ascending=False, inplace=True)
    print(coef.head(10).round(6))
    coef.to_csv(para.path_results + "features_importance_ardreg.csv")

    # ## TheilSen Regression

    theilsenreg = linear_model.TheilSenRegressor()
    theilsenreg_model_fit = model_fit(theilsenreg, 'TheilSen_Regression',
Example 17
        regression(linear_model.HuberRegressor()),
        regression(linear_model.ElasticNet(random_state=RANDOM_SEED)),
        regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)),
        regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.Lars()),
        regression(linear_model.LarsCV()),
        regression(linear_model.Lasso(random_state=RANDOM_SEED)),
        regression(linear_model.LassoCV(random_state=RANDOM_SEED)),
        regression(linear_model.LassoLars()),
        regression(linear_model.LassoLarsIC()),
        regression(linear_model.OrthogonalMatchingPursuit()),
        regression(linear_model.OrthogonalMatchingPursuitCV()),
        regression(linear_model.Ridge(random_state=RANDOM_SEED)),
        regression(linear_model.RidgeCV()),
        regression(linear_model.BayesianRidge()),
        regression(linear_model.ARDRegression()),
        regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)),
        regression(
            linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)),

        # Logistic Regression
        classification(
            linear_model.LogisticRegression(random_state=RANDOM_SEED)),
        classification(
            linear_model.LogisticRegressionCV(random_state=RANDOM_SEED)),
        classification(linear_model.RidgeClassifier(random_state=RANDOM_SEED)),
        classification(linear_model.RidgeClassifierCV()),
        classification(linear_model.SGDClassifier(random_state=RANDOM_SEED)),
        classification_binary(
            linear_model.LogisticRegression(random_state=RANDOM_SEED)),
        classification_binary(
Example 18
def regress_sys(folder, all_videos, y, training_size, have_output=True):
    """
    Uses regression techniques to select the best tracking parameters.
    Regression against intensities of input images.

    Parameters
    ----------
    all_videos: list
        Contains prefixes of video filenames of entire video set to be
        tracked.  Training dataset will be some subset of these videos.
    y: numpy array
        Contains manually acquired quality levels using Trackmate for the
        files contained in the training dataset.
    training_size: int
        Number of files in training dataset.
    have_output: boolean
        If you have already acquired the quality values (y) for the
        training dataset, set to True.  If False, it will output the files
        the user will need to acquire quality values for.

    Returns
    -------
    regress_object: list of sklearn regression objects.
        Contains list of regression objects assembled from the training
        datasets.  Uses the mean, 10th percentile, 90th percentile, and
        standard deviation intensities to predict the quality parameter
        in Trackmate.
    """

    tprefix = []
    for i in range(0, training_size):
        random.seed(i + 1)
        tprefix.append(all_videos[random.randint(0, len(all_videos) - 1)])
        if have_output is False:
            print("Get parameters for: {}".format(tprefix[i]))

    if have_output is True:
        # Define descriptors
        descriptors = np.zeros((training_size, 4))
        counter = 0
        for name in tprefix:
            pup = name.split('_')[0]
            local_im = name + '.tif'
            remote_im = "{}/{}/{}".format(folder, pup, local_im)
            aws.download_s3(remote_im, local_im)
            test_image = sio.imread(local_im)
            descriptors[counter, 0] = np.mean(test_image[0, :, :])
            descriptors[counter, 1] = np.std(test_image[0, :, :])
            descriptors[counter, 2] = np.percentile(test_image[0, :, :], 10)
            descriptors[counter, 3] = np.percentile(test_image[0, :, :], 90)
            counter = counter + 1

        # Define regression techniques
        X = descriptors
        classifiers = [
            svm.SVR(),
            linear_model.SGDRegressor(),
            linear_model.BayesianRidge(),
            linear_model.LassoLars(),
            linear_model.ARDRegression(),
            linear_model.PassiveAggressiveRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.LinearRegression()
        ]

        regress_object = []
        for item in classifiers:
            clf = item
            regress_object.append(clf.fit(X, y))

        return regress_object
Example 19
from sklearn import preprocessing
from sklearn import utils


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

lab_enc = preprocessing.LabelEncoder()
y_train_encoded = lab_enc.fit_transform(y_train)

classifiers = {
    'SVR': svm.SVR(),
    'SVC': SVC(),
    'SGD': linear_model.SGDRegressor(),
    'BAYES': linear_model.BayesianRidge(),
    'LL': linear_model.LassoLars(),
    'ARD': linear_model.ARDRegression(),
    'PA': linear_model.PassiveAggressiveRegressor(),
    'TS': linear_model.TheilSenRegressor(),
    'L': linear_model.LinearRegression()
}

train_scores = []
test_scores = []
names = []
models = {}
for key in classifiers.keys(): 
    clf = classifiers[key]
    clf.fit(X_train, y_train)
    train_score = clf.score(X_train, y_train)
    test_score = clf.score(X_test, y_test)
    y_test_predict = clf.predict(X_test)
Example 20
    def __init__(self):
        ''' 
        Class constructor or initialization method. 
        '''
        # keys and tokens from the Twitter Dev Console 
        consumer_key = 'REDACTED'
        consumer_secret = 'REDACTED'
        access_token = 'REDACTED'
        access_token_secret = 'REDACTED'

        # attempt authentication 
        try: 
            # create OAuthHandler object 
            self.auth = OAuthHandler(consumer_key, consumer_secret) 
            # set access token and secret 
            self.auth.set_access_token(access_token, access_token_secret) 
            # create tweepy API object to fetch tweets 
            self.api = tweepy.API(self.auth) 
        except Exception:
            print("Error: Authentication Failed")

        # creating object of TwitterClient Class 
        # api = TwitterClient()
        # calling function to get tweets
        wSent = ["WSENT"]
        aSent = ["ASENT"]

        for index in range(3,8):
            day = datetime.date.today() - datetime.timedelta(days = index)
            wTweets = self.get_tweets(query = 'weather', count = 100, geocode='41.2565,-96.05,5mi', until=day)
            aTweets = self.get_tweets(query = '', count = 100, geocode='41.2565,-96.05,5mi', until = day)

            ptweets = [tweet for tweet in wTweets if tweet['sentiment'] == 'positive']
            ntweets = [tweet for tweet in wTweets if tweet['sentiment'] == 'negative']
            netPosSent = (len(ptweets)/len(wTweets)) - (len(ntweets)/len(wTweets))

            wSent.append(netPosSent)

            ptweets = [tweet for tweet in aTweets if tweet['sentiment'] == 'positive']
            ntweets = [tweet for tweet in aTweets if tweet['sentiment'] == 'negative']
            netPosSent = (len(ptweets)/len(aTweets)) - (len(ntweets)/len(aTweets))

            aSent.append(netPosSent)
        
        # print(wSent)
        # print(aSent)


        url = "https://www.ncei.noaa.gov/orders/cdo/2069913.csv"

        dataset = pandas.read_csv(url)
        dataset = dataset.drop(['STATION', 'NAME', 'DATE'], axis = 1)
        dataset['WSENT'] = wSent[1:]
        # dataset['ASENT'] = aSent[1:]
        dataset = dataset.dropna()
        # print(dataset.shape)

        classifiers = [
            svm.SVR(),
            linear_model.SGDRegressor(),
            linear_model.BayesianRidge(),
            linear_model.LassoLars(),
            linear_model.ARDRegression(),
            linear_model.PassiveAggressiveRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.LinearRegression()
        ]

        trainingData   = dataset.drop(['WSENT'], axis=1)
        trainingScores = dataset['WSENT']
        predictionData = dataset.drop(['WSENT'], axis=1)

        global clf

        for item in classifiers:
            # print(item)
            clf = item
            clf.fit(trainingData, trainingScores)
            print(clf.predict(predictionData),'\n')
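Since predictionData is identical to trainingData here, the printed predictions are in-sample. A quick way to quantify each fit on the same data (an illustrative check, not a generalization estimate):

from sklearn.metrics import mean_absolute_error

for item in classifiers:
    preds = item.fit(trainingData, trainingScores).predict(predictionData)
    print(type(item).__name__, mean_absolute_error(trainingScores, preds))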