コード例 #1
0
ファイル: Model.py プロジェクト: dlont/kbc
    def build_best_prediction(self):
        """Fit LassoLarsIC models with both AIC and BIC criteria, print
        test-set metrics for each, and pickle the fitted models.

        Reads the target/feature names from ``self._configuration`` and the
        train/test frames from the configured data provider.  Stores the
        fitted estimators in ``self.fit_results`` (keys ``'aic'``/``'bic'``)
        and dumps that dict to the configured output file.
        """
        print("Building LassoLarsIC linear regression vanilla model!")

        import pickle
        from sklearn.linear_model import LassoLarsIC
        from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error

        target_variable_names = self._configuration['model']['target'][0]
        data_provider = self.get_data_provider(
            self._configuration[target_variable_names]['data_provider'])

        input_features_names = self._configuration['model']['input_features']
        X_train = data_provider.train[input_features_names]
        y_train = data_provider.train[target_variable_names]

        X_test = data_provider.test[input_features_names]
        y_test = data_provider.test[target_variable_names]

        self.fit_results = {}
        # The pipeline is identical for both information criteria; only the
        # model-selection criterion differs, so run it in a single loop.
        for criterion in ('aic', 'bic'):
            model = LassoLarsIC(criterion=criterion)
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            tag = criterion.upper()
            print(tag + " Explained variance score: ",
                  explained_variance_score(y_test, y_pred))
            print(tag + " Mean absolute error: ",
                  mean_absolute_error(y_test, y_pred))
            print(tag + " Mean squared error: ",
                  mean_squared_error(y_test, y_pred))
            self.fit_results[criterion] = model

        # BUG FIX: the original dumped ``self.my_model``, which is never
        # assigned in this method — the fitted models were stored in
        # ``self.fit_results`` one line earlier.  Also use a context manager
        # so the output file handle is closed deterministically.
        with open(self._configuration['model']['output_filename'], 'wb') as fh:
            pickle.dump(self.fit_results, fh)
コード例 #2
0
def _lassolarsic(*,
                 train,
                 test,
                 x_predict=None,
                 metrics,
                 criterion='aic',
                 fit_intercept=True,
                 verbose=False,
                 normalize=True,
                 precompute='auto',
                 max_iter=500,
                 eps=2.220446049250313e-16,
                 copy_X=True,
                 positive=False):
    """Fit a LassoLarsIC model on ``train`` and score it on ``test``.

    Parameters
    ----------
    train, test : tuple of (X, y) pairs used for fitting / scoring.
    x_predict : optional feature matrix; when given, predictions for it are
        returned as the third tuple element.
    metrics : one of ``'mse'``, ``'rmse'``, ``'mae'``.
    Remaining keyword arguments are forwarded to ``LassoLarsIC``.

    Returns ``(model_name, accuracy, y_predict_or_None)``.

    Raises ``ValueError`` for an unrecognised ``metrics`` value.

    For more info visit :
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLarsIC.html#sklearn.linear_model.LassoLarsIC
    """
    model = LassoLarsIC(criterion=criterion,
                        fit_intercept=fit_intercept,
                        verbose=verbose,
                        normalize=normalize,
                        precompute=precompute,
                        max_iter=max_iter,
                        eps=eps,
                        copy_X=copy_X,
                        positive=positive)
    model.fit(train[0], train[1])
    model_name = 'LassoLarsIC'
    y_hat = model.predict(test[0])

    # BUG FIX: previously an unrecognised ``metrics`` value left ``accuracy``
    # unbound and the function died with a NameError; fail fast with a clear
    # error message instead.
    scorers = {'mse': _mse, 'rmse': _rmse, 'mae': _mae}
    if metrics not in scorers:
        raise ValueError(
            "metrics must be one of 'mse', 'rmse', 'mae'; got %r" % (metrics,))
    accuracy = scorers[metrics](test[1], y_hat)

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
コード例 #3
0
ファイル: histograms.py プロジェクト: kfirmanor/Who_s-code
class HistogramClassifier:
    """Regress on letter-histogram features built by project helpers.

    ``make_dataframe`` / ``snippet_to_histogram`` and ``letter_list`` are
    defined elsewhere in the project.
    """

    def __init__(self):
        X, y = make_dataframe(letter_list)
        # Remember training columns so prediction rows can be aligned to them.
        self.columns = list(X.columns)
        self.classifier = LassoLarsIC()
        self.classifier.fit(X, y)

    def predict(self, X):
        """Predict for a single snippet ``X`` via its letter histogram."""
        counter = snippet_to_histogram(X, letter_list)
        # FIX: ``DataFrame.append`` was removed in pandas 2.0.  Build the
        # single-row frame directly and align it to the training columns,
        # filling letters absent from the snippet with 0.
        df = pd.DataFrame([counter]).reindex(columns=self.columns).fillna(0)
        return self.classifier.predict(df)
コード例 #4
0
class LassoLarsICImpl:
    """Thin wrapper delegating to an ``Op`` estimator built from the given
    hyperparameters (``Op`` is supplied elsewhere in the project)."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped estimator; ``y`` is forwarded only when provided.

        Returns ``self`` to allow chained calls.
        """
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        """Delegate prediction to the wrapped estimator."""
        return self._wrapped_model.predict(X)
コード例 #5
0
class r07546035_ICRegression(regression):
    """LassoLarsIC regression configured entirely from ``self.param``."""

    def trainAlgo(self):
        """Build a LassoLarsIC estimator from ``self.param`` and fit it on
        ``self.inputData['X']`` / ``self.outputData['Y']``."""
        cfg = self.param
        self.model = LassoLarsIC(
            criterion=cfg['criterion'],
            fit_intercept=cfg['fit_intercept'],
            normalize=cfg['normalize'],
            max_iter=cfg['max_iter'],
            eps=cfg['eps'],
            positive=cfg['positive'],
        )
        self.model.fit(self.inputData['X'], self.outputData['Y'])

    def predictAlgo(self):
        """Store predictions for ``self.inputData['X']`` in ``self.result['Y']``."""
        self.result['Y'] = self.model.predict(self.inputData['X'])
コード例 #6
0
ファイル: code.py プロジェクト: shibaji7/DA2Final
def __lasso_selected(data,data_test, response):
    """Fit a BIC-selected LassoLarsIC model on ``data`` (all columns except
    ``response`` as features) and print its coefficients plus the mean
    squared prediction error on ``data_test``.  Returns None.

    NOTE(review): Python 2 print statements and ``DataFrame.as_matrix``
    (removed in pandas 1.0) tie this snippet to an old environment —
    confirm the pinned python/pandas versions before reuse.
    """
    X = data.drop([response],axis=1).as_matrix()
    # Target reshaped to a column vector of shape (n, 1).
    y = np.array(data[response].tolist()).reshape((len(data),1))
    #X = sm.add_constant(X)
    #model = sm.OLS(y,X)
    #m = model.fit_regularized(refit=True)
    #yp = m.predict(data_test)
    reg = LassoLarsIC(criterion='bic')
    print y.shape,X.shape
    reg.fit(X,y)
    # Test-set features, reshaped explicitly to (n_test, n_features).
    x = data_test.drop([response],axis=1).as_matrix().reshape((len(data_test),len(data_test.keys())-1))
    yp = reg.predict(x)
    # Mean squared error on the held-out frame.
    te = np.mean((yp-np.array(data_test[response].tolist()))**2)
    print reg.coef_,te
    return
コード例 #7
0
ファイル: histograms.py プロジェクト: kfirmanor/Who_s-code
class HistogramClassifier:
    """Regress on letter-histogram features built by project helpers.

    ``make_dataframe`` / ``snippet_to_histogram`` and ``letter_list`` are
    defined elsewhere in the project.
    """

    def __init__(self):
        X, y = make_dataframe(letter_list)
        # Remember training columns so prediction rows can be aligned to them.
        self.columns = list(X.columns)
        self.classifier = LassoLarsIC()
        self.classifier.fit(X, y)

    def predict(self, X):
        """Predict a single rounded value for snippet ``X``.

        The original looped ``len(X)`` times predicting the exact same
        single-row frame and then averaged the identical results — the
        prediction is loop-invariant, so compute it once.
        """
        counter = snippet_to_histogram(X, letter_list)
        # FIX: ``DataFrame.append`` was removed in pandas 2.0; build the
        # single-row frame directly and align it to the training columns.
        df = pd.DataFrame([counter]).reindex(columns=self.columns).fillna(0)
        pred = float(self.classifier.predict(df)[0])
        # Matches the original ``round(y.sum() / len(X))`` since every
        # element of ``y`` held the same prediction.
        return round(pred)
コード例 #8
0
ファイル: yatsm.py プロジェクト: hylhero/yatsm
    def fit_models_LassoCV(self, X, Y, bands=None):
        """Fit one BIC-selected LassoLarsIC model per band of ``Y`` over the
        training period, annotating each estimator with fit diagnostics
        (nobs, adjusted coefficients, fitted values, RSS, RMSE).

        ``bands`` defaults to ``self.fit_indices``.  Returns the fitted
        estimators as a numpy object array.
        """
        if bands is None:
            bands = self.fit_indices

        fitted = []
        for band in bands:
            # lasso = LassoCV(n_alphas=100)
            # lasso = LassoLarsCV(masx_n_alphas=100)
            series = Y[band, :]
            model = LassoLarsIC(criterion='bic').fit(X, series)
            # Stash diagnostics directly on the estimator object.
            model.nobs = series.size
            model.coef = np.copy(model.coef_)
            # Fold the intercept into the first coefficient slot.
            model.coef[0] += model.intercept_
            model.fittedvalues = model.predict(X)
            residuals = series - model.fittedvalues
            model.rss = np.sum(residuals ** 2)
            model.rmse = math.sqrt(model.rss / model.nobs)
            fitted.append(model)

        return np.array(fitted)
コード例 #9
0
def trainData(fileName):
    """Train a LassoLarsIC model to forecast the next day's close for the
    stock CSV at ``fileName``, persist the model, and plot the forecast.

    Side effects: writes ``<fileName>.m`` via joblib and opens a matplotlib
    window.  NOTE(review): ``sklearn.cross_validation`` was removed in
    scikit-learn 0.20 (use ``model_selection``), and the positional ``axis``
    in ``df.drop(['label'], 1)`` is gone in pandas 2.0 — confirm the pinned
    library versions.
    """
    df = pd.read_csv(fileName, index_col='date')

    df = df.sort_index()
    df = df[[
        'open', 'high', 'close', 'low', 'volume', 'price_change', 'p_change',
        'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20', 'turnover'
    ]]

    # Keep raw OHLCV, then derive two relative features from it.
    df = df[['open', 'high', 'low', 'close', 'volume']]
    df['HL_PCT'] = (df['high'] - df['low']) / df['close'] * 100.0
    df['PCT_change'] = (df['close'] - df['open']) / df['open'] * 100.0
    df = df[['close', 'HL_PCT', 'PCT_change', 'volume']]
    # print(df.head())
    forecast_col = 'close'
    df.fillna(value=-99999, inplace=True)
    # forecast_out = int(math.ceil(0.01 * len(df)))
    forecast_out = 1
    # Label = close shifted forecast_out rows into the future, so each row's
    # target is the close forecast_out days ahead.
    df['label'] = df[forecast_col].shift(-forecast_out)

    print(df.shape)
    print(df)
    X = np.array(df.drop(['label'], 1))

    X = preprocessing.scale(X)

    # The last forecast_out rows have no label; hold them out as the inputs
    # to forecast on.
    X_lately = X[-forecast_out:]
    X = X[:-forecast_out]
    df.dropna(inplace=True)
    print(X)
    print(X_lately)
    y = np.array(df['label'])
    # print(y)
    print(X.shape)
    print(y.shape)
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.2)

    clf = LassoLarsIC(max_iter=100)
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)
    joblib.dump(clf, "%s.m" % fileName)
    print(accuracy, "---------score------")

    forecast_set = clf.predict(X_lately)

    print(forecast_out)
    style.use('ggplot')
    df['Forecast'] = np.nan
    last_date = df.iloc[-1].name

    # Extend the index day by day past the last known date so the forecast
    # can be appended and plotted after the history.
    date_time = datetime.datetime.strptime(last_date, '%Y-%m-%d')
    last_unix = date_time.timestamp()
    one_day = 86400
    next_unix = last_unix + one_day
    print(forecast_set)
    for i in forecast_set:
        next_date = datetime.datetime.fromtimestamp(next_unix)
        next_unix += 86400
        df.loc[next_date] = [np.nan for _ in range(len(df.columns) - 1)] + [i]
    print(df.tail(forecast_out))

    df['close'].plot()
    df['Forecast'].plot()
    plt.show()
コード例 #10
0
def lasso(X, y):
    """Fit an AIC-selected LassoLarsIC model on (X, y).

    Returns a tuple of (in-sample predictions, chosen alpha_, coef_).
    """
    estimator = LassoLarsIC(criterion='aic').fit(X, y)
    return estimator.predict(X), estimator.alpha_, estimator.coef_
コード例 #11
0
##  'North_American': 0.0,
##  'OPEC': -1.0037125526070625,
##  'PRS International Country Risk Guide': 0.0,
##  'South_American': 1.1666702294227076,
##  'World Economic Forum EOS': -1.1639115442413683,
##  'Years_In_Nato': 0.0,
##  'alcconsumption': 0.59855758131369263,
##  'armedforcesrate': 0.0,
##  'employrate': -2.2695726938628469,
##  'femaleemployrate': 1.0671515028671372,
##  'incomeperperson': 1.191656220279911,
##  'internetuserate': -2.4535120774767076,
##  'lifeexpectancy': 0.0}

from sklearn.metrics import mean_squared_error

# Evaluate the AIC-selected model (fitted earlier) on both splits:
# mean-squared error and R-square for train and test.
train_error_aic = mean_squared_error(tar_train, model_aic.predict(pred_train))
test_error_aic = mean_squared_error(tar_test, model_aic.predict(pred_test))
rsquared_train_aic = model_aic.score(pred_train, tar_train)
rsquared_test_aic = model_aic.score(pred_test, tar_test)

# Print each metric under its label (same output as the original
# label-then-value print pairs).
for label, value in (
        ('training data MSE', train_error_aic),
        ('test data MSE', test_error_aic),
        ('training data R-square', rsquared_train_aic),
        ('test data R-square', rsquared_test_aic),
):
    print(label)
    print(value)
コード例 #12
0
########################################################################################################################
##################                            BIC CRITERION                                     ##################
########################################################################################################################
from sklearn.linear_model import LassoLarsIC
# https://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_model_selection.html#sphx-glr-auto-examples-linear-model-plot-lasso-model-selection-py
EPSILON = 1e-4  # keeps log10(alpha) finite when an alpha is exactly 0
X = np.array(X_train)
y = np.array(y_train)

# BIC-selected lasso.
model_bic = LassoLarsIC(criterion='bic')
model_bic.fit(X, y)
alpha_bic_ = model_bic.alpha_
BIC_pred = model_bic.predict(np.array(X_test))
# BUG FIX: r2_score's signature is (y_true, y_pred); the original passed the
# predictions first, which silently computes a different (wrong) statistic.
R2_BIC = r2_score(np.array(y_test), BIC_pred)

# AIC-selected lasso (typically a different alpha than BIC).
model_aic = LassoLarsIC(criterion='aic')
model_aic.fit(X, y)
alpha_aic_ = model_aic.alpha_
AIC_pred = model_aic.predict(np.array(X_test))
R2_AIC = r2_score(np.array(y_test), AIC_pred)


def plot_ic_criterion(model, name, color):
    alpha_ = model.alpha_ + EPSILON
    alphas_ = model.alphas_ + EPSILON
    criterion_ = model.criterion_
    plt.plot(-np.log10(alphas_),
コード例 #13
0
    # Goodness-of-fit decomposition for the earlier-fitted lassoLarscv model
    # on the full data set; ``xss`` is a project helper that returns
    # (TSS, RSS, ESS, R^2).
    tss, rss, ess, r2 = xss(Y, lassoLarscv.predict(X))
    print "TSS(Total Sum of Squares): ", tss
    print "RSS(Residual Sum of Squares): ", rss
    print "ESS(Explained Sum of Squares): ", ess
    print "R^2: ", r2

    print "\n**********测试LassoLarsIC类**********"
    # Default criterion is AIC; swap in the commented line below for BIC.
    lassoLarsIC = LassoLarsIC()
    # lassoLarsIC = LassoLarsIC(criterion='bic')
    # Fit on the training set (ravel() flattens the single-column target).
    lassoLarsIC.fit(train_X, train_Y.values.ravel())
    # Print the fitted coefficients and intercept.
    print "系数:", lassoLarsIC.coef_
    print "截距:", lassoLarsIC.intercept_
    print '训练集R2: ', r2_score(train_Y, lassoLarsIC.predict(train_X))

    # For linear regression models the quality of fit is usually judged by
    # the mean squared error (MSE) or root mean squared error (RMSE) on the
    # held-out test set.
    test_Y_pred = lassoLarsIC.predict(test_X)
    print "测试集得分:", lassoLarsIC.score(test_X, test_Y)
    print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
    print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
    print "测试集R2:", r2_score(test_Y, test_Y_pred)

    # Same decomposition for the IC-selected model, on the full data set.
    tss, rss, ess, r2 = xss(Y, lassoLarsIC.predict(X))
    print "TSS(Total Sum of Squares): ", tss
    print "RSS(Residual Sum of Squares): ", rss
    print "ESS(Explained Sum of Squares): ", ess
    print "R^2: ", r2
コード例 #14
0
ファイル: source.py プロジェクト: lwc657/ncts_final
#%% y normalization

mY = y_train.mean()
sY = y_train.std()
y_train = (y_train - mY) / sY
# y_test = ( y_test - mY ) / sY

#%% lasso regression
"""
lassocv.alplha_ is different from which in R
"""

mName = 'lasso'
lassocv = LassoLarsIC()
lassocv.fit(X_train, y_train)
y_train_pred = lassocv.predict(X_train)
predictions = lassocv.predict(X_test)
predictions = predictions * sY + mY
#predAll = np.append(predAll,predictions).reshape([-1,1])
coef = lassocv.coef_
lassocv.alpha_

draw_prediction(predictions, y_test, mName)

## lassocv = LassoCV(random_state=0, eps=1e-9, cv=10, n_alphas=100)
#lassocv = LassoLarsCV()
#lassocv.fit(X_train, y_train)
#y_train_pred = lassocv.predict(X_train)
#predictions = lassocv.predict(X_test)
#np.mean(abs(y_train_pred - y_train))/np.mean(y_train)
#np.mean(abs(predictions - y_test))/np.mean(y_test)
コード例 #15
0
ファイル: simple_models.py プロジェクト: kubapok/pm-task
    # Baseline: MSE of predicting the constant ``m`` (presumably the training
    # mean, defined earlier outside this view — TODO confirm) for every test
    # row.  NOTE(review): 'maive' looks like a typo for 'naive'; the runtime
    # string is left untouched.
    print('maive MSE: ', mean_squared_error(
        [m for _ in range(len(y_test))], y_test))
    print()

    print('-'*100)
    # linear model (OLS via statsmodels)
    lm = sm.OLS(y_train, X_train).fit()
    print(lm.summary())
    print('lm MSE: ', mean_squared_error(lm.predict(X_test), y_test))
    print('lm AIC: ', lm.aic)

    print('-'*100)
    # AIC: lasso with regularisation strength chosen by the AIC criterion.
    print("AIC")
    aic = LassoLarsIC(criterion='aic')
    aic.fit(X_train, y_train)

    predictions = aic.predict(X_test)
    print(mean_squared_error(y_test, predictions))
    print(aic.coef_)

    print('-'*100)
    # SGD regressor on standardised features.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    # NOTE(review): SGDRegressor's ``n_iter`` was renamed ``max_iter`` and
    # removed in scikit-learn 0.21 — this call fails on modern sklearn;
    # confirm the pinned version.
    sgd = SGDRegressor(penalty='l2', alpha=0.15, n_iter=200)
    sgd = sgd.fit(X_train_scaled, y_train)
    predictions = sgd.predict(scaler.transform(X_test))
    print('sgd: ', mean_squared_error(y_test, predictions))