Example #1
def runMain():
    boston_data = datasets.load_boston()
    X = boston_data.data
    y = boston_data.target
    #  print('Samples')
    #  print(X[:5, :])
    #  print('Labels')
    #  print(y[:5])

    lr_model = LinearRegression()
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=1 / 3.,
                                                        random_state=3)
    lr_model.fit(X_train, y_train)

    print('X_train====', X_train)
    print('X_test====', X_test)

    print('y_train====', y_train)
    print('y_test====', y_test)
    # retrieve the model's parameters
    params = lr_model.get_params()
    print('Parameters:')
    print(params)

    train_score = lr_model.score(X_train, y_train)
    test_score = lr_model.score(X_test, y_test)
    print('Training set score:')
    print(train_score)
    print('Test set score:')
    print(test_score)

    return ''
Example #2
class LinearRegressionModel:

    # Initialize a LinearRegressionModel object whose "model" attribute holds an
    # actual LinearRegression object from scikit-learn.
    def __init__(self, *args, **kwargs):
        self.model = LinearRegression(*args, **kwargs)

    # Return the actual LinearRegression object that this wrapper wraps.
    def get_model(self):
        return self.model

    def fit(self, X, y, sample_weight=None):
        if isinstance(X, TabularData):
            X = DataConversion.extract_X(X)
        if isinstance(y, TabularData):
            y = DataConversion.extract_y(y)
        self.model.fit(X, y, sample_weight)
        return self

    def get_params(self, deep=True):
        return self.model.get_params(deep)

    def predict(self, X):
        # if statement added to avoid converting TabularData twice when predict() is called inside score()
        if isinstance(X, TabularData):
            X = DataConversion.extract_X(X)
        return self.model.predict(X)

    def score(self, X, y, sample_weight=None):
        if isinstance(X, TabularData):
            X = DataConversion.extract_X(X)
        if isinstance(y, TabularData):
            y = DataConversion.extract_y(y)
        return self.model.score(X, y, sample_weight)

    def set_params(self, **params):
        return self.model.set_params(**params)
    
    '''
    # for testing purposes
    def __getattribute__(self,item):
            logging.info("The function being called: "+str(item))
            if (item in ('fit','predict','model','get_model','score')):
                return super().__getattribute__(item)
    '''
    
    def __getattribute__(self, item):
        # If this object defines the requested attribute (e.g. fit/predict/score,
        # which need the TabularData conversion), return it directly.
        try:
            return super().__getattribute__(item)
        except AttributeError:
            pass
        # Otherwise fetch it from the actual LinearRegression object.
        return getattr(self.model, item)
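
A minimal usage sketch for the wrapper above. TabularData and DataConversion are project-specific helpers that are not shown here, so this sketch exercises only the plain-array path and the __getattribute__ delegation:

import numpy as np

# made-up toy data, purely for illustration
X = np.array([[1.0], [2.0], [3.0], [4.0]])
y = np.array([2.0, 4.0, 6.0, 8.0])

wrapper = LinearRegressionModel().fit(X, y)
print(wrapper.score(X, y))  # goes through the wrapper's own score()
print(wrapper.coef_)        # not defined on the wrapper; resolved via __getattribute__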
Example #3
def search_bestparam_LinearRegression(X_train, y_train, df_search_best_param):
    print(f"Search best params for LinearRegression ...")
    model = LinearRegression()
    print("Supported params", model.get_params())
    param_grid = {
        'normalize': [True, False],
        'fit_intercept': [True, False]
    }
    search_bestparam(model, param_grid, X_train, y_train, df_search_best_param)
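
The search_bestparam helper is not shown in this example. Below is a hedged sketch of what such a helper might look like, assuming it runs a grid search and records the best result in the passed-in dataframe (the column layout here is hypothetical):

from sklearn.model_selection import GridSearchCV

def search_bestparam(model, param_grid, X_train, y_train, df_search_best_param):
    # Exhaustively try every parameter combination with 5-fold cross-validation.
    search = GridSearchCV(model, param_grid, cv=5)
    search.fit(X_train, y_train)
    # Append the winning configuration and its score (hypothetical schema).
    df_search_best_param.loc[len(df_search_best_param)] = [
        type(model).__name__, search.best_params_, search.best_score_
    ]
    return search.best_estimator_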
Example #4
def sklearn_mode():
    loaded_data = datasets.load_boston()
    data_X = loaded_data.data
    data_y = loaded_data.target
    model = LinearRegression()
    model.fit(data_X, data_y)
    print(model.coef_)  # y=0.1x+0.3
    print(model.intercept_)
    print(model.get_params())
    print(model.score(data_X, data_y))  # R^2 coefficient of determination


def main():
    x, y = datasets.make_regression(100, 1, noise=5)
    x = preprocessing.scale(x)
    y = preprocessing.scale(y)
    print(x.shape, y.shape)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
    lr = LinearRegression(n_jobs=8)
    lr.fit(x_train, y_train)
    print(lr.coef_, lr.intercept_)
    print(lr.get_params())
    print(lr.score(x_test, y_test))
Example #6
class LinearRegression(Model):

    # X represents the features, Y represents the labels
    X = None
    Y = None
    predictions = None
    model = None

    def __init__(self, X=None, Y=None, cfg=False):

        if X is not None:
            self.X = X

        if Y is not None:
            self.Y = Y

        self.model = LinearRegressionModel()
        self.cfg = cfg

    def fit(self, X=None, Y=None):
        if X is not None:
            self.X = X

        if Y is not None:
            self.Y = Y

        print('Linear Regression Train started............')
        self.model.fit(self.X, self.Y)
        print('Linear Regression completed..........')

        return self.model

    def predict(self, test_features):
        print('Prediction started............')
        self.predictions = self.model.predict(test_features)
        print('Prediction completed..........')
        return self.predictions

    def save(self):
        if self.cfg:
            with open('linearregression_configs.txt', 'w') as f:
                f.write(json.dumps(self.model.get_params()))
        else:
            print('No model will be saved for linear regression')

    def featureImportance(self):
        #if X_headers is None:
        #    X_headers = list(self.X)
        #print(self.model.coef_)
        #feature_importance_ = zip(self.model.coef_[0], X_headers)
        #feature_importance = set(feature_importance_)

        return self.model.coef_
Example #7
def run_regression(n_classes):
    X, y = get_scaled_data()
    y = clean_y_data(y, n_classes)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    model = LinearRegression().fit(X_train, y_train)
    print(model.get_params())

    y_pred = model.predict(X_test)
    for i in range(len(y_pred)):
        y_pred[i] = int(y_pred[i])
    print_model_info(y_test, y_pred,
                     ("Run LinearRegression for", n_classes, "classes:"))
Example #8
def train(stock, x_df, y_df):
    # check nan
    print(x_df.isnull().values.any())
    print(y_df.isnull().values.any())

    # LinearRegression
    x = x_df[:].values
    y = y_df[:].values
    reg = LinearRegression().fit(x, y)
    res = {}
    res['stock'] = stock
    res['score'] = reg.score(x, y)
    res['params'] = reg.get_params()
    # res['coef_'] = reg.coef_
    linear_regression_results.append(res)
Example #9
def train(stock, x_df, y_df):
    # check nan
    # print(x_df.isnull().values.any())
    # print(y_df.isnull().values.any())

    # LinearRegression
    x = x_df.values
    y = y_df.values

    print(stock)
    # print(x.shape)
    # print(y.shape)
    # x = x_df[:].values
    # y = y_df[:].values
    tc = 180
    x_train = x[:tc]
    y_train = y[:tc]
    x_test = x[tc:]
    y_test = y[tc:]
    try:
        reg = LinearRegression().fit(x_train, y_train)
    except Exception as e:
        print(e)
        return
    res = {}
    res['stock'] = stock
    res['score'] = reg.score(x_test, y_test)
    res['params'] = reg.get_params()

    # test prediction
    pY = reg.predict(x)
    print(pY.shape)
    py_df = pd.DataFrame(pY)
    pred_df = pd.concat([y_df, py_df], axis=1)
    pred_df.to_csv(ROOT + '/data/test_pred/' + stock[0:4] + '.csv')

    # custom score: squared-error sum (avoids shadowing the builtin `sum`)
    squared_error_sum = 0
    for i, pred in enumerate(pY):
        diff = pred - y[i]
        # print(diff)
        squared_error_sum += diff * diff
    print(squared_error_sum)
    res['custom_training_error_sum'] = squared_error_sum
    res['custom_training_error'] = math.sqrt(squared_error_sum / len(pY))

    # res['coef_'] = reg.coef_
    linear_regression_results.append(res)
Example #10
class LinRegClassifierSKLearn(BaseEstimator, ClassifierMixin, TransformerMixin):
    def __init__(self, *args, **kwargs):
        self.clf = LinearRegression(*args, **kwargs)
        self.threshold = 0.0

    def fit(self, X, y):
        y = (2 * y) - 1
        self.clf.fit(X, y)
        return self

    def predict(self, X):
        y = self.clf.predict(X)
        y = (2 * (y > self.threshold)) - 1
        y[y == -1] = 0
        return y

    def get_params(self, deep=True):
        return self.clf.get_params(deep=deep)
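
A brief usage sketch on made-up 0/1 labels: fit() remaps the labels to -1/+1 for the regression, and predict() thresholds the continuous output back to 0/1:

import numpy as np

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])

clf = LinRegClassifierSKLearn().fit(X, y)
print(clf.predict(X))    # hard 0/1 labels
print(clf.get_params())  # forwarded to the underlying LinearRegression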
Example #11
class LinearRegression():
    def __init__(self, fit_intercept=True, normalize=False, copy_X=True, n_jobs=1):
        self.LR = LR(fit_intercept=fit_intercept, normalize=normalize,
                     copy_X=copy_X, n_jobs=n_jobs)

    def decision_function(self, x):
        return self.LR.decision_function(x)

    def fit(self, x, y):
        return self.LR.fit(x, y)

    def get_params(self):
        return self.LR.get_params()

    def predict(self, x):
        return self.LR.predict(x)

    def set_params(self, **params):
        self.LR.set_params(params)
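
A short, hypothetical round-trip showing why set_params must unpack the dict: get_params returns the parameters as a dict of keyword arguments, and scikit-learn's set_params expects them as keywords:

reg = LinearRegression()
params = reg.get_params()   # e.g. {'fit_intercept': True, 'normalize': False, ...}
params['fit_intercept'] = False
reg.set_params(**params)    # unpacking required; set_params(params) raises a TypeError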
Example #13
def RegressionModel(model, params, features, target, scoring, kFold):
    if model == "linear regression":
        model = LinearRegression(fit_intercept=params['fit_intercept'], \
            normalize=params['normalize'], copy_X=params['copy_X'])
        print('************************************************')
        print(model.get_params())
        print('************************************************')
    else:
        print('Sorry, we are still developing other regression methods.')

    if kFold == 0:
        x_train, x_test, y_train, y_test = train_test_split(features, target,
                                                            random_state=1)
        model.fit(x_train, y_train)

        model_train_pred = model.predict(x_train)
        model_test_pred = model.predict(x_test)

        results = ''
        if "neg_mean_absolute_error" in scoring:
            results = 'MAE train data: %.3f, MAE test data: %.3f' % (
                mean_absolute_error(y_train, model_train_pred),
                mean_absolute_error(y_test, model_test_pred))
        if "neg_mean_squared_error" in scoring:
            results = results + '\n' + 'MSE train data: %.3f, MSE test data: %.3f' % (
                mean_squared_error(y_train, model_train_pred),
                mean_squared_error(y_test, model_test_pred))
        if "neg_mean_squared_log_error" in scoring:
            results = results + '\n' + 'MSLE train data: %.3f, MSLE test data: %.3f' % (
                mean_squared_log_error(y_train, model_train_pred),
                mean_squared_log_error(y_test, model_test_pred))
        if "r2" in scoring:
            results = results + '\n' + 'R2 train data: %.3f, R2 test data: %.3f' % (
                r2_score(y_train, model_train_pred),
                r2_score(y_test, model_test_pred))

        return results

    elif kFold > 2:
        results = cross_validate(model, features, target, scoring=scoring,
                                 cv=kFold, error_score=np.nan)
        return results

    else:
        print("K-Fold has to be an integer (>=3) or 0 (no cross-validation)")
Example #14
def simple_linear(X_train, y_train, X_test, y_test):
    linear = LinearRegression()
    linear.fit(X_train, y_train)
    y_pred = linear.predict(X_test)
    print('\nLinear Regression Summary')
    print('R2:', linear.score(X_test, y_test))
    print('Intercept:', linear.intercept_, '\nCoefficients:', linear.coef_)
    print('Parameters:', linear.get_params())
    # Estimate how well the model will perform on unseen data
    # http://scikit-learn.org/stable/modules/model_evaluation.html
    # http://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics
    # pass scoring='wrong' to see the valid scoring options
    # Common regression metrics: R2, RMSE, MAE/MAPE
    score = cross_val_score(estimator=linear,
                            X=X_train,
                            y=y_train,
                            fit_params=None,
                            scoring='r2',
                            cv=5,
                            n_jobs=-1)
    print('Mean Cross Validation Score:', score.mean())
Example #15
class LinRegClassifierSKLearn(BaseEstimator, ClassifierMixin, TransformerMixin):
    """
    This class implements Linear Regression classifer.

    Specifically, this class uses Linear Regression matcher from
    scikit-learn, wraps it up to form a classifier.


    """
    def __init__(self, *args, **kwargs):
        # Set the classifier to the scikit-learn Linear Regression matcher.
        self.clf = LinearRegression(*args, **kwargs)
        # Set the threshold to 0
        self.threshold = 0.0

    def fit(self, X, y):
        # Convert the 0/1 labels to -1/+1
        y = (2 * y) - 1
        # Call the fit method of Linear Regression matcher
        self.clf.fit(X, y)
        # Return the wrapper object
        return self

    def predict(self, X):
        # Call the predict method of the underlying matcher
        y = self.clf.predict(X)
        # Threshold the continuous predictions to -1/+1
        y = (2 * (y > self.threshold)) - 1
        # Convert all the -1s to 0s
        y[y == -1] = 0
        # Return back the predictions
        return y

    def get_params(self, deep=True):
        """
        Function to get params. This will be used by other scikit-learn
        matchers.
        """
        return self.clf.get_params(deep=deep)
Example #16
def linear_regression(df, significant_cols, target, cat_cols, num_cols):
    ss = StandardScaler()
    ohe = OneHotEncoder(drop='first', sparse=False)
    X = df[significant_cols]
    y = df[target]
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=0)
    X_train_cat = ohe.fit_transform(X_train[cat_cols])
    X_train_num = ss.fit_transform(X_train[num_cols])
    X_test_cat = ohe.transform(X_test[cat_cols])
    X_test_num = ss.transform(X_test[num_cols])
    train_data = np.c_[X_train_cat, X_train_num]
    test_data = np.c_[X_test_cat, X_test_num]
    estimator = LinearRegression(n_jobs=-1)
    r2_cv_scores = cross_val_score(estimator,
                                   train_data,
                                   y_train,
                                   scoring='r2',
                                   cv=3,
                                   n_jobs=-1)
    rmse_cv_scores = cross_val_score(estimator,
                                     train_data,
                                     y_train,
                                     scoring='neg_root_mean_squared_error',
                                     cv=3,
                                     n_jobs=-1)
    r2 = np.mean(r2_cv_scores)
    rmse = np.abs(np.mean(rmse_cv_scores))
    r2_variance = np.var(r2_cv_scores, ddof=1)
    rmse_variance = np.var(rmse_cv_scores, ddof=1)  # variance is already non-negative
    estimator.fit(train_data, y_train)
    y_pred = estimator.predict(test_data)
    r2_validation = r2_score(y_test, y_pred)
    rmse_validation = np.sqrt(mean_squared_error(y_test, y_pred))
    params = estimator.get_params()
    return r2, rmse, r2_variance, rmse_variance, r2_validation, rmse_validation, params
Example #17
def sklearn_dateset_test():
    # exercise scikit-learn's bundled datasets
    # load the data
    loaded_data = datasets.load_boston()
    data_X = loaded_data.data
    data_y = loaded_data.target
    # define the model (default parameters); tune it later by checking prediction accuracy and trying different models and parameters
    model = LinearRegression()
    # train the model
    model.fit(data_X, data_y)
    # print predictions for the first 4 samples of X
    print("predict target:")
    print(model.predict(data_X[:4, :]))
    print("actual data:")
    print(data_y[:4])

    # the LinearRegression model's parameters: slope and intercept
    print("model.coef_:\n", model.coef_)
    print("model.intercept_:\n", model.intercept_)
    # retrieve the parameters defined earlier
    print("model.get_params:\n", model.get_params())
    # model.score(data_X, data_y) scores the model with R^2 and reports its accuracy
    # the closer R^2 is to 1, the better the fitted regression equation
    print("model.score:\n", model.score(data_X, data_y))

    # create synthetic data: 100 samples, 1 feature, 1 target; larger noise scatters the points more
    X1, y1 = datasets.make_regression(n_samples=100,
                                      n_features=1,
                                      n_targets=1,
                                      noise=10)
    X2, y2 = datasets.make_regression(n_samples=100,
                                      n_features=1,
                                      n_targets=1,
                                      noise=50)
    plt.scatter(X1, y1)
    plt.scatter(X2, y2)
    plt.show()
    return 0
Example #18
def linear_regression():
    """

    """
    loaded_data = datasets.load_boston()
    data_x = loaded_data.data
    data_y = loaded_data.target

    model = LinearRegression()
    model.fit(data_x, data_y)
    # slope (coefficients)
    print(model.coef_)
    # intercept
    print(model.intercept_)
    print(model.get_params())
    print(model.predict(data_x[:4, :]))
    print(data_x[:4, :])

    x, y = datasets.make_regression(n_samples=100,
                                    n_features=1,
                                    n_targets=1,
                                    noise=1)
    plt.scatter(x, y)
    plt.show()
Example #19
class LinearRegressionModel(object):
    def __init__(self):
        self.name = 'Linear Regression'
        self.clf = LinearRegression()

    def get_params(self):
        return self.clf.get_params()

    def train(self, dataframe):
        X = get_features(dataframe)
        y = get_response(dataframe)
        self.clf.fit(X, y)

    def predict(self, X):
        y_pred = self.clf.predict(X)
        return y_pred

    def save(self, filename):
        with open(filename, 'wb') as output_file:
            pickle.dump(self.clf, output_file, pickle.HIGHEST_PROTOCOL)

    def load(self, filename):
        with open(filename, 'rb') as input_file:
            self.clf = pickle.load(input_file)
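
A short usage sketch, assuming get_features and get_response (not shown) extract the feature matrix and response vector from a dataframe, and that df_train and df_test are hypothetical dataframes:

model = LinearRegressionModel()
model.train(df_train)
model.save('linreg.pkl')

restored = LinearRegressionModel()
restored.load('linreg.pkl')
y_pred = restored.predict(get_features(df_test))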
Example #20
def train_model(xy):
    # linear
    x = xy[:, 0].reshape(-1, 1)
    y = xy[:, 1]
    model = LinearRegression()
    model.fit(x, y)
    pred_y = model.predict(x)
    coef = model.coef_
    intercept = model.intercept_
    params = model.get_params()
    print(params)
    linear_r2 = sm.r2_score(y, pred_y)  # R2 score
    linear_absolute = sm.mean_absolute_error(y, pred_y)  # mean absolute error
    linear_squared = sm.mean_squared_error(y, pred_y)  # mean squared error
    linear_median = sm.median_absolute_error(y, pred_y)  # median absolute error
    drawing(xy, x, pred_y)
    return {
        'linear_score': {
            'linear_r2': round(linear_r2, 5),
            'linear_absolute': round(linear_absolute, 5),
            'linear_squared': round(linear_squared, 5),
            'linear_median': round(linear_median, 5)
        }
    }
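
A hypothetical call for train_model, assuming drawing() and the sm metrics alias are defined elsewhere; xy is an (n, 2) array whose first column is x and second column is y:

import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(0, 10, 50)
xy = np.column_stack([x, 3 * x + rng.normal(0, 2, 50)])  # noisy line, made up
print(train_model(xy))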
Example #21
    train_x, train_y = data[:, 2:], data[:, 0]
    return train_x, train_y, data


# X, y = make_regression(random_state=0)
# X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

train_x, train_y, data = generator_data(data)

reg = LinearRegression()
print(reg)
# for i in range(10):
reg.fit(train_x, train_y)

print('all parameters of the model: ', reg.get_params())
# GradientBoostingRegressor(random_state=0)

pre = reg.predict(train_x)
# print('Predict regression target for x :', pre)
# print(pre.shape)
r = reg.score(train_x, train_y)
print('Return the coefficient of determination R2 of the prediction : ', r)
re_index(observed_v=train_y, predicted_v=pre)

print(keys[7:])
print(reg.coef_)

feature_importance = reg.coef_

# feature_importance=(feature_importance/feature_importance.max())
Example #22
    print(features.shape)
    values = np.empty(features.shape[0], float)
    for i, res in enumerate(result.Result.all()):
        if i % 100 == 0: print(i, end=' ')
        if i >= num_results: break
        for j, param in enumerate(param_features):
            features[i, j] = getattr(res.spec, param)
        #values[i] = analysis.horizontal_surface_area.compute_by_result(res, flush=False)
        values[i] = analysis.horizontal_surface_area.func(res)
    print()
    regression = LinearRegression(n_jobs=-1)  # n_jobs belongs on the estimator, not fit()
    regression.fit(features, values)
    print('Features', param_features)
    print('Coeff', regression.coef_)
    print('Intercept', regression.intercept_)
    print('Params', regression.get_params())
    print('R2', regression.score(features, values))
elif arg == 'dump':
    from biofilm import util
    path = util.results_h5_path(sys.argv[2])
    util.set_h5(path)

    import numpy as np
    from biofilm.model import spec, result, analysis

    param_features = []
    for param, r in param_ranges.items():
        if isinstance(r, tuple):
            param_features.append(param)

    with open(path + '.dump', 'w') as dump:
regressor = LinearRegression()
regressor.fit(X_poly, yTrain)
print('dvj')

# Calculate errors
XTest_poly = poly_reg.fit_transform(XTest)
yTestPredict = regressor.predict(XTest_poly)
mse = mean_squared_error(yTest, yTestPredict, squared=True)
rmse = mean_squared_error(yTest, yTestPredict, squared=False)
mae = mean_absolute_error(yTest, yTestPredict)
mape = mean_absolute_percentage_error(yTest, yTestPredict)
print("The mean squared error (MSE) on test set: {:.4f}".format(mse))
print("The root Mean Square Error (RMSE) on test set: {:.4f}".format(rmse))
print("The mean absolute error on test set: {:.4f}".format(mae))
print("The mean absolute percentage error on test set: {:.4f}".format(mape))
print(regressor.get_params(deep=True))

# prediction part
Order_API_Concurrency = 5
Carts_API_Concurrency = 5
Order_Cores = 0.2
Order_DB_Cores = 0.2
Carts_Cores = 0.2
Carts_DB_Cores = 0.2

new_X = [
    Order_API_Concurrency, Carts_API_Concurrency, Order_Cores, Order_DB_Cores,
    Carts_Cores, Carts_DB_Cores
]
print()
print('X value ', new_X)
Example #24
from sklearn.linear_model import LinearRegression
# the common learning pattern
loaded_data = datasets.load_boston()  # load the housing-price dataset
data_X = loaded_data.data
data_y = loaded_data.target

model = LinearRegression()  # instantiate the linear regression model
model.fit(data_X, data_y)  # train

print(model.predict(data_X[:4, :]))  # test
print(data_y[:4])

print(model.coef_)  # slope: the weight of each input feature
print(model.intercept_)  # intercept
print(model.get_params())  # the parameters the model was defined with
# {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': False}
print(model.score(data_X, data_y))  # score the linear model on data and targets: its accuracy

import matplotlib.pyplot as plt

# generate dataset X and the corresponding linear result set y
X, y = datasets.make_regression(n_samples=100,
                                n_features=1,
                                n_targets=1,
                                noise=10)
print(X[:5, :])
plt.scatter(X, y)
plt.show()

from sklearn import preprocessing
#!/usr/bin/python3
# coding: utf-8
from sklearn import datasets
from sklearn.linear_model import LinearRegression
##################################################################
## load the data
loaded_data = datasets.load_boston(); print(loaded_data)
data_X = loaded_data.data
data_y = loaded_data.target
##################################################################
## load the Model
model = LinearRegression()
model.fit(data_X, data_y)
##################################################################
## Model attributes && methods
# the following must run after the model's fit()
print(model.predict(data_X[:4, :]))
print(model.coef_)  # coefficients, many of them; the 0.1 in y = 0.1x + 0.3
print(model.intercept_)  # the constant; where the line crosses the y axis
print(model.get_params())  # the parameters defined in LinearRegression()
print(model.score(data_X, data_y))  # R^2 coefficient of determination; scores the fit
Example #26

# %% [markdown]
# # Load Data:
# %%
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.8,
                                                    random_state=0)

# %% [markdown]
# # Linear Regression:
# %%
model_linreg = LinearRegression()
print_dict(model_linreg.get_params(), 'LinearRegressor params:')
model_linreg.fit(X_train, y_train)
y_predict_linreg = model_linreg.predict(X_test)
y_predict_linreg = np.round(y_predict_linreg).astype(int)  # Regressor -> classifier!
error_rate_linreg = test(y_predict_linreg, y_test)
print(f'Linear Regressor score: {model_linreg.score(X_test, y_test):.3g}')

# %% [markdown]
# # Naive Bayesian:
# %%
model_bayes = CategoricalNB()
print_dict(model_bayes.get_params(), 'CategoricalNB params:')
model_bayes.fit(X_train, y_train)
y_predict_bayes = model_bayes.predict(X_test)
error_rate_bayes = test(y_predict_bayes, y_test)
Example #27
Y_Target = df.iloc[:, -1]
padronizacao = StandardScaler().fit(X_Data)
X_p = padronizacao.transform(X_Data)

d_Test = pd.read_csv('dados/test.csv')
colunas = ('NU_NOTA_CN', 'NU_NOTA_CH', 'NU_NOTA_LC', 'NU_NOTA_REDACAO')
#colunas = ('NU_NOTA_CN','NU_NOTA_LC')
df_test = d_Test.loc[:, colunas]

df_test.update(df_test.fillna(-236))

X_Data_Test = df_test
X_r = padronizacao.transform(X_Data_Test)

ols = LinearRegression()
print(ols.get_params().keys())
ols_params = {'fit_intercept': [True, False], 'normalize': [True, False]}

X_train, X_test, Y_train, Y_test = train_test_split(X_p,
                                                    Y_Target,
                                                    test_size=0.25,
                                                    random_state=5)
ols.fit(X_p, Y_Target)

pred_train = ols.predict(X_train)
pred_test = ols.predict(X_test)
final = ols.predict(X_r)
print(ols.score(X_test, Y_test))

lista = final
x = 0
Example #28
#########################

# Create some simple data
# import pandas as pd
# df = pd.read_csv('09-regression-test.csv')

import numpy as np
X = [[50, 80], [80, 65], [60, 60], [95, 80], [95, 50], [40, 90]]  # Features
y = [65, 83, 69, 92, 84, 55]

# Fit a linear regression to it
from sklearn.linear_model import LinearRegression
model = LinearRegression(fit_intercept=True)
model.fit(X, y)

# Report Results
print('intercept_ :', model.intercept_)
print('coef_ :', model.coef_)
print('get_params :', model.get_params())

# Model the prediction
predict_data = [[52, 81], [81, 66], [60, 62], [0, 8]]
y_hat = model.predict(predict_data)

# # Plot the data
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(X, y, 'o')
ax.plot(predict_data, y_hat)
plt.show()
lm_multi2.coef_

lm_multi1.score(X1,y)
lm_multi2.score(X2,y)

sns.regplot(x = 'highway-mpg', y = 'price', data = df)
sns.regplot(x = 'peak-rpm', y = 'price', data = df)

df[['highway-mpg','peak-rpm','price']].corr()

sns.residplot(x=df['highway-mpg'],y=df['price'],lowess=True)

lm.fit(X1,y)
dir(lm)
lm._decision_function(X1)
lm.get_params(True)
lm._get_tags()

y_hat = lm.predict(X1)

def PlotPolly(model, independent_variable, dependent_variable, Name):
    x_new = np.linspace(15, 55, 100)
    y_new = model(x_new)

    plt.plot(independent_variable, dependent_variable, '.', x_new, y_new, '-')
    plt.title('Polynomial Fit with Matplotlib for Price ~ Length')
    ax = plt.gca()
    ax.set_facecolor((0.898, 0.898, 0.898))
    fig = plt.gcf()
    plt.xlabel(Name)
    plt.ylabel('Price of Cars')
Example #30
#--------------------------------------------
# Cross-validation: run the test
#--------------------------------------------
z = 0  # set to the model that did best during training
y_pred = base_model[z].predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(
    "**** Test     set score( {} ):  MSE={:.3f}  RMSE={:.3f}  MAE={:.3f}  Score={:.3f} ****"
    .format(z, round(mse, 3), round(np.sqrt(mse), 3), round(mae, 3),
            regr.score(X_test, y_test)))
print('Parameters currently in use:')

from pprint import pprint
pprint(regr.get_params())

# Looks a bit overfit? Try tuning it

# # 5. RandomForest
#
# * n_estimators = number of trees in the forest
# * max_features = maximum number of features considered when splitting a node
# * max_depth = maximum number of levels in each decision tree
# * min_samples_split = minimum number of data points required in a node before it is split
# * min_samples_leaf = minimum number of data points allowed in a leaf node
# * bootstrap = how data points are sampled (with or without replacement)

# In[26]:

# reset the data
Example #31
def scikit_tutorial():
    """
    A quick introduction to scikit-learn
    :return:
    """
    # 1. prepare the dataset
    X = np.random.randint(0, 100, (10, 4))
    y = np.random.randint(0, 3, 10)
    y.sort()
    print('Samples:')
    print(X)
    print('Labels:', y)

    # split into training and test sets
    # random_state is the random seed; fixing it makes every run produce the same split
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=1 / 3.,
                                                        random_state=7)
    print('Training set:')
    print(X_train)
    print(y_train)
    print('Test set:')
    print(X_test)
    print(y_test)

    # feature scaling (normalization)
    x1 = np.random.randint(0, 1000, 5).reshape(5, 1)
    x2 = np.random.randint(0, 10, 5).reshape(5, 1)
    x3 = np.random.randint(0, 100000, 5).reshape(5, 1)
    print(x1)
    print(np.random.randint(0, 1000, (5, 1)))
    X = np.concatenate([x1, x2, x3], axis=1)
    print(X)
    print(preprocessing.scale(X))

    # generate classification data to demonstrate why scaling is necessary
    X, y = make_classification(n_samples=300,
                               n_features=2,
                               n_redundant=0,
                               n_informative=2,
                               random_state=25,
                               n_clusters_per_class=1,
                               scale=100)

    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.show()

    # comment out the following line to skip feature scaling
    # X = preprocessing.scale(X)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=1 / 3.,
                                                        random_state=7)
    svm_classifier = svm.SVC()
    svm_classifier.fit(X_train, y_train)
    svm_classifier.score(X_test, y_test)

    # 2. train a model
    # regression model
    boston_data = datasets.load_boston()
    X = boston_data.data
    y = boston_data.target
    print('Samples:')
    print(X[:5, :])
    print('Labels:')
    print(y[:5])

    # choose the linear regression model
    lr_model = LinearRegression()
    # split into training and test sets (note the unpack order: X_train, X_test, y_train, y_test)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=1 / 3,
                                                        random_state=7)
    # train the model
    lr_model.fit(X_train, y_train)
    # retrieve the parameters
    lr_model.get_params()
    lr_model.score(X_train, y_train)
    lr_model.score(X_test, y_test)

    # 3. cross-validation
    # K-nearest-neighbor classification
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=1 / 3.,
                                                        random_state=10)

    k_range = range(1, 31)
    cv_scores = []
    for n in k_range:
        knn = KNeighborsClassifier(n)
        scores = cross_val_score(knn,
                                 X_train,
                                 y_train,
                                 cv=10,
                                 scoring='accuracy')  # for classification problems
        # scores = cross_val_score(knn, X_train, y_train, cv=10, scoring='neg_mean_squared_error')  # for regression problems
        cv_scores.append(scores.mean())

    plt.plot(k_range, cv_scores)
    plt.xlabel('K')
    plt.ylabel('Accuracy')
    plt.show()
    # choose the best K
    best_knn = KNeighborsClassifier(n_neighbors=5)
    best_knn.fit(X_train, y_train)
    print(best_knn.score(X_test, y_test))
    print(best_knn.predict(X_test))
Example #32
def linear_model(x, attr, xvars, fit_intercept=None, name=None, 
        cut=None, residuals=True, quiet=True,
        model='LinearRegression'):
    """Make a linear model for attr based on xvars as free parameters.
       Currently only model='LinearRegression' implmented.
       Uses scikit-learn.
       residuals:  Name of attribute for residuals (default: attr+"_residuals")
    """
    if model is not 'LinearRegression':
        raise Exception("Currently only model='LinearRegression' implmented.")

    from sklearn.linear_model import LinearRegression
    import pandas as pd
    import numpy as np
    import xarray as xr
    lm = LinearRegression()  
    if not name:
        name = '{:}_model'.format(attr)

    if not quiet:
        print('\nUsing scikit-learn LinearRegression to build model for {:} from variables:\n  {:}'.format(attr, str(xvars)))

    allattrs = xvars + [attr]
    if cut:
        allattrs += [cut]

    xx = x.reset_coords()[allattrs].where(np.isfinite(x.reset_coords()[attr]), drop=True)
    df_xvars0 = xx[xvars].to_dataframe()
    if cut:
        df_xvars = xx[xvars].where(xx[cut] == 1, drop=True).to_dataframe()
        xdata = xx[attr].where(xx[cut] == 1, drop=True).data
        if not quiet:
            print('\nUsing the following cut in building the model')
            print(xx[cut])
    else:
        df_xvars = df_xvars0
        xdata = xx[attr].data
    
    if fit_intercept is not None:
        lm.fit_intercept = fit_intercept
    
    lm.fit(df_xvars, xdata) 
    #ft = pd.DataFrame(zip(df_xvars.columns,lm.coef_), columns=['params','estimatedCoefficients'])
   
    x[name] = xr.DataArray(lm.predict(df_xvars0), coords=[('time', df_xvars0.index)])
    #x[name] = (['time'], lm.predict(df_xvars0))
    x[name].attrs.update(**lm.get_params())
    x[name].attrs['unit'] = x[attr].attrs.get('unit','')
    x[name].attrs['doc'] = 'LinearRegression scikit-learn model for {:} training data'.format(attr)
    x[name].attrs['model'] = model
    x[name].attrs['variables'] = xvars
    x[name].attrs['coef_'] = lm.coef_
    x[name].attrs['intercept_'] = lm.intercept_
    x[name].attrs['score'] = lm.score(df_xvars, xdata)
    if not quiet:
        print('\n****Model Results****')
        print(x.reset_coords()[name])

    if residuals:
        if not isinstance(residuals, str):
            residuals = '{:}_residuals'.format(attr)

        x[residuals] = (['time'], x[attr]-x[name])
        x[residuals].attrs['doc'] = 'Residuals for {:} based on LinearRegression model {:}'.format(attr, name)
        if not quiet:
            print('\n****Model Residuals****')
            print(x.reset_coords()[residuals])

    return x
Example #33
from sklearn import datasets
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# linear regression on a bundled dataset
loaded_data = datasets.load_boston()
data_X = loaded_data.data
data_y = loaded_data.target

model = LinearRegression()
model.fit(data_X, data_y)

print(model.predict(data_X[:4, :]))
print(data_y[:4])

# parameters
print(model.coef_)  # if y = 0.1x + 0.3, this line prints 0.1
print(model.intercept_)  # this line prints 0.3
print(model.get_params())  # the parameters the model was defined with; defaults if none were given
print(model.score(
    data_X,
    data_y))  # score the trained model; LinearRegression is scored with the R^2 coefficient of determination
Example #34
time_series['trend'] = range(time_series.shape[0])
time_series['month'] = time_series['month'].astype('category')

####dropping columns

X = time_series.drop(['week', 'year', 'date', 'total_sales'], axis=1)

names = pd.get_dummies(X).columns
X = pd.get_dummies(X).values
y = time_series.total_sales.values

model = LinearRegression()

model.fit(X, y)

model.get_params()
model.coef_

dict1 = list(zip(names, model.coef_))

prediction = model.predict(X)

time_series['prediction'] = prediction
import matplotlib.pyplot as plt

plt.plot(time_series.date, time_series.total_sales, label='Actual')
plt.plot(time_series.date, time_series.prediction, label='prediction')
plt.legend(loc='upper left')
plt.show()

#####forecasting
Example #35
from sklearn import datasets
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

loaded_data = datasets.load_boston()

data_X = loaded_data.data
data_Y = loaded_data.target

model = LinearRegression()
model.fit(data_X, data_Y)

print(model.predict(data_X[:4, :]))
print(data_Y[:4])
print(model.coef_)  # y = 0.1x + 0.3
print(model.intercept_)
print(model.get_params())

# R^2 coefficient of determination: how well the predictions match the target
print(model.score(data_X, data_Y))

# X, Y = datasets.make_regression(
#     n_samples=100, n_features=1, n_targets=1, noise=0.001)

# plt.scatter(X, Y)
# plt.show()
Example #36
from sklearn import datasets
from sklearn.linear_model import LinearRegression

loaded_data = datasets.load_boston()
data_X = loaded_data.data
data_y = loaded_data.target

model = LinearRegression()
model.fit(data_X, data_y)

print(model.coef_)  # y = ax + b: prints a
print(model.intercept_)  # prints b
print(model.get_params())
print(model.score(data_X, data_y))  #R^2 coefficient of determination
Example #37
class LinRegClassifierSKLearn(BaseEstimator, ClassifierMixin, TransformerMixin):
    """
    This class implements Linear Regression classifer.

    Specifically, this class uses Linear Regression matcher from
    scikit-learn, wraps it up to form a classifier.


    """
    def __init__(self, *args, **kwargs):
        # Set the classifier to the scikit-learn Linear Regression matcher.
        self.clf = LinearRegression(*args, **kwargs)
        # Set the threshold to 0
        self.threshold = 0.0
        # Set the classes_
        self.classes_ = np.array([0, 1], np.int64)

    def fit(self, X, y):
        # Convert the 0/1 labels to -1/+1
        y = (2 * y) - 1
        # Call the fit method of Linear Regression matcher
        self.clf.fit(X, y)
        # Return the wrapper object
        return self

    def predict(self, X):
        # Call the predict method of the underlying matcher
        y = self.clf.predict(X)
        # Threshold the continuous predictions to -1/+1
        y = (2 * (y > self.threshold)) - 1
        # Convert all the -1s to 0s
        y[y == -1] = 0
        # Return back the predictions
        return y

    def predict_proba(self, X):
        # There is no proba function defined for the Linear Regression matcher in
        # scikit-learn, so we return hard probabilities of 0 or 1

        # warn the user
        logger.warning('There is no proba function defined for Linear Regression '
                       'Matcher in scikit learn. So we return the probs as 0 or 1')

        y = self.predict(X)
        p = np.ndarray(shape=[len(y), 2])

        for i in range(len(y)):
            if y[i] == 1:
                p[i][0] = 0
                p[i][1] = 1
            elif y[i] == 0:
                p[i][0] = 1
                p[i][1] = 0

        return p

    def get_params(self, deep=True):
        """
        Function to get params. This will be used by other scikit-learn
        matchers.
        """
        return self.clf.get_params(deep=deep)
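
A brief usage sketch on made-up 0/1 data, assuming a module-level logger is configured; predict_proba returns hard rows of [P(0), P(1)] that are exactly 0 or 1, as the warning explains:

import numpy as np

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])

clf = LinRegClassifierSKLearn().fit(X, y)
print(clf.predict(X))        # hard 0/1 labels
print(clf.predict_proba(X))  # one [P(0), P(1)] row per sample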