Example #1
reg = LinearRegression().fit(X_product, y)

line_product = np.hstack([line_binned, line * line_binned])
plt.plot(line, reg.predict(line_product), label="binned * original reg")

for bin in bins:
    plt.plot([bin, bin], [-3, 3], ":", c="k", linewidth=1)
plt.legend(loc="best")
plt.ylabel("Regression output")
plt.xlabel("input")
plt.plot(X[:, 0], y, "o", c="k")

# Add polynomial features
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=10,
                          include_bias=False)  # degree=n: include terms up to X^n
poly.fit(X)
X_poly = poly.transform(X)
print("X_poly.shape : {}".format(X_poly.shape))

print("Entries of X:\n{}".format(X[:5]))
print("Entries of X_poly:\n{}".format(X_poly[:5]))
print("Feature names: {}".format(poly.get_feature_names()))
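# Note: on newer scikit-learn releases (1.0+) this method is named
# get_feature_names_out(); the spelling above only works on older versions.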

# Regression with the added polynomial features (caution: overfits where there is no data)
reg = LinearRegression().fit(X_poly, y)

line_poly = poly.transform(line)
plt.plot(line, reg.predict(line_poly), label="polynomial reg")
plt.legend(loc="best")
plt.ylabel("Regression output")
Example #2
y = dataset.iloc[:, 2].values

# Training and test
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=.2,
                                                    random_state=0)

from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=5)
X_poly = poly_reg.fit_transform(X)

poly_reg.fit(X_poly, y)
lin_reg2 = LinearRegression()
lin_reg2.fit(X_poly, y)

plt.scatter(X, y, color='red')
plt.plot(X, lin_reg2.predict(poly_reg.fit_transform(X)), color='blue')

plt.title('Position vs Salaries')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()
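# A denser grid gives a smoother curve than predicting only at the training
# points; a minimal sketch (the 0.1 step is an arbitrary choice, and numpy is
# assumed to be imported as np):
X_grid = np.arange(X.min(), X.max() + 0.1, 0.1).reshape(-1, 1)
plt.scatter(X, y, color='red')
plt.plot(X_grid, lin_reg2.predict(poly_reg.transform(X_grid)), color='blue')
plt.title('Position vs Salaries (dense grid)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()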
         label=r'Lasso; $\alpha = 0.00001$')  # alpha here is for transparency
plt.plot(lr.coef_,
         alpha=0.7,
         linestyle='none',
         marker='o',
         markersize=5,
         color='green',
         label='Linear Regression',
         zorder=2)
plt.xlabel('Coefficient Index', fontsize=16)
plt.ylabel('Coefficient Magnitude', fontsize=16)
plt.legend(fontsize=13, loc=4)
plt.tight_layout()
plt.show()
# %%
steps = [('scaler', StandardScaler()), ('poly', PolynomialFeatures(degree=2)),
         ('model', LinearRegression())]

pipeline = Pipeline(steps)

pipeline.fit(X_train, y_train)
y_pred_linear = pipeline.predict(X_test)

print('Training score: {}'.format(pipeline.score(X_train, y_train)))
print('Test score: {}'.format(pipeline.score(X_test, y_test)))
print('RMSE: {}'.format(
    np.sqrt(metrics.mean_squared_error(y_test, y_pred_linear))))

# %%
steps = [('scaler', StandardScaler()), ('poly', PolynomialFeatures(degree=2)),
         ('model', Ridge(alpha=10, fit_intercept=True))]
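# The Ridge pipeline above is only defined in this excerpt; a minimal sketch of
# fitting and scoring it, mirroring the linear pipeline (the ridge_pipe /
# y_pred_ridge names are illustrative):
ridge_pipe = Pipeline(steps)
ridge_pipe.fit(X_train, y_train)
y_pred_ridge = ridge_pipe.predict(X_test)
print('Training score: {}'.format(ridge_pipe.score(X_train, y_train)))
print('Test score: {}'.format(ridge_pipe.score(X_test, y_test)))
print('RMSE: {}'.format(
    np.sqrt(metrics.mean_squared_error(y_test, y_pred_ridge))))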
Example #4
v.feature_names_

# Handle missing values
from sklearn.preprocessing import Imputer

imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
imp.fit_transform([[1, 2], [np.nan, 3], [7, 6]])
# Create derived features
# [1, a, b, a^2, ab, b^2]

from sklearn.preprocessing import PolynomialFeatures

X = np.arange(6).reshape(3, 2)
X

poly = PolynomialFeatures(2)
poly.fit_transform(X)
## [1, a, b, a^2, ab, b^2]
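# For X = [[0, 1], [2, 3], [4, 5]] the transform above returns the columns
# [1, a, b, a^2, ab, b^2]:
# [[ 1.  0.  1.  0.  0.  1.]
#  [ 1.  2.  3.  4.  6.  9.]
#  [ 1.  4.  5. 16. 20. 25.]]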

# Same effect as using apply
from sklearn.preprocessing import FunctionTransformer


def all_but_first_column(X):
    return X[:, 1:]


X = np.arange(12).reshape(4, 3)
X

FunctionTransformer(all_but_first_column).fit_transform(X)
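# For X = np.arange(12).reshape(4, 3) this simply drops the first column:
# [[ 1  2]
#  [ 4  5]
#  [ 7  8]
#  [10 11]]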
from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)

# Different regression algorithms
from sklearn.linear_model import LinearRegression

regressor = LinearRegression()

from sklearn.preprocessing import PolynomialFeatures

regressor = PolynomialFeatures(degree=4)
X_poly = regressor.fit_transform(X)

from sklearn.svm import SVR

regressor = SVR(kernel="rbf")

from sklearn.tree import DecisionTreeRegressor

regressor = DecisionTreeRegressor()

from sklearn.ensemble import RandomForestRegressor

regressor = RandomForestRegressor(n_estimators=100)

# Fitting and predicting
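# The excerpt stops at this heading; a minimal sketch of the step it announces,
# using the last regressor assigned above and the scaled data from earlier
# (X_train, y_train, X_test come from the partially shown preprocessing):
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)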
Example #6
y = y[:-forecast_out]

print('Dimension of X', X.shape)
print('Dimension of X_lately', X_lately.shape)
print('Dimension of y', y.shape)

# Split the data into training and test sets with train_test_split
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2)

# Linear regression
clfreg = LinearRegression(n_jobs=-1)
clfreg.fit(X_train, y_train)

# Quadratic Regression 2
clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
clfpoly2.fit(X_train, y_train)

# Quadratic Regression 3
clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
clfpoly3.fit(X_train, y_train)

# KNN Regression
clfknn = KNeighborsRegressor(n_neighbors=2)
clfknn.fit(X_train, y_train)

confidencereg = clfreg.score(X_test, y_test)
confidencepoly2 = clfpoly2.score(X_test, y_test)
confidencepoly3 = clfpoly3.score(X_test, y_test)
confidenceknn = clfknn.score(X_test, y_test)
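# A minimal sketch printing the scores computed above (not part of the original
# excerpt):
print("The linear regression confidence is ", confidencereg)
print("The quadratic regression 2 confidence is ", confidencepoly2)
print("The quadratic regression 3 confidence is ", confidencepoly3)
print("The knn regression confidence is ", confidenceknn)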
Example #7
onehotEncr = OneHotEncoder(categorical_features=[0])
#onehotEncr = OneHotEncoder(categorical_features=[1])

X = onehotEncr.fit_transform(X).toarray()

labEnc_Y = LabelEncoder()
Y = labEnc_Y.fit_transform(Y)

plt.scatter(X[:,2], Y, marker='o')


"""New"""

np.random.seed(0) 
poly_features = PolynomialFeatures(degree=2, include_bias=False) 
X = poly_features.fit_transform(X) 
model = SGDRegressor(max_iter=10000, eta0=0.001) 

model.fit(X,Y) 
print('Coeff R2 =', model.score(X, Y)) 

plt.scatter(X[:,4], Y, marker='o')
plt.scatter(X[:,0], model.predict(X), c='red', marker='+') 






Example #8
def train_model_lassoLARS_style(predictors, predictants, alpha, deg):
    Vandermonde = PolynomialFeatures(degree=deg)
    Vandermonde = Vandermonde.fit_transform(predictors)
    LinModel = linear_model.LassoLars(alpha=alpha)
    LinModel = LinModel.fit(Vandermonde, predictants)
    return LinModel
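# Hypothetical usage sketch (the data names and the alpha/degree values are
# illustrative, not from the original code):
# model = train_model_lassoLARS_style(X_train, y_train, alpha=0.01, deg=3)
# y_pred = model.predict(PolynomialFeatures(degree=3).fit_transform(X_test))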
Example #9
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#Let's examine the distribution of the predicted values of the training data.
Title = 'Distribution Plot of Predicted Values Using Training Data vs Training Data Distribution'
DistributionPlot(y_train, yhat_train, "Actual Values (Train)", "Predicted Values (Train)", Title)

Title = 'Distribution Plot of Predicted Values Using Test Data vs Test Data Distribution'
DistributionPlot(y_test,yhat_test,"Actual Values (Test)","Predicted Values (Test)",Title)
from sklearn.preprocessing import PolynomialFeatures
#Overfitting
#Let's use 45 percent of the data for testing and the rest for training:
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.45, random_state=0)
#We will perform a degree 5 polynomial transformation on the feature 'horsepower'.

pr = PolynomialFeatures(degree=5)
x_train_pr = pr.fit_transform(x_train[['horsepower']])
x_test_pr = pr.transform(x_test[['horsepower']])  # transform only; pr is already fit on the training split
pr    
#Now let's create a linear regression model "poly" and train it.    
poly = LinearRegression()
poly.fit(x_train_pr, y_train)
#We can see the output of our model using the method "predict", then assign the values to "yhat".
yhat = poly.predict(x_test_pr)
yhat[0:5]
#Let's take the first four predicted values and compare them to the actual targets.
print("Predicted values:", yhat[0:4])
print("True values:", y_test[0:4].values)

PollyPlot(x_train[['horsepower']], x_test[['horsepower']], y_train, y_test, poly,pr)
#R^2 of the training data:
import pylab as plt
import pandas as pd

#importing dataset
dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:-1].values
y = dataset.iloc[:, -1].values

#fitting linear regression
from sklearn.linear_model import LinearRegression
lin_regressor = LinearRegression()
lin_regressor.fit(X, y)

#fitting polynomial regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_regressor = PolynomialFeatures(degree=2)
X_poly = poly_regressor.fit_transform(X)
lin_regressor2 = LinearRegression()
lin_regressor2.fit(X_poly, y)

#visualization of linear regression results
plt.scatter(X, y, color='red')
plt.plot(X, lin_regressor.predict(X))
plt.title('Linear Regression (Truth)')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.show()

#visualization of polynomial linear regression
plt.scatter(X, y, color='red')
plt.plot(X, lin_regressor2.predict(poly_regressor.fit_transform(X)))
Example #11
    }
    return results


def runLR(train_X, train_y, test_X, test_y, test_X2, params):
    print('Train LR')
    model = RandomForestClassifier(**params)
    model.fit(train_X, train_y)
    print('Predict 1/2')
    pred_test_y = model.predict_proba(test_X)[:, 1]
    print('Predict 2/2')
    pred_test_y2 = model.predict_proba(test_X2)[:, 1]
    return pred_test_y, pred_test_y2


target = train['redemption_status'].values
poly = PolynomialFeatures(degree=2)
sc = StandardScaler()
lr_params = {'n_estimators': 1000}
results = run_cv_model(
    sc.fit_transform(poly.fit_transform(train[train_cols].fillna(0).values)),
    sc.fit_transform(poly.fit_transform(test[train_cols].fillna(0).values)),
    target, runLR, lr_params, auc, 'lr')
day = 2
sub = 3
name = f"day_{day}_sub_{sub}"
tmp = dict(zip(test.id.values, results['test']))
answer1 = pd.DataFrame()
answer1['id'] = test.id.values
answer1['redemption_status'] = answer1['id'].map(tmp)
answer1.to_csv(f'{name}.csv', index=None)
Example #12
# imputer is for handling missing values
from sklearn.preprocessing import Imputer
imputer = Imputer(strategy='median')

poly_target = poly_features['TARGET']

poly_features = poly_features.drop(columns=['TARGET'])

# Need to impute missing values
poly_features = imputer.fit_transform(poly_features)
poly_features_test = imputer.transform(poly_features_test)

from sklearn.preprocessing import PolynomialFeatures

# Create the polynomial object with specified degree
poly_transformer = PolynomialFeatures(degree=3)
# Train the polynomial features
poly_transformer.fit(poly_features)

# Transform the features
poly_features = poly_transformer.transform(poly_features)
poly_features_test = poly_transformer.transform(poly_features_test)
#print('Polynomial Features shape: ', poly_features.shape)
#print(poly_transformer.get_feature_names(input_features = ['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3', 'DAYS_BIRTH'])[:34])

poly_features = pd.DataFrame(poly_features,
                             columns=poly_transformer.get_feature_names([
                                 'EXT_SOURCE_1', 'EXT_SOURCE_2',
                                 'EXT_SOURCE_3', 'DAYS_BIRTH'
                             ]))
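# The test-side features can be wrapped the same way (an assumed continuation;
# the excerpt stops after the training-side DataFrame):
poly_features_test = pd.DataFrame(poly_features_test,
                                  columns=poly_transformer.get_feature_names([
                                      'EXT_SOURCE_1', 'EXT_SOURCE_2',
                                      'EXT_SOURCE_3', 'DAYS_BIRTH'
                                  ]))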
Example #13
def PolynomialLogisticRegression(degree):
    return Pipeline([('poly', PolynomialFeatures(degree=degree)),
                     ('std_scaler', StandardScaler()),
                     ('log_reg', LogisticRegression())])
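# Hypothetical usage sketch (the data names and degree are illustrative, not
# from the excerpt):
# poly_log_reg = PolynomialLogisticRegression(degree=2)
# poly_log_reg.fit(X_train, y_train)
# print(poly_log_reg.score(X_test, y_test))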
Example #14
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

from helper import get_data, split_data, visualize

name = 'Polynomial'

if __name__ == '__main__':
    x, y = get_data()
    x_train, x_test, y_train, y_test = split_data(x, y)

    pf = PolynomialFeatures()
    x_train_polynomial = pf.fit_transform(x_train)
    x_test_polynomial = pf.transform(x_test)  # transform only; pf was already fit on the training data

    regression = LinearRegression()
    regression.fit(x_train_polynomial, y_train)

    y_predicted = regression.predict(x_test_polynomial)

    visualize(y_test, y_predicted, name)
Example #15
#1. libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# load the data
df = pd.read_csv('winequality-red.csv')

X = df[['quality']]
y = df[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = PolynomialFeatures(degree=4)
y_ = model.fit_transform(y)
y_test_ = model.transform(y_test)  # transform only; the transformer was already fitted above
print(y_, y_test_)
lg = LinearRegression()
lg.fit(y_,X)
predicted_data = lg.predict(y_test_)
predicted_data = np.round(predicted_data)

print(mean_squared_error(X_test,predicted_data))

print(predicted_data)
Example #16
normalizer = preprocessing.Normalizer().fit(X)
print(normalizer.transform(X))

# Binarization
# Feature binarization
print("Feature binarization")
X = [[1., -1., 2.], [2., 0., 0.], [0., 1., -1.]]
binarizer = preprocessing.Binarizer().fit(X)
print(binarizer.transform(X))

# Imputation of missing values
print("Imputation of missing values")
from sklearn.preprocessing import Imputer

imp = Imputer(missing_values='NaN', strategy='mean', verbose=0)
imp.fit([[1, 2], [np.nan, 3], [7, 6]])
X = [[np.nan, 2], [6, np.nan], [7, 6]]
print(imp.transform(X))

# Generating polynomial features
print("Generating polynomial features")
from sklearn.preprocessing import PolynomialFeatures

X = np.arange(6).reshape(3, 2)
poly = PolynomialFeatures(2)
print(poly.fit_transform(X))

X = np.arange(9).reshape(3, 3)
poly = PolynomialFeatures(degree=3, interaction_only=True)
print(poly.fit_transform(X))
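# With degree=3 and interaction_only=True only products of distinct features are
# kept, i.e. the columns are [1, a, b, c, ab, ac, bc, abc] (no squares or cubes).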
Example #17
def predictStockPrices(df):
    dfreg = df.loc[:, ['Adj Close', 'Volume']]
    dfreg['HL_PCT'] = (df['High'] - df['Low']) / df['Close'] * 100.0
    dfreg['PCT_change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0
    print(dfreg.tail())

    # cleanup process
    # --------------------------------------------------------------------

    # fill missing values with a placeholder
    dfreg.fillna(value=-99999, inplace=True)

    # we want to separate 1 percent of the data to forecast
    forecast_out = int(math.ceil(0.01 * len(dfreg)))
    # we want to predict the AdjClose
    forecast_col = 'Adj Close'
    dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)
    X = np.array(dfreg.drop(['label'], 1))

    # scale X so that all features have the same distribution for linear regression
    X = sk.preprocessing.scale(X)

    # split off the most recent rows (X_lately) to forecast; use the earlier rows for training and evaluation
    X_lately = X[-forecast_out:]
    X = X[:-forecast_out]

    # Separate label and identify it as y
    y = np.array(dfreg['label'])
    y = y[:-forecast_out]

    print('Dimension of X', X.shape)
    print('Dimension of y', y.shape)

    # Split the data into training and test sets with train_test_split
    X_train, X_test, y_train, y_test = sk.model_selection.train_test_split(
        X, y, test_size=0.2)

    # Linear Regression
    clfreg = LinearRegression(n_jobs=-1)
    clfreg.fit(X_train, y_train)

    # Lasso Regression
    clflasso = Lasso(selection='random')
    clflasso.fit(X_train, y_train)

    # Quadratic Regression 2
    clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
    clfpoly2.fit(X_train, y_train)

    # Quadratic Regression 3
    clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
    clfpoly3.fit(X_train, y_train)

    # KNN
    clfknn = KNeighborsRegressor(n_neighbors=2)
    clfknn.fit(X_train, y_train)

    # Test the models
    confidencereg = clfreg.score(X_test, y_test)
    confidencepoly2 = clfpoly2.score(X_test, y_test)
    confidencepoly3 = clfpoly3.score(X_test, y_test)
    confidenceknn = clfknn.score(X_test, y_test)
    confidencelasso = clflasso.score(X_test, y_test)

    print("The linear regression confidence is ", confidencereg)
    print("The quadratic regression 2 confidence is ", confidencepoly2)
    print("The quadratic regression 3 confidence is ", confidencepoly3)
    print("The knn regression confidence is ", confidenceknn)
    print("The lasso regression confidence is ", confidencelasso)

    # Predict
    predictAndPlot(clfreg, X_lately, dfreg.copy(), confidencereg, forecast_out)
    predictAndPlot(clfpoly2, X_lately, dfreg.copy(), confidencepoly2,
                   forecast_out)
    predictAndPlot(clfpoly3, X_lately, dfreg.copy(), confidencepoly3,
                   forecast_out)
    predictAndPlot(clfknn, X_lately, dfreg.copy(), confidenceknn, forecast_out)
    predictAndPlot(clflasso, X_lately, dfreg.copy(), confidencelasso,
                   forecast_out)
Example #18
def _get_polynomials(self, x):
    poly = PolynomialFeatures(degree=self.degree)
    x_poly = poly.fit_transform(x.reshape(-1, 1))
    return x_poly
-1.17165272, -0.89129801,  -0.85572252, -0.7736467,  -0.21234812,-0.12717219])
 
x_test = np.array([0.31273956 , 0.46122891, 0.4917774, 0.7039386, 0.84386983, 0.97020886])
 
y_test = np.array([0.909136, 0.38747724, -0.92084687, -1.03804487, 0.03453301, -0.1177253])
 
# create matrix versions of these arrays
X_train = x_train[:, np.newaxis]
X_test = x_test[:, np.newaxis]
 
colors = ['teal', 'yellow' ,'green', 'gold']
lw = 2
 
train_error = []
test_error = []
 
 
for degree in range(11):
    fig.clf()
    model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=0))
    model.fit(X_train, y_train)
    train_error.append(mean_squared_error(y_train, model.predict(X_train)))
    test_error.append(mean_squared_error(y_test, model.predict(X_test)))
plt.plot(np.arange(11), train_error, color='green', label='train')
plt.plot(np.arange(11), test_error, color='red', label='test')
plt.ylim((0.0, 1e0))
plt.ylabel('Mean Squared Error')
plt.xlabel('Degree')
plt.legend(loc='lower left')
fig.savefig('Testing_Answer4_1.png', bbox_inches='tight')
from scipy.stats import boxcox

X_train_transformed = X_train.copy()
X_train_transformed['Fare'] = boxcox(X_train_transformed['Fare'] + 1)[0]
X_test_transformed = X_test.copy()
X_test_transformed['Fare'] = boxcox(X_test_transformed['Fare'] + 1)[0]
# Rescale data
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_transformed_scaled = scaler.fit_transform(X_train_transformed)
X_test_transformed_scaled = scaler.transform(X_test_transformed)
# Get polynomial features
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2).fit(X_train_transformed)
X_train_poly = poly.transform(X_train_transformed_scaled)
X_test_poly = poly.transform(X_test_transformed_scaled)
# Debug
print(poly.get_feature_names())
# Select features using chi-squared test
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

## Get score using original model
logreg = LogisticRegression(C=1)
logreg.fit(X_train, y_train)
scores = cross_val_score(logreg, X_train, y_train, cv=10)
print('CV accuracy (original): %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
highest_score = np.mean(scores)
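## A minimal sketch of the selection step the SelectKBest/chi2 imports set up
## (k=10 is an arbitrary illustrative choice, not from the original code)
selector = SelectKBest(chi2, k=10).fit(X_train_poly, y_train)
X_train_selected = selector.transform(X_train_poly)
logreg_poly = LogisticRegression(C=1)
scores_poly = cross_val_score(logreg_poly, X_train_selected, y_train, cv=10)
print('CV accuracy (selected polynomial features): %.3f +/- %.3f' % (np.mean(scores_poly), np.std(scores_poly)))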
Example #21
def true_fun(X):
    return np.sin(2 * np.pi * X)

np.random.seed(0)

n_samples = 30
degrees = [1, 4, 15]
X = np.sort(np.random.rand(n_samples))
y = true_fun(X) + np.random.randn(n_samples) * 0.1

plt.figure(figsize=(7, 2.5))
for i in range(len(degrees)):
    ax = plt.subplot(1, len(degrees), i + 1)
    plt.setp(ax, xticks=(), yticks=())
    polynomial_features = PolynomialFeatures(degree=degrees[i], include_bias=False)
    linear_regression = LinearRegression()
    pipeline = Pipeline([("polynomial_features", polynomial_features), ("linear_regression", linear_regression)])
    pipeline.fit(X[:, np.newaxis], y)

    # Evaluate the models using crossvalidation
    scores = cross_val_score(pipeline, X[:, np.newaxis], y, scoring="neg_mean_squared_error", cv=10)

    X_test = np.linspace(0, 1, 100)
    plt.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), label="Model")
    plt.plot(X_test, true_fun(X_test), label="True function")
    plt.scatter(X, y, edgecolor='b', s=20, label="Samples")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.xlim((0, 1))
    plt.ylim((-2, 2))
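    # The loop above computes cross-validated scores but the excerpt never
    # displays them; a plausible ending (assumed, not shown here) adds a legend
    # and a per-degree MSE title before rendering the figure:
    plt.legend(loc="best")
    plt.title("Degree {}\nMSE = {:.2e}(+/- {:.2e})".format(
        degrees[i], -scores.mean(), scores.std()))
plt.show()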
Example #22

if __name__ == "__main__":
    np.random.seed(0)
    N = 200
    # x = np.random.rand(N) * 10 - 5  # [-5,5)
    # x = np.sort(x)
    x = np.linspace(-5, 5, N)
    y = f(x) + 0.05 * np.random.randn(N)
    x.shape = -1, 1

    degree = 6
    n_estimators = 50
    max_samples = 0.5
    ridge = RidgeCV(alphas=np.logspace(-3, 2, 20), fit_intercept=False)
    ridged = Pipeline([('poly', PolynomialFeatures(degree=degree)),
                       ('Ridge', ridge)])
    bagging_ridged = BaggingRegressor(ridged,
                                      n_estimators=n_estimators,
                                      max_samples=max_samples)
    dtr = DecisionTreeRegressor(max_depth=9)
    regs = [('DecisionTree', dtr), ('Ridge(%d Degree)' % degree, ridged),
            ('Bagging Ridge(%d Degree)' % degree, bagging_ridged),
            ('Bagging DecisionTree',
             BaggingRegressor(dtr,
                              n_estimators=n_estimators,
                              max_samples=max_samples))]
    x_test = np.linspace(1.1 * x.min(), 1.1 * x.max(), 1000)
    mpl.rcParams['font.sans-serif'] = [u'SimHei']
    mpl.rcParams['axes.unicode_minus'] = False
    plt.figure(figsize=(12, 8), facecolor='w')
Example #23
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_val_score

labeled = pd.read_csv(
    "https://storage.googleapis.com/kaggle_datasets/Titanic-Machine-Learning-from-Disaster/train.csv"
)
# Removed observations without Age
labeled = labeled[~labeled["Age"].isna()]
X = labeled.loc[:, ["Fare", "Age"]].values
y = labeled.loc[:, "Survived"].values
d = 10
poly_degrees = list(range(1, d + 1))
cv_accuracies = []
for poly_d in poly_degrees:
    X_poly = PolynomialFeatures(poly_d).fit_transform(X)
    # Get cross validated train/valid accuracy
    clf = LogisticRegression()
    cv_acc = np.array(cross_val_score(clf, X_poly, y)).mean()
    cv_accuracies.append(cv_acc)

plt.plot(cv_accuracies, marker="o")
plt.xticks(range(d), poly_degrees)
plt.title("Cross-validated accuracies")
plt.xlabel("Degrees")
plt.ylabel("CV Accuracy")
plt.show()
def Polynomial_Regression(degree):
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),  # build polynomial features
        ("std_scaler", StandardScaler()),  # standardize the features
        ("lin_reg", LinearRegression())  # linear regression
    ])
#%% Polynomial Regression

import numpy as np
import numpy.random as rnd

np.random.seed(42)

# Generate random data
m = 100
X = 6 * np.random.rand(m, 1) - 3
y = 0.5 * X**2 + X + 2 + np.random.randn(m, 1)

# Add the poly feature
from sklearn.preprocessing import PolynomialFeatures
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly_features.fit_transform(X)
X[0]
X_poly[0]

# Fit Linear regression using the poly feature
lin_reg = LinearRegression()
lin_reg.fit(X_poly, y)
lin_reg.intercept_, lin_reg.coef_

X_new = np.linspace(-3, 3, 100).reshape(100, 1)
X_new_poly = poly_features.transform(X_new)
y_new = lin_reg.predict(X_new_poly)
plt.plot(X, y, "b.")
plt.plot(X_new, y_new, "r-", linewidth=2, label="Predictions")
plt.xlabel("$x_1$", fontsize=18)
def Lasso_Regression(degree, alpha):
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),  # build polynomial features
        ("std_scaler", StandardScaler()),  # standardize the features
        ("lasso_reg", Lasso(alpha=alpha))  # Lasso regression
    ])
#This driver is for a nonlinear SVM classifier on the moons dataset
#
#Steven Large
#May 27th 2018

import numpy as np
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.svm import LinearSVC

moons = make_moons()

X = moons[0]
y = moons[1]

polynomial_svm_clf = Pipeline([
	("poly_features", PolynomialFeatures(degree=3)),
	("scaler", StandardScaler()),
	("svm_clf", LinearSVC(C=10, loss="hinge"))
	])

polynomial_svm_clf.fit(X, y)


# Employ a quadratic regression to smooth win share vs age data

import operator

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures

x = wsby['Age'].values.reshape(-1, 1)
y = wsby['WS']

polynomial_features= PolynomialFeatures(degree=2)
x_poly = polynomial_features.fit_transform(x)

model = LinearRegression()
model.fit(x_poly, y)
y_poly_pred = model.predict(x_poly)

rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
r2 = r2_score(y,y_poly_pred)
# R-squared is 0.8705593525409101, pretty good

plt.scatter(x, y, s=10)
# sort the values of x before line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x,y_poly_pred), key=sort_axis)
x, y_poly_pred = zip(*sorted_zip)
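# A plausible ending for this snippet (assumed, not shown above): draw the
# smoothed quadratic curve over the scatter and render the figure.
plt.plot(x, y_poly_pred, color='m')
plt.show()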
Example #29
    # data = np.loadtxt(path, dtype=float, delimiter=',',
    #                   converters={4: iris_type})

    data = pd.read_csv(path, header=None)
    data[4] = pd.Categorical(data[4]).codes
    # iris_types = data[4].unique()
    # print iris_types
    # for i, type in enumerate(iris_types):
    #     data.set_value(data[4] == type, 4, i)
    x, y = np.split(data.values, (4, ), axis=1)
    # print 'x = \n', x
    # print 'y = \n', y
    # use only the first two feature columns
    x = x[:, :2]
    lr = Pipeline([('sc', StandardScaler()),
                   ('poly', PolynomialFeatures(degree=3)),
                   ('clf', LogisticRegression())])
    lr.fit(x, y.ravel())
    y_hat = lr.predict(x)
    y_hat_prob = lr.predict_proba(x)
    np.set_printoptions(suppress=True)
    print('y_hat = \n', y_hat)
    print('y_hat_prob = \n', y_hat_prob)
    print('Accuracy: %.2f%%' % (100 * np.mean(y_hat == y.ravel())))
    # plot
    N, M = 500, 500  # number of sample points along each axis
    x1_min, x1_max = x[:, 0].min(), x[:, 0].max()  # range of the first feature
    x2_min, x2_max = x[:, 1].min(), x[:, 1].max()  # range of the second feature
    t1 = np.linspace(x1_min, x1_max, N)
    t2 = np.linspace(x2_min, x2_max, M)
    x1, x2 = np.meshgrid(t1, t2)  # build the grid of sample points
Example #30
plt.ylabel('y')

plt.ylim(-1.5, 1.5)

plt.show()

#raise SystemExit()

#------------------------------------------------------------------------------
#  Train a polynomial regressor on the training set
#------------------------------------------------------------------------------

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

pf = PolynomialFeatures(polynomial_degree)
modelo = LinearRegression()

Z_train = pf.fit_transform(X_train)

modelo = modelo.fit(Z_train, y_train)

#------------------------------------------------------------------------------
#  Get the model's predictions on the training set itself
#------------------------------------------------------------------------------

y_train_pred = modelo.predict(Z_train)

#------------------------------------------------------------------------------
#  Get the model's predictions on the test set
#------------------------------------------------------------------------------
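
# A minimal sketch of this step (an assumed continuation; X_test is presumed to
# exist alongside X_train from the earlier split):
Z_test = pf.transform(X_test)
y_test_pred = modelo.predict(Z_test)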