reg = LinearRegression().fit(X_product, y)
line_product = np.hstack([line_binned, line * line_binned])
plt.plot(line, reg.predict(line_product), label="binned * original reg")
for bin in bins:
    plt.plot([bin, bin], [-3, 3], ":", c="k", linewidth=1)
plt.legend(loc="best")
plt.ylabel("Regression output")
plt.xlabel("Input feature")
plt.plot(X[:, 0], y, "o", c="k")

# Add polynomial features
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=10, include_bias=False)  # degree=n: add terms up to X^n
poly.fit(X)
X_poly = poly.transform(X)
print("X_poly.shape: {}".format(X_poly.shape))
print("X entries:\n{}".format(X[:5]))
print("X_poly entries:\n{}".format(X_poly[:5]))
print("Feature names: {}".format(poly.get_feature_names()))

# Regression with the polynomial features (caveat: it overfits where there is no data)
reg = LinearRegression().fit(X_poly, y)
line_poly = poly.transform(line)
plt.plot(line, reg.predict(line_poly), label="polynomial reg")
plt.legend(loc="best")
plt.ylabel("Regression output")
y = dataset.iloc[:, 2].values

# Training and test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=0)

from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

# Fitting polynomial regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=5)
X_poly = poly_reg.fit_transform(X)
lin_reg2 = LinearRegression()
lin_reg2.fit(X_poly, y)

plt.scatter(X, y, color='red')
plt.plot(X, lin_reg2.predict(poly_reg.fit_transform(X)), color='blue')
plt.title('Position vs Salary')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()
         label=r'Lasso; $\alpha = 0.00001$')
# alpha here is for transparency
plt.plot(lr.coef_, alpha=0.7, linestyle='none', marker='o', markersize=5,
         color='green', label='Linear Regression', zorder=2)
plt.xlabel('Coefficient Index', fontsize=16)
plt.ylabel('Coefficient Magnitude', fontsize=16)
plt.legend(fontsize=13, loc=4)
plt.tight_layout()
plt.show()

# %%
steps = [('scaler', StandardScaler()),
         ('poly', PolynomialFeatures(degree=2)),
         ('model', LinearRegression())]
pipeline = Pipeline(steps)
pipeline.fit(X_train, y_train)
y_pred_linear = pipeline.predict(X_test)
print('Training score: {}'.format(pipeline.score(X_train, y_train)))
print('Test score: {}'.format(pipeline.score(X_test, y_test)))
print('RMSE: {}'.format(np.sqrt(metrics.mean_squared_error(y_test, y_pred_linear))))

# %%
steps = [('scaler', StandardScaler()),
         ('poly', PolynomialFeatures(degree=2)),
         ('model', Ridge(alpha=10, fit_intercept=True))]
v.feature_names_

# Handle missing values
from sklearn.preprocessing import Imputer  # legacy API; newer sklearn uses sklearn.impute.SimpleImputer
imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
imp.fit_transform([[1, 2], [np.nan, 3], [7, 6]])

# Create derived (polynomial) features
# [1, a, b, a^2, ab, b^2]
from sklearn.preprocessing import PolynomialFeatures
X = np.arange(6).reshape(3, 2)
X
poly = PolynomialFeatures(2)
poly.fit_transform(X)  # [1, a, b, a^2, ab, b^2]

# Same effect as apply
from sklearn.preprocessing import FunctionTransformer

def all_but_first_column(X):
    return X[:, 1:]

X = np.arange(12).reshape(4, 3)
X
FunctionTransformer(all_but_first_column).fit_transform(X)
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)

# Different regression algorithms
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()

from sklearn.preprocessing import PolynomialFeatures
regressor = PolynomialFeatures(degree=4)  # note: a feature transformer, not a regressor
X_poly = regressor.fit_transform(X)

from sklearn.svm import SVR
regressor = SVR(kernel="rbf")

from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor()

from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=100)

# Fitting and predicting
y = y[:-forecast_out]
print('Dimension of X', X.shape)
print('Dimension of X_lately', X_lately.shape)
print('Dimension of y', y.shape)

# Separation of training and testing of model by cross validation train test split
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2)

# Linear regression
clfreg = LinearRegression(n_jobs=-1)
clfreg.fit(X_train, y_train)

# Quadratic Regression 2
clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
clfpoly2.fit(X_train, y_train)

# Quadratic Regression 3
clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
clfpoly3.fit(X_train, y_train)

# KNN Regression
clfknn = KNeighborsRegressor(n_neighbors=2)
clfknn.fit(X_train, y_train)

confidencereg = clfreg.score(X_test, y_test)
confidencepoly2 = clfpoly2.score(X_test, y_test)
confidencepoly3 = clfpoly3.score(X_test, y_test)
confidenceknn = clfknn.score(X_test, y_test)
onehotEncr = OneHotEncoder(categorical_features=[0])
# onehotEncr = OneHotEncoder(categorical_features=[1])
X = onehotEncr.fit_transform(X).toarray()

labEnc_Y = LabelEncoder()
Y = labEnc_Y.fit_transform(Y)

plt.scatter(X[:, 2], Y, marker='o')

"""New"""
np.random.seed(0)
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X = poly_features.fit_transform(X)

model = SGDRegressor(max_iter=10000, eta0=0.001)
model.fit(X, Y)
print('Coeff R2 =', model.score(X, Y))

plt.scatter(X[:, 4], Y, marker='o')
plt.scatter(X[:, 0], model.predict(X), c='red', marker='+')
def train_model_lassoLARS_style(predictors, predictants, alpha, deg):
    Vandermonde = PolynomialFeatures(degree=deg)
    Vandermonde = Vandermonde.fit_transform(predictors)
    LinModel = linear_model.LassoLars(alpha=alpha)
    LinModel = LinModel.fit(Vandermonde, predictants)
    return LinModel
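
# A minimal usage sketch for the helper above, assuming NumPy-array inputs; the
# synthetic X_demo/y_demo data and the alpha/deg values are illustrative assumptions,
# not part of the original code.
import numpy as np
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures

rng = np.random.default_rng(0)
X_demo = rng.uniform(-1, 1, size=(50, 1))                        # 50 samples, one predictor
y_demo = 3 * X_demo[:, 0] ** 2 + rng.normal(scale=0.1, size=50)  # quadratic signal plus noise

demo_model = train_model_lassoLARS_style(X_demo, y_demo, alpha=1e-3, deg=2)

# Predictions require the same polynomial expansion that was used for training
X_demo_poly = PolynomialFeatures(degree=2).fit_transform(X_demo)
print(demo_model.predict(X_demo_poly)[:5])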
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Let's examine the distribution of the predicted values of the training data.
Title = 'Distribution Plot of Predicted Value Using Training Data vs Training Data Distribution'
DistributionPlot(y_train, yhat_train, "Actual Values (Train)", "Predicted Values (Train)", Title)

Title = 'Distribution Plot of Predicted Value Using Test Data vs Data Distribution of Test Data'
DistributionPlot(y_test, yhat_test, "Actual Values (Test)", "Predicted Values (Test)", Title)

from sklearn.preprocessing import PolynomialFeatures

# Overfitting
# Let's use 55 percent of the data for training and the rest for testing:
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.45, random_state=0)

# We will perform a degree-5 polynomial transformation on the feature 'horsepower'.
pr = PolynomialFeatures(degree=5)
x_train_pr = pr.fit_transform(x_train[['horsepower']])
x_test_pr = pr.fit_transform(x_test[['horsepower']])
pr

# Now let's create a linear regression model "poly" and train it.
poly = LinearRegression()
poly.fit(x_train_pr, y_train)

# We can see the output of our model using the method "predict", then assign the values to "yhat".
yhat = poly.predict(x_test_pr)
yhat[0:5]

# Let's take the first four predicted values and compare them to the actual targets.
print("Predicted values:", yhat[0:4])
print("True values:", y_test[0:4].values)

PollyPlot(x_train[['horsepower']], x_test[['horsepower']], y_train, y_test, poly, pr)

# R^2 of the training data:
import pylab as plt
import pandas as pd

# importing dataset
dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:-1].values
y = dataset.iloc[:, -1].values

# fitting linear regression
from sklearn.linear_model import LinearRegression
lin_regressor = LinearRegression()
lin_regressor.fit(X, y)

# fitting polynomial regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_regressor = PolynomialFeatures(degree=2)
X_poly = poly_regressor.fit_transform(X)
lin_regressor2 = LinearRegression()
lin_regressor2.fit(X_poly, y)

# visualization of linear regression results
plt.scatter(X, y, color='red')
plt.plot(X, lin_regressor.predict(X))
plt.title('Linear Regression (Truth)')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.show()

# visualization of polynomial regression
plt.scatter(X, y, color='red')
plt.plot(X, lin_regressor2.predict(poly_regressor.fit_transform(X)))
    }
    return results


def runLR(train_X, train_y, test_X, test_y, test_X2, params):
    print('Train LR')
    model = RandomForestClassifier(**params)  # note: despite the name, this trains a random forest
    model.fit(train_X, train_y)
    print('Predict 1/2')
    pred_test_y = model.predict_proba(test_X)[:, 1]
    print('Predict 2/2')
    pred_test_y2 = model.predict_proba(test_X2)[:, 1]
    return pred_test_y, pred_test_y2


target = train['redemption_status'].values
poly = PolynomialFeatures(degree=2)
sc = StandardScaler()
lr_params = {'n_estimators': 1000}
results = run_cv_model(
    sc.fit_transform(poly.fit_transform(train[train_cols].fillna(0).values)),
    sc.fit_transform(poly.fit_transform(test[train_cols].fillna(0).values)),
    target, runLR, lr_params, auc, 'lr')

day = 2
sub = 3
name = f"day_{day}_sub_{sub}"
tmp = dict(zip(test.id.values, results['test']))
answer1 = pd.DataFrame()
answer1['id'] = test.id.values
answer1['redemption_status'] = answer1['id'].map(tmp)
answer1.to_csv(f'{name}.csv', index=None)
# imputer is for handling missing values
from sklearn.preprocessing import Imputer  # legacy API; newer sklearn uses sklearn.impute.SimpleImputer
imputer = Imputer(strategy='median')

poly_target = poly_features['TARGET']
poly_features = poly_features.drop(columns=['TARGET'])

# Need to impute missing values
poly_features = imputer.fit_transform(poly_features)
poly_features_test = imputer.transform(poly_features_test)

from sklearn.preprocessing import PolynomialFeatures

# Create the polynomial object with specified degree
poly_transformer = PolynomialFeatures(degree=3)

# Train the polynomial features
poly_transformer.fit(poly_features)

# Transform the features
poly_features = poly_transformer.transform(poly_features)
poly_features_test = poly_transformer.transform(poly_features_test)

# print('Polynomial Features shape: ', poly_features.shape)
# print(poly_transformer.get_feature_names(input_features=['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3', 'DAYS_BIRTH'])[:34])

poly_features = pd.DataFrame(poly_features,
                             columns=poly_transformer.get_feature_names([
                                 'EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3', 'DAYS_BIRTH'
                             ]))
def PolynomialLogisticRegression(degree):
    return Pipeline([('poly', PolynomialFeatures(degree=degree)),
                     ('std_scaler', StandardScaler()),
                     ('log_reg', LogisticRegression())])
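
# A hedged usage sketch for the factory above; the moon-shaped toy dataset and
# degree=3 are assumptions chosen only for illustration.
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LogisticRegression

X_demo, y_demo = make_moons(n_samples=200, noise=0.2, random_state=42)
X_tr, X_te, y_tr, y_te = train_test_split(X_demo, y_demo, random_state=42)

clf = PolynomialLogisticRegression(degree=3)  # expansion -> scaling -> logistic regression
clf.fit(X_tr, y_tr)
print("test accuracy:", clf.score(X_te, y_te))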
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

from helper import get_data, split_data, visualize

name = 'Polynomial'

if __name__ == '__main__':
    x, y = get_data()
    x_train, x_test, y_train, y_test = split_data(x, y)

    pf = PolynomialFeatures()
    x_train_polynomial = pf.fit_transform(x_train)
    x_test_polynomial = pf.transform(x_test)  # reuse the transformer fitted on the training data

    regression = LinearRegression()
    regression.fit(x_train_polynomial, y_train)
    y_predicted = regression.predict(x_test_polynomial)

    visualize(y_test, y_predicted, name)
# 1. libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# load data
df = pd.read_csv('winequality-red.csv')

# note: here X holds the target column ('quality') and y holds the input features;
# the model below predicts quality from a polynomial expansion of the features
X = df[['quality']]
y = df[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
        'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
        'pH', 'sulphates', 'alcohol']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = PolynomialFeatures(degree=4)
y_ = model.fit_transform(y_train)
y_test_ = model.fit_transform(y_test)
print(y_, y_test_)

lg = LinearRegression()
lg.fit(y_, X_train)
predicted_data = lg.predict(y_test_)
predicted_data = np.round(predicted_data)
print(mean_squared_error(X_test, predicted_data))
print(predicted_data)
normalizer = preprocessing.Normalizer().fit(X)
print(normalizer.transform(X))

# Binarization
# Feature binarization
print("Feature binarization")
X = [[1., -1., 2.],
     [2., 0., 0.],
     [0., 1., -1.]]
binarizer = preprocessing.Binarizer().fit(X)
print(binarizer.transform(X))

# Imputation of missing values
print("Imputation of missing values")
from sklearn.preprocessing import Imputer  # legacy API; newer sklearn uses sklearn.impute.SimpleImputer
imp = Imputer(missing_values='NaN', strategy='mean', verbose=0)
imp.fit([[1, 2], [np.nan, 3], [7, 6]])
X = [[np.nan, 2], [6, np.nan], [7, 6]]
print(imp.transform(X))

# Generating polynomial features
print("Generating polynomial features")
from sklearn.preprocessing import PolynomialFeatures
X = np.arange(6).reshape(3, 2)
poly = PolynomialFeatures(2)
print(poly.fit_transform(X))
X = np.arange(9).reshape(3, 3)
poly = PolynomialFeatures(degree=3, interaction_only=True)
print(poly.fit_transform(X))
def predictStockPrices(df):
    dfreg = df.loc[:, ['Adj Close', 'Volume']]
    dfreg['HL_PCT'] = (df['High'] - df['Low']) / df['Close'] * 100.0
    dfreg['PCT_change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0
    print(dfreg.tail())

    # cleanup process
    # --------------------------------------------------------------------
    # drop missing values
    dfreg.fillna(value=-99999, inplace=True)

    # we want to separate 1 percent of the data to forecast
    forecast_out = int(math.ceil(0.01 * len(dfreg)))

    # we want to predict the Adj Close
    forecast_col = 'Adj Close'
    dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)
    X = np.array(dfreg.drop(['label'], 1))

    # scale the X so that everyone can have the same distribution for linear regression
    X = sk.preprocessing.scale(X)

    # find data series of late X and early X (train) for model generation and evaluation
    X_lately = X[-forecast_out:]
    X = X[:-forecast_out]

    # Separate label and identify it as y
    y = np.array(dfreg['label'])
    y = y[:-forecast_out]
    print('Dimension of X', X.shape)
    print('Dimension of y', y.shape)

    # Separation of training and testing of model by cross validation train test split
    X_train, X_test, y_train, y_test = sk.model_selection.train_test_split(X, y, test_size=0.2)

    # Linear Regression
    clfreg = LinearRegression(n_jobs=-1)
    clfreg.fit(X_train, y_train)

    # Lasso Regression
    clflasso = Lasso(selection='random')
    clflasso.fit(X_train, y_train)

    # Quadratic Regression 2
    clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
    clfpoly2.fit(X_train, y_train)

    # Quadratic Regression 3
    clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
    clfpoly3.fit(X_train, y_train)

    # KNN
    clfknn = KNeighborsRegressor(n_neighbors=2)
    clfknn.fit(X_train, y_train)

    # Test the models
    confidencereg = clfreg.score(X_test, y_test)
    confidencepoly2 = clfpoly2.score(X_test, y_test)
    confidencepoly3 = clfpoly3.score(X_test, y_test)
    confidenceknn = clfknn.score(X_test, y_test)
    confidencelasso = clflasso.score(X_test, y_test)

    print("The linear regression confidence is ", confidencereg)
    print("The quadratic regression 2 confidence is ", confidencepoly2)
    print("The quadratic regression 3 confidence is ", confidencepoly3)
    print("The knn regression confidence is ", confidenceknn)
    print("The lasso regression confidence is ", confidencelasso)

    # Predict
    predictAndPlot(clfreg, X_lately, dfreg.copy(), confidencereg, forecast_out)
    predictAndPlot(clfpoly2, X_lately, dfreg.copy(), confidencepoly2, forecast_out)
    predictAndPlot(clfpoly3, X_lately, dfreg.copy(), confidencepoly3, forecast_out)
    predictAndPlot(clfknn, X_lately, dfreg.copy(), confidenceknn, forecast_out)
    predictAndPlot(clflasso, X_lately, dfreg.copy(), confidencelasso, forecast_out)
def _get_polynomials(self, x):
    poly = PolynomialFeatures(degree=self.degree)
    x_poly = poly.fit_transform(x.reshape(-1, 1))
    return x_poly
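
# The method above assumes an object with a 'degree' attribute and a 1-D input array.
# A minimal sketch of a hypothetical wrapper class (SimplePolyFitter is invented here
# for illustration) showing how it might be called:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

class SimplePolyFitter:
    def __init__(self, degree):
        self.degree = degree

    def _get_polynomials(self, x):
        poly = PolynomialFeatures(degree=self.degree)
        x_poly = poly.fit_transform(x.reshape(-1, 1))
        return x_poly

fitter = SimplePolyFitter(degree=3)
x_demo = np.linspace(0.0, 1.0, 5)             # 1-D input; reshaped to a column inside the method
print(fitter._get_polynomials(x_demo).shape)  # (5, 4): bias, x, x^2, x^3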
                    -1.17165272, -0.89129801, -0.85572252, -0.7736467, -0.21234812, -0.12717219])
x_test = np.array([0.31273956, 0.46122891, 0.4917774, 0.7039386, 0.84386983, 0.97020886])
y_test = np.array([0.909136, 0.38747724, -0.92084687, -1.03804487, 0.03453301, -0.1177253])

# create matrix versions of these arrays
X_train = x_train[:, np.newaxis]
X_test = x_test[:, np.newaxis]

colors = ['teal', 'yellow', 'green', 'gold']
lw = 2
train_error = []
test_error = []

fig = plt.figure()
for degree in range(11):
    fig.clf()
    model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=0))
    model.fit(X_train, y_train)
    train_error.append(mean_squared_error(y_train, model.predict(X_train)))
    test_error.append(mean_squared_error(y_test, model.predict(X_test)))

plt.plot(np.arange(11), train_error, color='green', label='train')
plt.plot(np.arange(11), test_error, color='red', label='test')
plt.ylim((0.0, 1e0))
plt.ylabel('Mean Squared Error')
plt.xlabel('Degree')
plt.legend(loc='lower left')
fig.savefig('Testing_Answer4_1.png', bbox_inches='tight')
from scipy.stats import boxcox
X_train_transformed = X_train.copy()
X_train_transformed['Fare'] = boxcox(X_train_transformed['Fare'] + 1)[0]
X_test_transformed = X_test.copy()
X_test_transformed['Fare'] = boxcox(X_test_transformed['Fare'] + 1)[0]

# Rescale data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_transformed_scaled = scaler.fit_transform(X_train_transformed)
X_test_transformed_scaled = scaler.transform(X_test_transformed)

# Get polynomial features (fit on the scaled training data, then transform both sets)
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2).fit(X_train_transformed_scaled)
X_train_poly = poly.transform(X_train_transformed_scaled)
X_test_poly = poly.transform(X_test_transformed_scaled)

# Debug
print(poly.get_feature_names())

# Select features using chi-squared test
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

## Get score using original model
logreg = LogisticRegression(C=1)
logreg.fit(X_train, y_train)
scores = cross_val_score(logreg, X_train, y_train, cv=10)
print('CV accuracy (original): %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
highest_score = np.mean(scores)
def true_fun(X):
    return np.sin(2 * np.pi * X)

np.random.seed(0)
n_samples = 30
degrees = [1, 4, 15]

X = np.sort(np.random.rand(n_samples))
y = true_fun(X) + np.random.randn(n_samples) * 0.1

plt.figure(figsize=(7, 2.5))
for i in range(len(degrees)):
    ax = plt.subplot(1, len(degrees), i + 1)
    plt.setp(ax, xticks=(), yticks=())

    polynomial_features = PolynomialFeatures(degree=degrees[i], include_bias=False)
    linear_regression = LinearRegression()
    pipeline = Pipeline([("polynomial_features", polynomial_features),
                         ("linear_regression", linear_regression)])
    pipeline.fit(X[:, np.newaxis], y)

    # Evaluate the models using cross-validation
    scores = cross_val_score(pipeline, X[:, np.newaxis], y,
                             scoring="neg_mean_squared_error", cv=10)

    X_test = np.linspace(0, 1, 100)
    plt.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), label="Model")
    plt.plot(X_test, true_fun(X_test), label="True function")
    plt.scatter(X, y, edgecolor='b', s=20, label="Samples")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.xlim((0, 1))
    plt.ylim((-2, 2))
if __name__ == "__main__":
    np.random.seed(0)
    N = 200
    # x = np.random.rand(N) * 10 - 5  # [-5, 5)
    # x = np.sort(x)
    x = np.linspace(-5, 5, N)
    y = f(x) + 0.05 * np.random.randn(N)
    x.shape = -1, 1

    degree = 6
    n_estimators = 50
    max_samples = 0.5

    ridge = RidgeCV(alphas=np.logspace(-3, 2, 20), fit_intercept=False)
    ridged = Pipeline([('poly', PolynomialFeatures(degree=degree)), ('Ridge', ridge)])
    bagging_ridged = BaggingRegressor(ridged, n_estimators=n_estimators, max_samples=max_samples)
    dtr = DecisionTreeRegressor(max_depth=9)
    regs = [('DecisionTree', dtr),
            ('Ridge(%d Degree)' % degree, ridged),
            ('Bagging Ridge(%d Degree)' % degree, bagging_ridged),
            ('Bagging DecisionTree', BaggingRegressor(dtr, n_estimators=n_estimators, max_samples=max_samples))]

    x_test = np.linspace(1.1 * x.min(), 1.1 * x.max(), 1000)
    mpl.rcParams['font.sans-serif'] = [u'SimHei']
    mpl.rcParams['axes.unicode_minus'] = False
    plt.figure(figsize=(12, 8), facecolor='w')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_val_score

labeled = pd.read_csv(
    "https://storage.googleapis.com/kaggle_datasets/Titanic-Machine-Learning-from-Disaster/train.csv"
)
# Remove observations without Age
labeled = labeled[~labeled["Age"].isna()]
X = labeled.loc[:, ["Fare", "Age"]].values
y = labeled.loc[:, "Survived"].values

d = 10
poly_degrees = list(range(1, d + 1))
cv_accuracies = []
for poly_d in poly_degrees:
    X_poly = PolynomialFeatures(poly_d).fit_transform(X)
    # Get cross-validated train/valid accuracy
    clf = LogisticRegression()
    cv_acc = np.array(cross_val_score(clf, X_poly, y)).mean()
    cv_accuracies.append(cv_acc)

plt.plot(cv_accuracies, marker="o")
plt.xticks(range(d), poly_degrees)
plt.title("Cross-validated accuracies")
plt.xlabel("Degrees")
plt.ylabel("CV Accuracy")
plt.show()
def Polynomial_Regression(degree):
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),  # build polynomial features
        ("std_scaler", StandardScaler()),             # standardize the features
        ("lin_reg", LinearRegression())               # regression model
    ])
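
# A minimal usage sketch for this factory; the quadratic toy data and degree=2 below
# are illustrative assumptions.
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression

np.random.seed(42)
X_demo = 6 * np.random.rand(100, 1) - 3                              # 100 samples in [-3, 3)
y_demo = 0.5 * X_demo[:, 0] ** 2 + X_demo[:, 0] + 2 + np.random.randn(100)

poly_reg = Polynomial_Regression(degree=2)
poly_reg.fit(X_demo, y_demo)
print("R^2 on the training data:", poly_reg.score(X_demo, y_demo))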
#%% Polynomial Regression
import numpy as np
import numpy.random as rnd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

np.random.seed(42)

# Generate random data
m = 100
X = 6 * np.random.rand(m, 1) - 3
y = 0.5 * X**2 + X + 2 + np.random.randn(m, 1)

# Add the poly feature
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly_features.fit_transform(X)
X[0]
X_poly[0]

# Fit linear regression using the poly feature
lin_reg = LinearRegression()
lin_reg.fit(X_poly, y)
lin_reg.intercept_, lin_reg.coef_

X_new = np.linspace(-3, 3, 100).reshape(100, 1)
X_new_poly = poly_features.transform(X_new)
y_new = lin_reg.predict(X_new_poly)
plt.plot(X, y, "b.")
plt.plot(X_new, y_new, "r-", linewidth=2, label="Predictions")
plt.xlabel("$x_1$", fontsize=18)
def Lasso_Regression(degree, alpha):
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),  # build polynomial features
        ("std_scaler", StandardScaler()),             # standardize the features
        ("ridge_reg", Lasso(alpha=alpha))             # Lasso model (step name kept as 'ridge_reg')
    ])
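
# A hedged sketch of how the factory above might be called; the sine toy data and the
# two alpha values compared are illustrative assumptions. A larger alpha shrinks more
# of the polynomial coefficients exactly to zero.
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import Lasso

np.random.seed(0)
X_demo = np.sort(5 * np.random.rand(80, 1), axis=0)
y_demo = np.sin(X_demo[:, 0]) + 0.1 * np.random.randn(80)

for alpha in (0.01, 0.5):
    lasso_pipe = Lasso_Regression(degree=10, alpha=alpha)
    lasso_pipe.fit(X_demo, y_demo)
    n_nonzero = np.sum(lasso_pipe.named_steps['ridge_reg'].coef_ != 0)
    print(f"alpha={alpha}: {n_nonzero} non-zero polynomial coefficients")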
# This driver is for a nonlinear SVM classifier of Iris data
#
# Steven Large
# May 27th 2018

import numpy as np
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.svm import LinearSVC

moons = make_moons()
X = moons[0]
y = moons[1]

polynomial_svm_clf = Pipeline([
    ("poly_features", PolynomialFeatures(degree=3)),
    ("scaler", StandardScaler()),
    ("svm_clf", LinearSVC(C=10, loss="hinge"))
])

polynomial_svm_clf.fit(X, y)
# Employ a quadratic regression to smooth win share vs age data
import operator
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures

x = wsby['Age'].values.reshape(-1, 1)
y = wsby['WS']

polynomial_features = PolynomialFeatures(degree=2)
x_poly = polynomial_features.fit_transform(x)

model = LinearRegression()
model.fit(x_poly, y)
y_poly_pred = model.predict(x_poly)

rmse = np.sqrt(mean_squared_error(y, y_poly_pred))
r2 = r2_score(y, y_poly_pred)
# R-squared is 0.8705593525409101, pretty good

plt.scatter(x, y, s=10)
# sort the values of x before line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x, y_poly_pred), key=sort_axis)
x, y_poly_pred = zip(*sorted_zip)
# data = np.loadtxt(path, dtype=float, delimiter=',',
#                   converters={4: iris_type})
data = pd.read_csv(path, header=None)
data[4] = pd.Categorical(data[4]).codes
# iris_types = data[4].unique()
# print(iris_types)
# for i, type in enumerate(iris_types):
#     data.set_value(data[4] == type, 4, i)
x, y = np.split(data.values, (4,), axis=1)
# print('x = \n', x)
# print('y = \n', y)

# use only the first two feature columns
x = x[:, :2]
lr = Pipeline([('sc', StandardScaler()),
               ('poly', PolynomialFeatures(degree=3)),
               ('clf', LogisticRegression())])
lr.fit(x, y.ravel())
y_hat = lr.predict(x)
y_hat_prob = lr.predict_proba(x)
np.set_printoptions(suppress=True)
print('y_hat = \n', y_hat)
print('y_hat_prob = \n', y_hat_prob)
print('Accuracy: %.2f%%' % (100 * np.mean(y_hat == y.ravel())))

# plotting
N, M = 500, 500  # number of sample points along each axis
x1_min, x1_max = x[:, 0].min(), x[:, 0].max()  # range of column 0
x2_min, x2_max = x[:, 1].min(), x[:, 1].max()  # range of column 1
t1 = np.linspace(x1_min, x1_max, N)
t2 = np.linspace(x2_min, x2_max, M)
x1, x2 = np.meshgrid(t1, t2)  # generate the mesh grid of sample points
plt.ylabel('y')
plt.ylim(-1.5, 1.5)
plt.show()

# raise SystemExit()

# ------------------------------------------------------------------------------
# Train a polynomial regressor on the training set
# ------------------------------------------------------------------------------

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

pf = PolynomialFeatures(polynomial_degree)
modelo = LinearRegression()

Z_train = pf.fit_transform(X_train)
modelo = modelo.fit(Z_train, y_train)

# ------------------------------------------------------------------------------
# Get the model's predictions on the training set itself
# ------------------------------------------------------------------------------

y_train_pred = modelo.predict(Z_train)

# ------------------------------------------------------------------------------
# Get the model's predictions on the test set
# ------------------------------------------------------------------------------