def test_regressor_modifications(self):
    """Check that KPCovR stays coupled to the regressor object it was given.

    The model under test is built from ``self.model`` with an RBF
    ``KernelRidge`` regressor. The test verifies that:

    * the regressor held by KPCovR reports the same parameters as the
      original regressor, also after the original is modified;
    * fitting the original regressor outside KPCovR leaves the KPCovR
      regressor fitted;
    * ``fit`` raises ``ValueError`` once the regressor's kernel parameters
      no longer agree with the ones KPCovR was initialized with.
    """
    regressor = KernelRidge(alpha=1e-8, kernel="rbf", gamma=0.1)
    kpcovr = self.model(mixing=0.5,
                        regressor=regressor,
                        kernel="rbf",
                        gamma=0.1)

    # KPCovR regressor matches the original
    self.assertEqual(regressor.get_params(), kpcovr.regressor.get_params())

    # KPCovR regressor updates its parameters
    # to match the original regressor
    regressor.set_params(gamma=0.2)
    self.assertEqual(regressor.get_params(), kpcovr.regressor.get_params())

    # Fitting regressor outside KPCovR fits the KPCovR regressor
    regressor.fit(self.X, self.Y)
    self.assertTrue(hasattr(kpcovr.regressor, "dual_coef_"))

    # Raise error during KPCovR fit since regressor and KPCovR
    # kernel parameters now inconsistent
    with self.assertRaises(ValueError) as cm:
        kpcovr.fit(self.X, self.Y)

    # The context manager stores the raised error on `.exception`; inspect
    # it only after the `with` block has exited. The previous
    # `assertTrue(str(cm.message), "...")` compared nothing, because
    # assertTrue treats its second argument as the failure message.
    message = str(cm.exception)
    self.assertIn("Kernel parameter mismatch", message)
    # Both conflicting gamma values must be reported to the user.
    self.assertIn("gamma: 0.2", message)
    self.assertIn("gamma: 0.1", message)
def kernel_ridge_regression():
    """Fit a ``KernelRidge`` model on seeded random data and print its settings.

    Prints the fitted model's ``alpha`` followed by the full parameter
    dictionary returned by ``get_params()``. Returns nothing.
    """
    sample_count = 10
    feature_count = 5
    generator = np.random.RandomState(0)

    # The targets are drawn before the features; the order matters because
    # both come from the same seeded random stream.
    targets = generator.randn(sample_count)
    features = generator.randn(sample_count, feature_count)

    model = KernelRidge(alpha=1.0)
    model.fit(features, targets)

    print(model.alpha)
    print(model.get_params())
class KernelMethod(Classifier):
    """Classifier built on top of a ``KernelRidge`` model.

    Labels that cannot be cast to ``float`` are one-hot encoded before
    fitting; predictions are then mapped back to the original labels by
    picking, per sample, the encoded column with the largest output.

    Parameters
    ==========
    **kwargs: forwarded unchanged to ``KernelRidge``.
    """

    def __init__(self, **kwargs):
        super().__init__()
        self._model = KernelRidge(**kwargs)
        # Snapshot of the wrapped model's parameters at construction time.
        self.hyperparams = self._model.get_params()
        # OneHotEncoder used for non-numeric labels; None when labels are numeric.
        self.enc = None

    def fit(self, X, Y):
        """
        Training model with data provided.

        Parameters
        ==========
        X: Pandas DataFrame.
           Attribute Values
        Y: Pandas Series.
           Object labels.

        Returns
        =======
        void.
        """
        X = X.values
        Y = Y.values

        # Forget any encoder left over from an earlier fit; otherwise a refit
        # on numeric labels would make predict() decode with a stale encoder.
        self.enc = None

        # Verify that the labels can be casted to floats
        try:
            float(Y[0])
        except ValueError:
            encoder = OneHotEncoder()
            Y = encoder.fit_transform(Y.reshape(-1, 1)).toarray()
            # Only keep the encoder once it has been fitted successfully.
            self.enc = encoder

        self._model.fit(X=X, y=Y)

    def predict(self, X):
        """
        Returns prediction label for X.

        Parameters
        ==========
        X: Pandas DataFrame -> Data to predict value

        Returns
        =======
        Prediction labels: array like of size (nsamples, [n_features])
        """
        pred = self._model.predict(X)
        if self.enc is None:
            return pred

        # Mark, per row, the column(s) holding the row maximum, then let the
        # encoder translate that indicator matrix back into labels.
        winners = (pred == pred.max(axis=1, keepdims=True)).astype(float)
        return self.enc.inverse_transform(winners).reshape(-1)
def fit(self, X, Y):
    """
    Fit the model with X and Y.

    Parameters
    ----------
    X: ndarray, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples and
        n_features is the number of features.

        It is suggested that :math:`\\mathbf{X}` be centered by its column-
        means and scaled. If features are related, the matrix should be scaled
        to have unit variance, otherwise :math:`\\mathbf{X}` should be
        scaled so that each feature has a variance of 1 / n_features.

    Y: ndarray, shape (n_samples, n_properties)
        Training data, where n_samples is the number of samples and
        n_properties is the number of properties

        It is suggested that :math:`\\mathbf{Y}` be centered by its column-
        means and scaled. If features are related, the matrix should be scaled
        to have unit variance, otherwise :math:`\\mathbf{Y}` should be
        scaled so that each feature has a variance of 1 / n_features.

    Returns
    -------
    self: object
        Returns the instance itself.

    Raises
    ------
    ValueError
        If ``self.regressor`` is neither ``None`` nor a ``KernelRidge``
        instance, or if a supplied regressor's kernel parameters
        (kernel, gamma, degree, coef0, kernel_params) differ from the ones
        set on this estimator.
    """
    if self.regressor is not None and not isinstance(
            self.regressor, KernelRidge):
        raise ValueError("Regressor must be an instance of `KernelRidge`")

    X, Y = check_X_y(X, Y, y_numeric=True, multi_output=True)
    # Keep a private copy of the training data for later use.
    self.X_fit_ = X.copy()

    # Default number of components: one per sample, or one fewer when the
    # "arpack" solver is selected.
    if self.n_components is None:
        if self.svd_solver != "arpack":
            self.n_components = X.shape[0]
        else:
            self.n_components = X.shape[0] - 1

    K = self._get_kernel(X)

    # Optionally pass the kernel through a KernelNormalizer; the fitted
    # normalizer is stored on the estimator.
    if self.center:
        self.centerer_ = KernelNormalizer()
        K = self.centerer_.fit_transform(K)

    self.n_samples_ = X.shape[0]

    if self.regressor is None:
        # No regressor supplied: build one that shares this estimator's
        # kernel settings.
        regressor = KernelRidge(
            kernel=self.kernel,
            gamma=self.gamma,
            degree=self.degree,
            coef0=self.coef0,
            kernel_params=self.kernel_params,
            **self.regressor_params,
        )
    else:
        regressor = self.regressor
        kernel_attrs = [
            "kernel", "gamma", "degree", "coef0", "kernel_params"
        ]
        # A user-supplied regressor must agree with this estimator on
        # every kernel parameter.
        if not all([
                getattr(self, attr) == getattr(regressor, attr)
                for attr in kernel_attrs
        ]):
            raise ValueError(
                "Kernel parameter mismatch: the regressor has kernel parameters {%s}"
                " and KernelPCovR was initialized with kernel parameters {%s}"
                % (
                    ", ".join([
                        "%s: %r" % (attr, getattr(regressor, attr))
                        for attr in kernel_attrs
                    ]),
                    ", ".join([
                        "%s: %r" % (attr, getattr(self, attr))
                        for attr in kernel_attrs
                    ]),
                ))

    # Check if regressor is fitted; if not, fit with precomputed K
    # to avoid needing to compute the kernel a second time
    self.regressor_ = check_krr_fit(regressor, K, X, Y)

    # Dual coefficients as a 2D array with one row per training sample.
    W = self.regressor_.dual_coef_.reshape(X.shape[0], -1)

    # Use this instead of `self.regressor_.predict(K)`
    # so that we can handle the case of the pre-fitted regressor
    Yhat = K @ W

    # When we have an unfitted regressor,
    # we fit it with a precomputed K
    # so we must subsequently "reset" it so that
    # it will work on the particular X
    # of the KPCovR call. The dual coefficients are kept.
    # Can be bypassed if the regressor is pre-fitted.
    try:
        check_is_fitted(regressor)
    except NotFittedError:
        self.regressor_.set_params(**regressor.get_params())
        self.regressor_.X_fit_ = self.X_fit_
        self.regressor_._check_n_features(self.X_fit_, reset=True)

    # Handle svd_solver
    self._fit_svd_solver = self.svd_solver
    if self._fit_svd_solver == "auto":
        # Small problem or self.n_components == 'mle', just call full PCA
        if max(X.shape) <= 500 or self.n_components == "mle":
            self._fit_svd_solver = "full"
        elif self.n_components >= 1 and self.n_components < 0.8 * min(
                X.shape):
            self._fit_svd_solver = "randomized"
        # This is also the case of self.n_components in (0,1)
        else:
            self._fit_svd_solver = "full"

    self._fit(K, Yhat, W)

    # NOTE(review): pt__ and pkt_ are not assigned in this method;
    # presumably self._fit sets them -- confirm.
    self.ptk_ = self.pt__ @ K
    self.pty_ = self.pt__ @ Y

    if self.fit_inverse_transform:
        self.ptx_ = self.pt__ @ X

    self.pky_ = self.pkt_ @ self.pty_

    self.components_ = self.pkt_.T  # for sklearn compatibility
    return self
def relationship_road_traffic_accidents():
    """Relate accident counts per intersection to traffic volume and shapefile data.

    Steps, all driven by files in the working directory:

    1. Load every ``accident/*.csv`` through ``ReadAccident.Accident``; only
       the last loaded object is used afterwards.
    2. Load ``traffic/traffic-vehicle.csv`` and every ``shapefiles/*.shp``.
    3. Assign each accident record to the traffic row whose
       (Longitude, Latitude) is closest in plain Euclidean distance and
       count accidents per row.
    4. Build one feature row per matched traffic row: its
       ``'8 Peak Hr Vehicle Volume'`` followed by the value each shapefile
       object's ``binary_search`` returns for that row's coordinates.
    5. Fit and print a statsmodels OLS model, then fit a ``KernelRidge``
       model, pickle it to ``k_reg.pickle`` and print its parameters.

    Returns nothing.
    """
    accidents = glob.glob('accident/*.csv')
    acc = 0
    for a in accidents:
        acc = ReadAccident.Accident(a)

    toronto_traffic = pd.read_csv('traffic/traffic-vehicle.csv')

    # Relationship between peak vehicle volume and # of accidents at that intersection
    shp_data_objs = []
    for shp in glob.glob('shapefiles/*.shp'):
        print(shp)
        shp_data_objs.append(ReadSHP.ReadSHPFile(shp, shp))

    data = acc.data
    intersec_id = {}  # traffic row index -> number of accidents assigned to it
    other_xs = {}     # traffic row index -> shapefile lookups for that row
    print('Running')

    # Pull the coordinate columns out once; the search below touches them
    # for every (accident, traffic row) pair. read_csv's default index makes
    # row position and row label coincide.
    longitudes = toronto_traffic['Longitude'].to_numpy()
    latitudes = toronto_traffic['Latitude'].to_numpy()
    site_count = len(toronto_traffic.index.values)

    for i in range(len(data)):
        long = data[i].long
        lat = data[i].lat

        # Linear scan for the closest traffic row; the first row seeds the
        # minimum and ties keep the earliest row.
        min_index = 0
        min_dist = math.sqrt(
            math.pow(long - longitudes[0], 2) +
            math.pow(lat - latitudes[0], 2))
        for j in range(1, site_count):
            dist = math.sqrt(
                math.pow(long - longitudes[j], 2) +
                math.pow(lat - latitudes[j], 2))
            if dist < min_dist:
                min_dist = dist
                min_index = j

        intersec_id[min_index] = intersec_id.get(min_index, 0) + 1

        # The shapefile lookups depend only on the traffic row, so compute
        # them the first time that row is matched.
        if min_index not in other_xs:
            other_xs[min_index] = [
                s.binary_search(longitudes[min_index], latitudes[min_index])
                for s in shp_data_objs
            ]

    xs = []
    ys = []
    for j in intersec_id:
        dt = [toronto_traffic.loc[j, '8 Peak Hr Vehicle Volume']]
        dt.extend(other_xs[j])
        xs.append(dt)
        ys.append(intersec_id[j])
    print(xs)

    xs = np.array(xs)
    ys = np.array(ys)

    # NOTE: no constant column is added (sm.add_constant is not applied),
    # so the OLS model is fitted without an intercept term.
    model = sm.OLS(ys, xs).fit()
    print(model.summary())
    print(model.params)

    clf = KernelRidge(alpha=1.0)
    clf.fit(xs, ys)

    # The context manager guarantees the pickle file is flushed and closed.
    with open('k_reg.pickle', 'wb') as pickle_file:
        pickle.dump(clf, pickle_file)
    print(clf.get_params())
# Train a linear-kernel KernelRidge model on transformed features of
# "YearPredictionMSD20.txt", report it, and persist it with joblib.
from readFile import readDataSet
from sklearn.kernel_ridge import KernelRidge

try:
    import joblib
except ImportError:
    # Older scikit-learn releases only ship joblib as sklearn.externals.joblib.
    from sklearn.externals import joblib

data, nrows, ncols = readDataSet("YearPredictionMSD20.txt")
# Column 0 is used as the target; columns 1..90 are used as features.
X = data[:, 1:91]
y = data[:, 0]

# Apply the previously saved transformer (a PCA, judging by the file name).
# NOTE: joblib.load unpickles arbitrary objects -- only load trusted files.
transformer = joblib.load('PCA_20k.pkl')
X = transformer.transform(X)
# Single-argument print(...) produces the same output on Python 2 and 3.
print(X)

clf = KernelRidge(alpha=1, kernel="linear")
clf.fit(X, y)
print(clf.predict(X))
print(clf.get_params())
print(clf.score(X, y))  # score on the training data itself
joblib.dump(clf, "KRR_linear_20k.pkl")

# Earlier variant, kept for reference (no transform step, alpha = 1e-3):
# from readFile import readDataSet
# from sklearn.kernel_ridge import KernelRidge
# from sklearn.externals import joblib
# data, nrows, ncols = readDataSet("YearPredictionMSD20.txt")
# X = data[:,1:91]
# y = data[:,0]
# clf = KernelRidge(alpha = 1e-3)
# clf.fit(X, y)
# joblib.dump(clf, "linear_KRR_20k.pkl")
from sklearn.preprocessing import StandardScaler


def _standardized(regressor):
    """Return a Pipeline that standardizes the features, then applies *regressor*."""
    return Pipeline([('standardize', StandardScaler()),
                     ('regressor', regressor)])


# One standardizing pipeline per regressor object defined earlier in the file.
SupportVectorMachine_pipe = _standardized(SupportVectorMachine)
KernelRidge_pipe = _standardized(KernelRidge)
MultiLayerPerceptron_pipe = _standardized(MultiLayerPerceptron)
KNeighbors_pipe = _standardized(KNeighbors)
ExtraTree_pipe = _standardized(ExtraTree)
DecisionTree_pipe = _standardized(DecisionTree)
RandomForest_pipe = _standardized(RandomForest)
GradientBoosting_pipe = _standardized(GradientBoosting)
HistGradientBoosting_pipe = _standardized(HistGradientBoosting)

print()
print("Default Regressor Hyperparameter values:")
start = time.time()
# NOTE(review): get_params() is called without an instance argument, so
# `KernelRidge` is presumably an estimator instance here, not the class.
print(KernelRidge.get_params())
# Evaluate before stopping the clock so that the reported time covers the
# scoring; previously `end` was taken before this call ran.
default_score = getDefaultAccuracy(KernelRidge, KernelRidgeParameters, X, y)
end = time.time()
print("score with default values = ", default_score)
print("Time Elapsed = ", end - start)

# The two searches below are currently disabled, so the elapsed times
# printed for them only measure the empty interval.
print()
start = time.time()
#get_GridSearchCV(KernelRidge, KernelRidgeParameters, X, y)
end = time.time()
print("Time Elapsed = ", end - start)

print()
start = time.time()
#get_RandomizedGridSearchCV(KernelRidge, KernelRidgeParameters, X, y)
end = time.time()
print("Time Elapsed = ", end - start)
# Feature columns taken from `df`; the target is the 'mpg' column.
factors = [
    'cylinders', 'displacement', 'horsepower', 'acceleration', 'weight',
    'origin'
]
X = pd.DataFrame(df[factors].copy())
y = df['mpg'].copy()

# Standardize the features; from here on X is the array returned by
# fit_transform rather than a DataFrame.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test rows influence the scaling statistics -- confirm this is intended.
X = StandardScaler().fit_transform(X)

# Hold out 33% of the rows for testing; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=324)

# In[165]:

# `clf` is created outside this excerpt; display its parameters.
clf.get_params()

# In[166]:

clf.fit(X_train, y_train)

# In[167]:

y_predicted = clf.predict(X_test)

# In[168]:

# Root-mean-squared error of the predictions on the held-out rows.
rmse = sqrt(mean_squared_error(y_true=y_test, y_pred=y_predicted))
rmse

# In[169]: