def test_krige_housing():
    """Check regression kriging scores on the first 1000 California housing rows.

    The dataset is downloaded with ``fetch_california_housing``. If the
    download fails with an SSL or URL error, it is retried once with HTTPS
    certificate verification disabled; the previous HTTPS context factory is
    always restored afterwards, even when the retry fails or the test is
    skipped. A ``PermissionError`` during the retry skips the test.

    For every ``(ml_model, krige_method)`` pair yielded by ``_methods()`` a
    ``RegressionKriging`` model is fitted on 70% of the rows and its score on
    the remaining rows must exceed 0.5 for ``"ordinary"`` kriging and 0.0 for
    any other method.
    """
    import ssl
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.error`` has been loaded.
    import urllib.error

    try:
        housing = fetch_california_housing()
    except (ssl.SSLError, urllib.error.URLError):
        # Retry once without certificate verification. The override is
        # process-wide, so remember the current factory and put it back in
        # ``finally`` (``pytest.skip`` raises, as may the retry itself).
        saved_context_factory = ssl._create_default_https_context
        ssl._create_default_https_context = ssl._create_unverified_context
        try:
            housing = fetch_california_housing()
        except PermissionError:
            # This can raise permission error on Appveyor
            pytest.skip("Failed to load california housing dataset")
        finally:
            ssl._create_default_https_context = saved_context_factory

    # take only first 1000
    # p: every column except the last two (regression features)
    # x: the last two columns (passed to the model as the coordinates)
    p = housing["data"][:1000, :-2]
    x = housing["data"][:1000, -2:]
    target = housing["target"][:1000]

    p_train, p_test, y_train, y_test, x_train, x_test = train_test_split(
        p, target, x, train_size=0.7, random_state=10
    )

    for ml_model, krige_method in _methods():
        reg_kr_model = RegressionKriging(
            regression_model=ml_model, method=krige_method, n_closest_points=2
        )
        reg_kr_model.fit(p_train, x_train, y_train)
        if krige_method == "ordinary":
            assert reg_kr_model.score(p_test, x_test, y_test) > 0.5
        else:
            assert reg_kr_model.score(p_test, x_test, y_test) > 0.0
def regression_kriging_evaluation(year, features_=None):
    """Evaluate regression kriging by holding out one region at a time.

    For each index ``k`` in ``range(77)`` the features are rebuilt with
    ``build_features`` (which receives ``k``), the ``k``-th coordinate row is
    removed from the training coordinates and used as the test coordinate,
    a ``RegressionKriging`` model backed by a 2-tree random forest is fitted,
    and the absolute error of its prediction is recorded.

    The shape of the first training matrix, and finally the mean error
    together with the mean error divided by ``np.mean(Y)``, are printed.

    :param year: forwarded to ``extract_raw_samples``.
    :param features_: feature selection forwarded to ``build_features``;
        defaults to ``['all']`` when omitted or ``None``.
    :return: list with one absolute error per held-out index.
    """
    from pykrige.rk import RegressionKriging
    from sklearn.ensemble import RandomForestRegressor
    import warnings

    # A fresh list per call avoids sharing one mutable default across calls.
    if features_ is None:
        features_ = ['all']

    # NOTE: this changes the process-wide warning filters.
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    rf_model = RandomForestRegressor(n_estimators=2)

    Y, D, P, Tf, Gd = extract_raw_samples(year, crime_t=['total'])
    coords = get_centroid_ca()

    errors = []
    # NOTE(review): 77 is hard-coded; presumably it equals len(coords) --
    # confirm against get_centroid_ca().
    for k in range(77):
        X_train, X_test, Y_train, Y_test = build_features(
            Y, D, P, Tf, Y, Gd, Y, k,
            features=features_, taxi_norm="bydestination")
        if k == 0:
            print(X_train.shape)

        # Train on every coordinate except row k; test on row k alone
        # (indexing with ``k, None`` keeps a leading axis of length 1).
        coords_train = np.delete(coords, k, axis=0)
        coords_test = np.array(coords)[k, None]

        m_rk = RegressionKriging(regression_model=rf_model)
        m_rk.fit(X_train, coords_train, Y_train)
        z = m_rk.predict(X_test, coords_test)
        errors.append(abs(Y_test - z[0]))

    mean_error = np.mean(errors)
    # "%s %s" reproduces the space-separated output of the former
    # Python 2 print statement and is valid on Python 3 as well.
    print("%s %s" % (mean_error, mean_error / np.mean(Y)))
    return errors
def test_regression_krige():
    """Regression kriging must score above 0.25 on a noisy linear target.

    The target is a fixed linear combination of five identical feature
    columns plus uniform noise; coordinates are a 10 x 10 lon/lat grid.
    Every ``(model, method)`` pair from ``_methods()`` is fitted on 70% of
    the samples and scored on the rest.
    """
    np.random.seed(1)
    n_samples = 100
    ramp = np.linspace(-1.0, 1.0, n_samples)
    # create a feature matrix with 5 features (the same ramp in each column)
    X = np.tile(ramp, reps=(5, 1)).T
    # uniform noise in [-1, 1); the only draw from the seeded global RNG here
    noise = 2 * (np.random.rand(n_samples) - 0.5)
    signal = 1 + 5 * X[:, 0] - 2 * X[:, 1] - 2 * X[:, 2] + 3 * X[:, 3] + 4 * X[:, 4]
    y = signal + noise

    # create lat/lon array: every (lon, lat) pair, lat varying fastest
    lon_axis = np.linspace(-180.0, 180.0, 10)
    lat_axis = np.linspace(-90.0, 90.0, 10)
    lon_lat = np.array([(lo, la) for lo in lon_axis for la in lat_axis])

    split = train_test_split(X, y, lon_lat, train_size=0.7, random_state=10)
    X_train, X_test, y_train, y_test, lon_lat_train, lon_lat_test = split

    for regressor, variant in _methods():
        model = RegressionKriging(
            regression_model=regressor, method=variant, n_closest_points=2
        )
        model.fit(X_train, lon_lat_train, y_train)
        test_score = model.score(X_test, lon_lat_test, y_test)
        assert test_score > 0.25
def test_krige_housing():
    """Regression kriging on the first 1000 California housing rows.

    The test is skipped when loading the dataset raises ``PermissionError``.
    Ordinary kriging must score above 0.5 on the held-out 30%; any other
    kriging method only has to score above 0.0.
    """
    try:
        dataset = fetch_california_housing()
    except PermissionError:
        # This can raise permission error on Appveyor
        pytest.skip('Failed to load california housing dataset')

    # take only first 1000
    n_rows = 1000
    covariates = dataset['data'][:n_rows, :-2]   # all but the last two columns
    coordinates = dataset['data'][:n_rows, -2:]  # the last two columns
    response = dataset['target'][:n_rows]

    split = train_test_split(covariates, response, coordinates,
                             train_size=0.7, random_state=10)
    p_train, p_test, y_train, y_test, x_train, x_test = split

    for regressor, variant in _methods():
        model = RegressionKriging(regression_model=regressor,
                                  method=variant,
                                  n_closest_points=2)
        model.fit(p_train, x_train, y_train)
        # ordinary kriging is held to a stricter bound than other methods
        min_score = 0.5 if variant == 'ordinary' else 0.0
        assert model.score(p_test, x_test, y_test) > min_score
def test_krige_housing():
    """Score regression kriging on a 1000-row slice of California housing.

    Each ``(model, method)`` pair from ``_methods()`` is trained on 70% of
    the slice. The held-out score has to beat 0.5 for ``'ordinary'``
    kriging and 0.0 for every other method.
    """
    bunch = fetch_california_housing()

    # take only first 1000
    first_rows = slice(None, 1000)
    table = bunch['data'][first_rows]
    p = table[:, :-2]   # every column except the final two
    x = table[:, -2:]   # the final two columns, used as coordinates
    target = bunch['target'][first_rows]

    parts = train_test_split(p, target, x, train_size=0.7, random_state=10)
    p_train, p_test, y_train, y_test, x_train, x_test = parts

    for estimator, kind in _methods():
        rk = RegressionKriging(regression_model=estimator,
                               method=kind,
                               n_closest_points=2)
        rk.fit(p_train, x_train, y_train)
        held_out_score = rk.score(p_test, x_test, y_test)
        if kind != 'ordinary':
            assert held_out_score > 0.0
        else:
            assert held_out_score > 0.5
def test_regression_krige():
    """Fit regression kriging to synthetic data and require a score > 0.25.

    Features are five copies of one linear ramp, the target is a linear mix
    of them plus seeded uniform noise, and the coordinates form a 10 x 10
    longitude/latitude grid.
    """
    np.random.seed(1)
    line = np.linspace(-1., 1., 100)
    # create a feature matrix with 5 features
    X = np.tile(line, reps=(5, 1)).T
    col = [X[:, j] for j in range(5)]
    jitter = 2*(np.random.rand(100) - 0.5)
    # same left-to-right summation order as a single chained expression
    y = 1 + 5*col[0] - 2*col[1] - 2*col[2] + 3*col[3] + 4*col[4] + jitter

    # create lat/lon array
    longitudes = np.linspace(-180., 180.0, 10)
    latitudes = np.linspace(-90., 90., 10)
    grid_pairs = list(product(longitudes, latitudes))
    lon_lat = np.array(grid_pairs)

    (X_train, X_test,
     y_train, y_test,
     lon_lat_train, lon_lat_test) = train_test_split(
        X, y, lon_lat, train_size=0.7, random_state=10)

    for base_model, kriging_kind in _methods():
        rk = RegressionKriging(regression_model=base_model,
                               method=kriging_kind,
                               n_closest_points=2)
        rk.fit(X_train, lon_lat_train, y_train)
        outcome = rk.score(X_test, lon_lat_test, y_test)
        assert outcome > 0.25
def regression_kriging(file):
    """
    Fit regression kriging with three scikit-learn regressors and print scores.

    For an SVR, a 100-tree random forest and a linear regression without
    intercept, a ``RegressionKriging`` model using 10 closest points is
    fitted, then the plain regression score and the regression-kriging score
    on the test split are printed.

    NOTE(review): ``p_train``, ``x_train``, ``target_train``, ``p_test``,
    ``x_test`` and ``target_test`` are not defined in this function; they
    must exist in an enclosing (presumably module) scope -- confirm.

    https://pykrige.readthedocs.io/en/latest/examples/regression_kriging2d.html

    :param file: not used by the current implementation.
    :return: None; results are only printed.
    """
    svr_model = SVR(C=0.1)
    rf_model = RandomForestRegressor(n_estimators=100)
    # ``normalize`` is deliberately not passed: it was deprecated in
    # scikit-learn 1.0 and removed in 1.2 (passing it raises TypeError), and
    # it was documented as ignored whenever fit_intercept=False, so results
    # are unchanged on older versions.
    lr_model = LinearRegression(copy_X=True, fit_intercept=False)

    models = [svr_model, rf_model, lr_model]

    for m in models:
        print('=' * 40)
        print('regression model:', m.__class__.__name__)
        m_rk = RegressionKriging(regression_model=m, n_closest_points=10)
        m_rk.fit(p_train, x_train, target_train)
        # score of the underlying regressor alone, then of the full RK model
        print('Regression Score: ', m_rk.regression_model.score(p_test, target_test))
        print('RK score: ', m_rk.score(p_test, x_test, target_test))
# this dataset can occasionally fail to download on Windows sys.exit(0) # take the first 5000 as Kriging is memory intensive p = housing['data'][:5000, :-2] x = housing['data'][:5000, -2:] target = housing['target'][:5000] p_train, p_test, x_train, x_test, target_train, target_test \ = train_test_split(p, x, target, test_size=0.3, random_state=42) for m in models: print('=' * 40) print('regression model:', m.__class__.__name__) m_rk = RegressionKriging(regression_model=m, n_closest_points=10) m_rk.fit(p_train, x_train, target_train) print('Regression Score: ', m_rk.regression_model.score(p_test, target_test)) print('RK score: ', m_rk.score(p_test, x_test, target_test)) ##====================================OUTPUT================================== # ======================================== # regression model: <class 'sklearn.svm.classes.SVR'> # Finished learning regression model # Finished kriging residuals # Regression Score: -0.034053855457 # RK score: 0.66195576665 # ======================================== # regression model: <class 'sklearn.ensemble.forest.RandomForestRegressor'> # Finished learning regression model
# this dataset can occasionally fail to download on Windows sys.exit(0) # take the first 5000 as Kriging is memory intensive p = housing['data'][:5000, :-2] x = housing['data'][:5000, -2:] target = housing['target'][:5000] p_train, p_test, x_train, x_test, target_train, target_test \ = train_test_split(p, x, target, test_size=0.3, random_state=42) for m in models: print('=' * 40) print('regression model:', m.__class__.__name__) m_rk = RegressionKriging(regression_model=m, n_closest_points=10) m_rk.fit(p_train, x_train, target_train) print('Regression Score: ', m_rk.regression_model.score(p_test, target_test)) print('RK score: ', m_rk.score(p_test, x_test, target_test)) ##====================================OUTPUT================================== # ======================================== # regression model: <class 'sklearn.svm.classes.SVR'> # Finished learning regression model # Finished kriging residuals # Regression Score: -0.034053855457 # RK score: 0.66195576665 # ======================================== # regression model: <class 'sklearn.ensemble.forest.RandomForestRegressor'> # Finished learning regression model # Finished kriging residuals
x_grd = grd.to_numpy() x_y = x_grd[:, :-1] ##numpy array of surface elevation z = x_grd[:, -1:] #%% #constract a randomforest model rf_model = RandomForestRegressor(n_estimators=100) #fit the regression kriging model m_rk = RegressionKriging(regression_model=rf_model, n_closest_points=2, method='ordinary', variogram_model='spherical') #%% m_rk.fit(p, x, sub1.iloc[:, 39]) pred_sph = m_rk.predict(z, x_y) #%% sph = pred_sph #%% #filter values sph[sph < 0.3] = 0 sph.resize(src.height, src.width) show(sph) #%% with rio.Env(): # Write an array as a raster band to a new 8-bit file. For # the new file's profile, we start with the profile of the source