def test_regression_krige():
    """Check that regression kriging reaches a minimum score on synthetic data.

    A noisy linear response is built from five identical feature columns,
    paired with a 10 x 10 lon/lat grid, and split 70/30.  Every
    (regression model, kriging method) pair yielded by ``_methods()`` must
    score above 0.25 on the held-out part.
    """
    np.random.seed(1)
    base = np.linspace(-1.0, 1.0, 100)
    # feature matrix with 5 (identical) feature columns, shape (100, 5)
    X = np.tile(base, (5, 1)).T
    col0, col1, col2, col3, col4 = X.T
    # uniform noise in [-1, 1); the only random draw after seeding
    noise = 2 * (np.random.rand(100) - 0.5)
    y = 1 + 5 * col0 - 2 * col1 - 2 * col2 + 3 * col3 + 4 * col4 + noise

    # lon/lat pairs: cartesian product of 10 longitudes and 10 latitudes
    longitudes = np.linspace(-180.0, 180.0, 10)
    latitudes = np.linspace(-90.0, 90.0, 10)
    lon_lat = np.array(tuple(product(longitudes, latitudes)))

    split = train_test_split(X, y, lon_lat, train_size=0.7, random_state=10)
    X_train, X_test, y_train, y_test, lon_lat_train, lon_lat_test = split

    for regressor, kriging_method in _methods():
        model = RegressionKriging(
            regression_model=regressor,
            method=kriging_method,
            n_closest_points=2,
        )
        model.fit(X_train, lon_lat_train, y_train)
        score = model.score(X_test, lon_lat_test, y_test)
        assert score > 0.25
# Example #2
# 0
def regression_kriging_evaluation(year, features_=None):
    """Evaluate regression kriging by holding out one spatial unit at a time.

    For each of 77 units, a ``RegressionKriging`` model backed by a small
    random forest is fitted on the remaining units and used to predict the
    held-out one; the absolute prediction error is collected.

    The mean absolute error and its ratio to ``np.mean(Y)`` are printed.

    :param year: forwarded to ``extract_raw_samples`` to select the data.
    :param features_: feature selection forwarded to ``build_features``;
        defaults to ``['all']`` when omitted.
    :return: list with one absolute-error entry per held-out unit.
    """
    from pykrige.rk import RegressionKriging
    from sklearn.ensemble import RandomForestRegressor
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Avoid a shared mutable default argument; behaviour for callers that
    # omit the argument is unchanged.
    if features_ is None:
        features_ = ['all']

    rf_model = RandomForestRegressor(n_estimators=2)
    Y, D, P, Tf, Gd = extract_raw_samples(year, crime_t=['total'])

    coords = get_centroid_ca()

    errors = []
    # NOTE(review): 77 is hard-coded; it is assumed to equal the number of
    # rows in ``coords`` and of samples in ``Y`` -- confirm against the data.
    for k in range(77):
        # build_features is assumed to hold out unit k as the test sample.
        X_train, X_test, Y_train, Y_test = build_features(Y, D, P, Tf, Y, Gd, Y, k, features=features_, taxi_norm="bydestination")
        if k == 0:
            print(X_train.shape)
        coords_train = np.delete(coords, k, axis=0)
        # index with None to keep a 2-D (1, n_dims) array for the single point
        coords_test = np.array(coords)[k, None]
        m_rk = RegressionKriging(regression_model=rf_model)
        m_rk.fit(X_train, coords_train, Y_train)
        z = m_rk.predict(X_test, coords_test)
        errors.append(abs(Y_test - z[0]))
    mean_error = np.mean(errors)
    # Single pre-formatted string: prints the same text on Python 2 and 3.
    print("%s %s" % (mean_error, mean_error / np.mean(Y)))
    return errors
def test_krige_housing():
    """Check regression kriging scores on the California housing data.

    Uses the first 1000 samples: all columns but the last two are the
    regression features, the last two are passed as kriging coordinates.
    Ordinary kriging must score above 0.5, any other method above 0.0.
    The test is skipped when the dataset cannot be loaded because of a
    ``PermissionError``.
    """
    try:
        dataset = fetch_california_housing()
    except PermissionError:
        # This can raise permission error on Appveyor
        pytest.skip('Failed to load california housing dataset')

    # take only first 1000
    head = dataset['data'][:1000]
    features = head[:, :-2]
    coordinates = head[:, -2:]
    response = dataset['target'][:1000]

    split = train_test_split(features, response, coordinates,
                             train_size=0.7, random_state=10)
    p_train, p_test, y_train, y_test, x_train, x_test = split

    for regressor, kriging_method in _methods():
        model = RegressionKriging(regression_model=regressor,
                                  method=kriging_method,
                                  n_closest_points=2)
        model.fit(p_train, x_train, y_train)
        # ordinary kriging is held to a stricter threshold
        min_score = 0.5 if kriging_method == 'ordinary' else 0.0
        assert model.score(p_test, x_test, y_test) > min_score
def test_krige_housing():
    """Check regression kriging scores on the California housing data.

    The dataset download is retried once without HTTPS certificate
    verification if the first attempt fails with an SSL or URL error.  The
    test is skipped when either attempt raises ``PermissionError``.

    Uses the first 1000 samples: all columns but the last two are the
    regression features, the last two are passed as kriging coordinates.
    Ordinary kriging must score above 0.5, any other method above 0.0.
    """
    import ssl
    # ``import urllib`` alone does not guarantee the ``error`` submodule is
    # loaded, so import it explicitly.
    import urllib.error

    try:
        try:
            housing = fetch_california_housing()
        except (ssl.SSLError, urllib.error.URLError):
            # Retry with certificate verification disabled, then always put
            # the original context factory back -- even if the retry raises.
            original_context = ssl._create_default_https_context
            ssl._create_default_https_context = ssl._create_unverified_context
            try:
                housing = fetch_california_housing()
            finally:
                ssl._create_default_https_context = original_context
    except PermissionError:
        # This can raise permission error on Appveyor
        pytest.skip("Failed to load california housing dataset")

    # take only first 1000
    p = housing["data"][:1000, :-2]
    x = housing["data"][:1000, -2:]
    target = housing["target"][:1000]

    p_train, p_test, y_train, y_test, x_train, x_test = train_test_split(
        p, target, x, train_size=0.7, random_state=10
    )

    for ml_model, krige_method in _methods():

        reg_kr_model = RegressionKriging(
            regression_model=ml_model, method=krige_method, n_closest_points=2
        )
        reg_kr_model.fit(p_train, x_train, y_train)
        if krige_method == "ordinary":
            assert reg_kr_model.score(p_test, x_test, y_test) > 0.5
        else:
            assert reg_kr_model.score(p_test, x_test, y_test) > 0.0
def test_regression_krige():
    """Regression kriging must beat a score of 0.25 on synthetic data.

    Five identical feature columns drive a noisy linear response; the
    samples sit on a 10 x 10 lon/lat grid and are split 70/30.  Each
    (regression model, kriging method) pair from ``_methods()`` is fitted
    and scored on the held-out samples.
    """
    np.random.seed(1)
    grid = np.linspace(-1., 1., 100)
    # create a feature matrix with 5 features
    X = np.tile(grid, (5, 1)).T
    signal = (1 + 5 * X[:, 0] - 2 * X[:, 1] - 2 * X[:, 2]
              + 3 * X[:, 3] + 4 * X[:, 4])
    # uniform noise in [-1, 1), added last as in the reference formula
    y = signal + 2 * (np.random.rand(100) - 0.5)

    # create lat/lon array
    lon_values = np.linspace(-180., 180.0, 10)
    lat_values = np.linspace(-90., 90., 10)
    lon_lat = np.array(tuple(product(lon_values, lat_values)))

    parts = train_test_split(X, y, lon_lat, train_size=0.7, random_state=10)
    X_train, X_test, y_train, y_test, lon_lat_train, lon_lat_test = parts

    for estimator, method_name in _methods():
        rk = RegressionKriging(regression_model=estimator,
                               method=method_name,
                               n_closest_points=2)
        rk.fit(X_train, lon_lat_train, y_train)
        held_out_score = rk.score(X_test, lon_lat_test, y_test)
        assert held_out_score > 0.25
def regression_kriging(file):
    """
    Fit and score regression-kriging models for three regressors.

    Based on
    https://pykrige.readthedocs.io/en/latest/examples/regression_kriging2d.html

    For an SVR, a random forest and a linear regression, a
    ``RegressionKriging`` model is fitted and both the plain regression
    score and the regression-kriging score are printed.

    The training/test arrays (``p_train``, ``x_train``, ``target_train``,
    ``p_test``, ``x_test``, ``target_test``) are read from the enclosing
    module scope, not from the argument.

    :param file: not used by this function; kept for interface compatibility.
    :return: None; results are printed.
    """
    svr_model = SVR(C=0.1)
    rf_model = RandomForestRegressor(n_estimators=100)
    # ``normalize=True`` was dropped: scikit-learn ignored it whenever
    # ``fit_intercept=False`` and removed the parameter in version 1.2, where
    # passing it raises TypeError.
    lr_model = LinearRegression(copy_X=True,
                                fit_intercept=False)

    models = [svr_model, rf_model, lr_model]

    for m in models:
        print('=' * 40)
        print('regression model:', m.__class__.__name__)
        m_rk = RegressionKriging(regression_model=m, n_closest_points=10)
        m_rk.fit(p_train, x_train, target_train)
        print('Regression Score: ',
              m_rk.regression_model.score(p_test, target_test))
        print('RK score: ', m_rk.score(p_test, x_test, target_test))
def test_krige_housing():
    """Check regression kriging scores on the California housing data.

    Uses the first 1000 samples: all columns but the last two are the
    regression features, the last two are passed as kriging coordinates.
    Ordinary kriging must score above 0.5, any other method above 0.0.
    """
    dataset = fetch_california_housing()

    # take only first 1000
    first_rows = dataset['data'][:1000]
    predictors = first_rows[:, :-2]
    locations = first_rows[:, -2:]
    labels = dataset['target'][:1000]

    pieces = train_test_split(predictors, labels, locations,
                              train_size=0.7, random_state=10)
    p_train, p_test, y_train, y_test, x_train, x_test = pieces

    for regressor, kriging_method in _methods():
        rk_model = RegressionKriging(regression_model=regressor,
                                     method=kriging_method,
                                     n_closest_points=2)
        rk_model.fit(p_train, x_train, y_train)
        # ordinary kriging is held to a stricter threshold
        threshold = 0.5 if kriging_method == 'ordinary' else 0.0
        assert rk_model.score(p_test, x_test, y_test) > threshold
except PermissionError:
    # this dataset can occasionally fail to download on Windows
    sys.exit(0)

# take the first 5000 as Kriging is memory intensive
# NOTE(review): `housing` and `models` are not defined in this excerpt; they
# are assumed to be created earlier in the script.
# p: every column except the last two (passed to fit() as the regression
#    features); x: the last two columns (passed to fit() as the coordinates).
p = housing['data'][:5000, :-2]
x = housing['data'][:5000, -2:]
target = housing['target'][:5000]

# 70/30 split with a fixed seed; the three arrays are split consistently.
p_train, p_test, x_train, x_test, target_train, target_test \
    = train_test_split(p, x, target, test_size=0.3, random_state=42)

# For each regressor, fit a regression-kriging model and print both the
# score of the regression model alone and the combined RK score.
for m in models:
    print('=' * 40)
    print('regression model:', m.__class__.__name__)
    m_rk = RegressionKriging(regression_model=m, n_closest_points=10)
    m_rk.fit(p_train, x_train, target_train)
    print('Regression Score: ',
          m_rk.regression_model.score(p_test, target_test))
    print('RK score: ', m_rk.score(p_test, x_test, target_test))

##====================================OUTPUT==================================

# ========================================
#  regression model: <class 'sklearn.svm.classes.SVR'>
# Finished learning regression model
# Finished kriging residuals
# Regression Score:  -0.034053855457
# RK score:  0.66195576665
# ========================================
#  regression model: <class 'sklearn.ensemble.forest.RandomForestRegressor'>
except PermissionError:
    # this dataset can occasionally fail to download on Windows
    sys.exit(0)

# take the first 5000 as Kriging is memory intensive
# NOTE(review): `housing` and `models` are not defined in this excerpt; they
# are assumed to be created earlier in the script.
# p: every column except the last two (passed to fit() as the regression
#    features); x: the last two columns (passed to fit() as the coordinates).
p = housing['data'][:5000, :-2]
x = housing['data'][:5000, -2:]
target = housing['target'][:5000]

# 70/30 split with a fixed seed; the three arrays are split consistently.
p_train, p_test, x_train, x_test, target_train, target_test \
    = train_test_split(p, x, target, test_size=0.3, random_state=42)

# For each regressor, fit a regression-kriging model and print both the
# score of the regression model alone and the combined RK score.
for m in models:
    print('=' * 40)
    print('regression model:', m.__class__.__name__)
    m_rk = RegressionKriging(regression_model=m, n_closest_points=10)
    m_rk.fit(p_train, x_train, target_train)
    print('Regression Score: ', m_rk.regression_model.score(p_test, target_test))
    print('RK score: ', m_rk.score(p_test, x_test, target_test))

##====================================OUTPUT==================================

# ========================================
#  regression model: <class 'sklearn.svm.classes.SVR'>
# Finished learning regression model
# Finished kriging residuals
# Regression Score:  -0.034053855457
# RK score:  0.66195576665
# ========================================
#  regression model: <class 'sklearn.ensemble.forest.RandomForestRegressor'>
# Finished learning regression model
#to this in R. Therefore, we used R to extract centroid pixel values of the src image
# Read the gridded elevation table (absolute, machine-specific path).
grd = pd.read_csv(
    "/home/cvssk/Carlisle_Resubmission/2005Event/SVR/gridded_elevation.csv")
# %%

# Split the table: all columns but the last, and the last column on its own.
x_grd = grd.to_numpy()
# presumably the x/y coordinates of each grid cell -- TODO confirm column order
x_y = x_grd[:, :-1]
# numpy array of surface elevation (kept 2-D, one column)
z = x_grd[:, -1:]

#%%
# construct a random forest model
rf_model = RandomForestRegressor(n_estimators=100)
# configure the regression kriging model (it is fitted in the next cell)
m_rk = RegressionKriging(regression_model=rf_model,
                         n_closest_points=2,
                         method='ordinary',
                         variogram_model='spherical')

#%%
# NOTE(review): `p`, `x` and `sub1` are defined outside this excerpt; column
# 39 of `sub1` is used as the target -- confirm what it holds.
m_rk.fit(p, x, sub1.iloc[:, 39])
# predict on the grid: z as the regression features, x_y as the coordinates
pred_sph = m_rk.predict(z, x_y)

#%%
# alias, not a copy: the in-place edits below also change pred_sph
sph = pred_sph

#%%
# filter values: set predictions below 0.3 to zero
sph[sph < 0.3] = 0
# in-place resize to the raster dimensions of `src` (defined elsewhere)
sph.resize(src.height, src.width)
show(sph)