Example #1
# Imports assumed by this snippet (it uses the pre-0.22 scikit-learn
# partial dependence API from sklearn.ensemble.partial_dependence):
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble.partial_dependence import partial_dependence
from sklearn.ensemble.partial_dependence import plot_partial_dependence
from sklearn.model_selection import train_test_split


def main():
    cal_housing = fetch_california_housing()

    # split 80/20 train-test
    X_train, X_test, y_train, y_test = train_test_split(cal_housing.data,
                                                        cal_housing.target,
                                                        test_size=0.2,
                                                        random_state=1)
    names = cal_housing.feature_names
    print('_' * 80)
    print("Training GBRT...")

    clf = GradientBoostingRegressor(n_estimators=100,
                                    max_depth=4,
                                    learning_rate=0.1,
                                    loss='huber',
                                    random_state=1)
    clf.fit(X_train, y_train)
    print("done.")
    print('_' * 80)
    print('Convenience plot with ``partial_dependence_plots``')
    print()

    features = [0, 5, 1, 2, (5, 1)]
    fig, axs = plot_partial_dependence(clf,
                                       X_train,
                                       features,
                                       feature_names=names,
                                       n_jobs=3,
                                       grid_resolution=50)
    fig.suptitle(
        'Partial dependence of house value on non-location features '
        'for the California housing dataset'
    )
    plt.subplots_adjust(top=0.9)
    print('_' * 80)
    print('Custom 3d plot via ``partial_dependence``')
    print()

    fig = plt.figure()
    target_feature = (1, 5)
    pdp, (x_axis, y_axis) = partial_dependence(clf,
                                               target_feature,
                                               X=X_train,
                                               grid_resolution=50)
    XX, YY = np.meshgrid(x_axis, y_axis)
    Z = pdp.T.reshape(XX.shape).T
    ax = Axes3D(fig)
    surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1, cmap=plt.cm.BuPu)
    ax.set_xlabel(names[target_feature[0]])
    ax.set_ylabel(names[target_feature[1]])
    ax.set_zlabel('Partial dependence')

    #  pretty init view
    ax.view_init(elev=22, azim=122)
    plt.colorbar(surf)
    plt.suptitle(
        'Partial dependence of house value on median age and average occupancy'
    )
    plt.subplots_adjust(top=0.9)
    plt.show()
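The sklearn.ensemble.partial_dependence module used above was deprecated in
scikit-learn 0.22 and removed in 0.24. A minimal sketch of the same plots with
the current sklearn.inspection API, reusing clf, X_train and names from the
snippet (assuming scikit-learn >= 1.0; the keys returned by partial_dependence
vary slightly across versions):

from sklearn.inspection import PartialDependenceDisplay, partial_dependence

# Convenience plot, current API
features = [0, 5, 1, 2, (5, 1)]
PartialDependenceDisplay.from_estimator(clf, X_train, features,
                                        feature_names=names,
                                        grid_resolution=50)

# Raw values for a custom plot: a Bunch holding the averaged predictions
# and the grid evaluated for each target feature
result = partial_dependence(clf, X_train, features=[(1, 5)],
                            grid_resolution=50)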
Example #2
def test():
    # data
    cal_housing = fetch_california_housing()
    X = cal_housing['data']
    Y = np.reshape(cal_housing['target'], (-1, 1))

    # train models
    iters = 100
    name = ["0", "1", "2", "3", "4"]
    model = [
        SCNN(8, 1, 0, update=Update.Rprop()),
        SCNN(8, 1, 1, update=Update.Rprop()),
        SCNN(8, 1, 2, update=Update.Rprop()),
        SCNN(8, 1, 3, update=Update.Rprop()),
        SCNN(8, 1, 4, update=Update.Rprop())
    ]
    error = np.zeros((len(model), iters))
    for i in range(iters):
        for m in range(len(model)):
            error[m, i] = model[m].partial_fit(X, Y)
        print(i + 1, "complete")

    # plot results
    plt.figure()
    plt.title('Error Curves')
    for m in range(len(model)):
        plt.semilogy(error[m], label=name[m])
    plt.legend()

    plt.show()
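SCNN and Update come from the snippet's own project and are not importable
here. A runnable analogue of the same error-curve comparison, sketched with
scikit-learn's MLPRegressor.partial_fit (the hidden-layer sizes, iteration
count and labels are illustrative assumptions):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.neural_network import MLPRegressor

X, y = fetch_california_housing(return_X_y=True)
sizes = (2, 4, 8)
models = [MLPRegressor(hidden_layer_sizes=(h,), random_state=0) for h in sizes]

iters = 20
error = np.zeros((len(models), iters))
for i in range(iters):
    for m, est in enumerate(models):
        est.partial_fit(X, y)          # one incremental pass over the data
        error[m, i] = est.loss_        # running training loss

plt.figure()
plt.title('Error Curves')
for m, h in enumerate(sizes):
    plt.semilogy(error[m], label='%d hidden units' % h)
plt.legend()
plt.show()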
Example #3
def test_abs_loss():
    X, y = california_housing.fetch_california_housing(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    regressor = GradientBooster(n_iter=100, loss_function=AbsoluteLoss())
    regressor.fit(X_train, y_train)

    y_pred = regressor.predict(X_test)    
    print(f"Test Mean absolute error: {mean_absolute_error(y_pred, y_test):.4f}")
Example #4
def main():
    # fetch California housing dataset
    try:
        cal_housing = fetch_california_housing()
    except HTTPError:
        print("Failed downloading california housing data.")
        return

    # split 80/20 train-test
    X_train, X_test, y_train, y_test = train_test_split(cal_housing.data,
                                                        cal_housing.target,
                                                        test_size=0.2,
                                                        random_state=1)
    names = cal_housing.feature_names

    print('_' * 80)
    print("Training GBRT...")
    clf = GradientBoostingRegressor(n_estimators=100, max_depth=4,
                                    learning_rate=0.1, loss='huber',
                                    random_state=1)
    clf.fit(X_train, y_train)
    print("done.")

    print('_' * 80)
    print('Convenience plot with ``partial_dependence_plots``')
    print()

    features = [0, 5, 1, 2, (5, 1)]
    fig, axs = plot_partial_dependence(clf, X_train, features, feature_names=names,
                                       n_jobs=3, grid_resolution=50)
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset')
    plt.subplots_adjust(top=0.9)  # tight_layout causes overlap with suptitle

    print('_' * 80)
    print('Custom 3d plot via ``partial_dependence``')
    print()
    fig = plt.figure()

    target_feature = (1, 5)
    pdp, (x_axis, y_axis) = partial_dependence(clf, target_feature,
                                               X=X_train, grid_resolution=50)
    XX, YY = np.meshgrid(x_axis, y_axis)
    Z = pdp.T.reshape(XX.shape).T
    ax = Axes3D(fig)
    surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1, cmap=plt.cm.BuPu)
    ax.set_xlabel(names[target_feature[0]])
    ax.set_ylabel(names[target_feature[1]])
    ax.set_zlabel('Partial dependence')
    #  pretty init view
    ax.view_init(elev=22, azim=122)
    plt.colorbar(surf)
    plt.suptitle('Partial dependence of house value on median age and '
                 'average occupancy')
    plt.subplots_adjust(top=0.9)

    plt.show()
Example #5
def generate_house_data():
    from sklearn.datasets.california_housing import fetch_california_housing
    from sklearn.preprocessing import StandardScaler
    houses = fetch_california_housing()
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(houses.data)
    X_train, X_val, Y_train, Y_val = train_test_split(scaled_data,
                                                      houses.target)
    print('Train data shape', X_train.shape)
    print('Validation data shape', X_val.shape)
    return X_train, X_val, Y_train, Y_val
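Note that the scaler above is fit on the full dataset before the split, so
statistics from the validation rows leak into training. A sketch of the
leakage-free variant, fitting the scaler on the training portion only:

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

houses = fetch_california_housing()
X_train, X_val, Y_train, Y_val = train_test_split(houses.data, houses.target)
scaler = StandardScaler().fit(X_train)   # statistics from training data only
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)          # validation reuses the train statistics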
Example #6
def gbm_monotone_smoke_test():
    cal_housing = fetch_california_housing()
    data = h2o.H2OFrame(cal_housing.data, column_names=cal_housing.feature_names)

    data["target"] = h2o.H2OFrame(cal_housing.target)

    train, test = data.split_frame([0.6], seed=123)

    feature_names = ['MedInc', 'AveOccup', 'HouseAge']
    monotone_constraints = {"MedInc": 1, "AveOccup": -1, "HouseAge": 1}

    gbm_mono = H2OGradientBoostingEstimator(monotone_constraints=monotone_constraints, seed=42)
    gbm_mono.train(x=feature_names, y="target", training_frame=train, validation_frame=test)
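For readers without an H2O cluster, scikit-learn offers the same kind of
monotone constraints through HistGradientBoostingRegressor's monotonic_cst
parameter; a minimal sketch on the same three features (assuming
scikit-learn >= 1.0; versions 0.23-0.24 additionally need the
enable_hist_gradient_boosting experimental import):

from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import HistGradientBoostingRegressor

cal_housing = fetch_california_housing()
names = list(cal_housing.feature_names)
cols = [names.index(f) for f in ('MedInc', 'AveOccup', 'HouseAge')]

# +1 = monotonically increasing, -1 = decreasing, matching the H2O setup
est = HistGradientBoostingRegressor(monotonic_cst=[1, -1, 1], random_state=42)
est.fit(cal_housing.data[:, cols], cal_housing.target)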
Example #8
def test_weighted_nurturing():
    # X1 = np.arange(0, 10, 0.1)
    # X2 = np.arange(10, 20, 0.1)
    # y_train = np.sin(X1).ravel() + np.cos(X2).ravel()
    # X_train = pd.DataFrame(np.array([X1, X2]).T, columns=['x1', 'x2'])
    # X_test, y_test = X_train, y_train
    cal_housing = fetch_california_housing()
    X_train, X_test, y_train, y_test = \
        train_test_split(cal_housing.data, cal_housing.target,
                         test_size=0.3, random_state=5)
    X_train = pd.DataFrame(X_train, columns=cal_housing.feature_names)
    X_test = pd.DataFrame(X_test, columns=cal_housing.feature_names)

    model_params = {
        'max_depth': 10, 'n_estimators': 200, 'random_state': 5,
        'n_jobs': -1, 'bootstrap': True,
    }
    with StopWatch('weighted nurturing'):
        nurtured_ensemble = weighted_nurturing(
            RandomForestRegressor, X_train, y_train,
            feature_names=X_train.columns,
            n_iterations=20, metric_function='mse',
            n_tunes=50, update_weight=0.3,
            model_params=model_params)

    print("The mean-squared error for nurtured ensemble prediction: {}".format(
          mean_squared_error(nurtured_ensemble.predict(X_test), y_test)))
    # plain_rf = RandomForestRegressor(
    #     min_samples_leaf=100, max_depth=8,
    #     n_estimators=1000, random_state=5, n_jobs=-1)
    plain_params = {'random_state': 5, }
    plain_rf = RandomForestRegressor(n_estimators=3000, max_depth=8, n_jobs=-1, **plain_params)
    plain_rf.fit(X_train, y_train)
    plain_gbr = GradientBoostingRegressor(
        n_estimators=1000, max_depth=5, **plain_params)
    plain_gbr.fit(X_train, y_train)

    print("The mean-squared error for plain RandomForest prediction: {}".format(
          mean_squared_error(plain_rf.predict(X_test), y_test)))
    print("The mean-squared error for plain GradientBoost prediction: {}".format(
          mean_squared_error(plain_gbr.predict(X_test), y_test)))
Example #9
run_id, experiment_id, experiment_name, run_id, metric, run_mode = create_predict_widgets(exp_name, metrics)

run_id,experiment_id,experiment_name,run_id,metric,run_mode,exp_name

# COMMAND ----------

dump_run_id(run_id)

# COMMAND ----------

# MAGIC %md ### Read data

# COMMAND ----------

from sklearn.datasets.california_housing import fetch_california_housing
data = fetch_california_housing().data

# COMMAND ----------

# MAGIC %md ### Review the MLflow UI

# COMMAND ----------

display_run_uri(experiment_id, run_id)

# COMMAND ----------

# MAGIC %md ### Predict
# MAGIC 
# MAGIC Let's now register our Keras model as a Spark UDF to apply to rows in parallel.
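The notebook breaks off here. A sketch of the step the cell above describes,
wrapping the logged model as an MLflow pyfunc Spark UDF; the "model" artifact
path and the f0..f7 column names are assumptions, since the training cells
are not shown:

import mlflow.pyfunc
import pandas as pd

pdf = pd.DataFrame(data, columns=['f%d' % i for i in range(data.shape[1])])
df = spark.createDataFrame(pdf)

# Score rows in parallel via the pyfunc wrapper around the logged model
udf = mlflow.pyfunc.spark_udf(spark, 'runs:/{}/model'.format(run_id))
predictions = df.withColumn('prediction', udf(*df.columns))
display(predictions)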
Example #10
from sklearn.datasets.california_housing import fetch_california_housing
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
import numpy as np
from sklearn.neighbors import BallTree

## --------- create dataframe, add .target ----------

dataset = fetch_california_housing()
df = pd.DataFrame(dataset.data, columns=dataset.feature_names)
df['target'] = pd.Series(dataset.target)

## --------- 2 features chosen for training ---------

X = df[['Population', 'HouseAge']]
print('_____________________________________')
print('Dataset before train split')
print(X.describe())
y = df.target

## ------------- split for train & test -------------

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    random_state=10,
                                                    test_size=0.2)
df2 = pd.DataFrame(data=X_train, columns=['Population', 'HouseAge'])

## ---------------- feature distance ----------------
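The snippet breaks off at the "feature distance" header without using the
imported BallTree. A sketch of the likely continuation, querying nearest
neighbours in the two-feature space (k=3 and the five query rows are
illustrative choices):

tree = BallTree(X_train.values)                # index the training features
dist, ind = tree.query(X_test.values[:5], k=3)
print('Distances to the 3 nearest training points:\n', dist)
print('Indices of those training points:\n', ind)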
Example #11
import importlib

packages = [
    'pandas', 'IPython', 'statsmodels', 'sklearn', 'seaborn', 'requests',
    'scipy', 'notebook'
]

bad = []
for package in packages:
    try:
        importlib.import_module(package)
    except ImportError:
        bad.append("Can't import %s" % package)
else:
    if len(bad) > 0:
        print('\n'.join(bad))
    else:
        from sklearn.datasets import california_housing
        print("Caching california_housing")
        data = california_housing.fetch_california_housing()
        print("All good. Enjoy the tutorial!")
Example #12
def main():
    cal_housing = fetch_california_housing()

    X, y = cal_housing.data, cal_housing.target
    names = cal_housing.feature_names

    # Center target to avoid gradient boosting init bias: gradient boosting
    # with the 'recursion' method does not account for the initial estimator
    # (here the average target, by default)
    y -= y.mean()

    print("Training MLPRegressor...")
    est = MLPRegressor(activation='logistic')
    est.fit(X, y)
    print('Computing partial dependence plots...')
    # We don't compute the 2-way PDP (5, 1) here, because it is a lot slower
    # with the brute method.
    features = [0, 5, 1, 2]
    plot_partial_dependence(est,
                            X,
                            features,
                            feature_names=names,
                            n_jobs=3,
                            grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with MLPRegressor')
    plt.subplots_adjust(top=0.9)  # tight_layout causes overlap with suptitle

    print("Training GradientBoostingRegressor...")
    est = GradientBoostingRegressor(n_estimators=100,
                                    max_depth=4,
                                    learning_rate=0.1,
                                    loss='huber',
                                    random_state=1)
    est.fit(X, y)
    print('Computing partial dependence plots...')
    features = [0, 5, 1, 2, (5, 1)]
    plot_partial_dependence(est,
                            X,
                            features,
                            feature_names=names,
                            n_jobs=3,
                            grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    print('Custom 3d plot via ``partial_dependence``')
    fig = plt.figure()

    target_feature = (1, 5)
    pdp, axes = partial_dependence(est, X, target_feature, grid_resolution=50)
    XX, YY = np.meshgrid(axes[0], axes[1])
    Z = pdp[0].T
    ax = Axes3D(fig)
    surf = ax.plot_surface(XX,
                           YY,
                           Z,
                           rstride=1,
                           cstride=1,
                           cmap=plt.cm.BuPu,
                           edgecolor='k')
    ax.set_xlabel(names[target_feature[0]])
    ax.set_ylabel(names[target_feature[1]])
    ax.set_zlabel('Partial dependence')
    #  pretty init view
    ax.view_init(elev=22, azim=122)
    plt.colorbar(surf)
    plt.suptitle('Partial dependence of house value on median\n'
                 'age and average occupancy, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    plt.show()
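The centering comment above matters because the 'recursion' method works on
the trees alone and ignores the initial estimator, while 'brute' averages
actual model predictions. A sketch contrasting the two on the fitted gradient
boosting model with the current sklearn.inspection API (scikit-learn >= 0.24;
the returned keys vary slightly across versions):

from sklearn.inspection import partial_dependence

pd_brute = partial_dependence(est, X, [0], method='brute',
                              grid_resolution=50)
pd_recursion = partial_dependence(est, X, [0], method='recursion',
                                  grid_resolution=50)
# With a centered target the two curves should agree up to a small offset
print(pd_brute['average'][0][:5])
print(pd_recursion['average'][0][:5])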
Example #13
    def ml_GradientBoostingClassifier2(self):
        # This example shows how to obtain partial dependence plots from a
        # GradientBoostingRegressor trained on the California housing dataset.
        cal_housing = fetch_california_housing()

        # split 80/20 train-test
        X_train, X_test, y_train, y_test = train_test_split(cal_housing.data,
                                                            cal_housing.target,
                                                            test_size=0.2,
                                                            random_state=1)
        names = cal_housing.feature_names

        print("Training GBRT...")
        clf = GradientBoostingRegressor(n_estimators=100,
                                        max_depth=4,
                                        learning_rate=0.1,
                                        loss='huber',
                                        random_state=1)
        clf.fit(X_train, y_train)
        print(" done.")

        print('Convenience plot with ``partial_dependence_plots``')

        features = [0, 5, 1, 2, (5, 1)]
        fig, axs = plot_partial_dependence(clf,
                                           X_train,
                                           features,
                                           feature_names=names,
                                           n_jobs=3,
                                           grid_resolution=50)
        fig.suptitle(
            'Partial dependence of house value on non-location features\n'
            'for the California housing dataset')
        plt.subplots_adjust(
            top=0.9)  # tight_layout causes overlap with suptitle

        print('Custom 3d plot via ``partial_dependence``')
        fig = plt.figure()

        target_feature = (1, 5)
        pdp, axes = partial_dependence(clf,
                                       target_feature,
                                       X=X_train,
                                       grid_resolution=50)
        XX, YY = np.meshgrid(axes[0], axes[1])
        Z = pdp[0].reshape(list(map(np.size, axes))).T
        ax = Axes3D(fig)
        surf = ax.plot_surface(XX,
                               YY,
                               Z,
                               rstride=1,
                               cstride=1,
                               cmap=plt.cm.BuPu,
                               edgecolor='k')
        ax.set_xlabel(names[target_feature[0]])
        ax.set_ylabel(names[target_feature[1]])
        ax.set_zlabel('Partial dependence')
        #  pretty init view
        ax.view_init(elev=22, azim=122)
        plt.colorbar(surf)
        plt.suptitle('Partial dependence of house value on median\n'
                     'age and average occupancy')
        plt.subplots_adjust(top=0.9)

        plt.show()
Example #14
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/7/31 0031 20:35

# @Author : Shulin Liu
from sklearn.datasets.california_housing import fetch_california_housing
from sklearn import tree
import pydotplus
from IPython.display import Image
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

house = fetch_california_housing()
print(house.DESCR)
dtr = tree.DecisionTreeRegressor(max_depth=2)  # maximum depth of the decision tree is 2
dtr.fit(house.data[:, [6, 7]], house.target)
'''
Decision tree visualization
'''
dot_data = tree.export_graphviz(dtr,
                                out_file=None,
                                feature_names=house.feature_names[6:8],
                                filled=True,
                                impurity=False,
                                rounded=True)
graph = pydotplus.graph_from_dot_data(dot_data)
graph.get_nodes()[7].set_fillcolor("#FFF2DD")
Image(graph.create_png())
graph.write_png('dtr_white_background.png')
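pydotplus also requires a system Graphviz installation. A sketch of the same
visualization with sklearn.tree.plot_tree (available since scikit-learn 0.21),
which needs only matplotlib:

import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(12, 6))
tree.plot_tree(dtr, feature_names=house.feature_names[6:8],
               filled=True, impurity=False, rounded=True, ax=ax)
fig.savefig('dtr_plot_tree.png')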
Example #15
def main():
    cal_housing = fetch_california_housing()

    X, y = cal_housing.data, cal_housing.target
    names = cal_housing.feature_names

    # Center target to avoid gradient boosting init bias: gradient boosting
    # with the 'recursion' method does not account for the initial estimator
    # (here the average target, by default)
    y -= y.mean()

    print("Training SNN_Regressor...")
    est = SNN_Regressor(8,
                        1,
                        10,
                        10,
                        hiddenAct=Activation.Tanh(),
                        error=Error.Mse(),
                        update=Update.RmsProp(0.001, rateDecay=0.9))

    t = [
        (3, lambda e: e.cool()),  # cool
        (6, lambda e: Trainer.prune(e, X, y)),  # prune
        #  ( 18, lambda e: e.cool() ), # cool
        (9,
         lambda e: Trainer.grow(e, max(1, 1 + int(np.log(e.hiddenSize_ + 1))))
         ),  # grow
        #  ( 11, lambda e: e.cool() ), # cool
    ]
    growLoss = Trainer.train(est, X, y, batch=1, maxIter=100, triggers=t)
    est.maxIter_ = 1000
    plt.semilogy(growLoss, label='Grow')
    plt.legend()
    #  plt.show()
    #  pdb.set_trace()

    print("SNN weights:", est.weight_)
    print("SNN dweight:", est.dWeight_)
    print("SNN nHidden:", est.hiddenSize_)
    print('Computing partial dependence plots...')
    # We don't compute the 2-way PDP (5, 1) here, because it is a lot slower
    # with the brute method.
    features = [0, 5, 1, 2]
    plot_partial_dependence(est,
                            X,
                            features,
                            feature_names=names,
                            n_jobs=3,
                            grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with SNN_Regressor...')
    plt.subplots_adjust(top=0.9)  # tight_layout causes overlap with suptitle

    print("Training MLPRegressor...")
    est = MLPRegressor(activation='logistic')
    est.fit(X, y)
    print('MLP Loss: ', np.average(Error.Mse().f(y, est.predict(X))))
    print('Computing partial dependence plots...')
    # We don't compute the 2-way PDP (5, 1) here, because it is a lot slower
    # with the brute method.
    features = [0, 5, 1, 2]
    plot_partial_dependence(est,
                            X,
                            features,
                            feature_names=names,
                            n_jobs=3,
                            grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with MLPRegressor')
    plt.subplots_adjust(top=0.9)  # tight_layout causes overlap with suptitle

    print("Training GradientBoostingRegressor...")
    est = GradientBoostingRegressor(n_estimators=100,
                                    max_depth=4,
                                    learning_rate=0.1,
                                    loss='huber',
                                    random_state=1)
    est.fit(X, y)
    print('Computing partial dependence plots...')
    features = [0, 5, 1, 2, (5, 1)]
    plot_partial_dependence(est,
                            X,
                            features,
                            feature_names=names,
                            n_jobs=3,
                            grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    print('Custom 3d plot via ``partial_dependence``')
    fig = plt.figure()

    target_feature = (1, 5)
    pdp, axes = partial_dependence(est, X, target_feature, grid_resolution=50)
    XX, YY = np.meshgrid(axes[0], axes[1])
    Z = pdp[0].T
    ax = Axes3D(fig)
    surf = ax.plot_surface(XX,
                           YY,
                           Z,
                           rstride=1,
                           cstride=1,
                           cmap=plt.cm.BuPu,
                           edgecolor='k')
    ax.set_xlabel(names[target_feature[0]])
    ax.set_ylabel(names[target_feature[1]])
    ax.set_zlabel('Partial dependence')
    #  pretty init view
    ax.view_init(elev=22, azim=122)
    plt.colorbar(surf)
    plt.suptitle('Partial dependence of house value on median\n'
                 'age and average occupancy, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    plt.show()
Example #16
def main():
    cal_housing = fetch_california_housing()

    X, y = cal_housing.data, cal_housing.target
    names = cal_housing.feature_names

    # Center target to avoid gradient boosting init bias: gradient boosting
    # with the 'recursion' method does not account for the initial estimator
    # (here the average target, by default)
    y -= y.mean()

    print("Training MLPRegressor...")
    est = MLPRegressor(activation='logistic')
    est.fit(X, y)
    print('Computing partial dependence plots...')
    # We don't compute the 2-way PDP (5, 1) here, because it is a lot slower
    # with the brute method.
    features = [0, 5, 1, 2]
    plot_partial_dependence(est, X, features, feature_names=names,
                            n_jobs=3, grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with MLPRegressor')
    plt.subplots_adjust(top=0.9)  # tight_layout causes overlap with suptitle

    print("Training GradientBoostingRegressor...")
    est = GradientBoostingRegressor(n_estimators=100, max_depth=4,
                                    learning_rate=0.1, loss='huber',
                                    random_state=1)
    est.fit(X, y)
    print('Computing partial dependence plots...')
    features = [0, 5, 1, 2, (5, 1)]
    plot_partial_dependence(est, X, features, feature_names=names,
                            n_jobs=3, grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    print('Custom 3d plot via ``partial_dependence``')
    fig = plt.figure()

    target_feature = (1, 5)
    pdp, axes = partial_dependence(est, X, target_feature,
                                   grid_resolution=50)
    XX, YY = np.meshgrid(axes[0], axes[1])
    Z = pdp[0].T
    ax = Axes3D(fig)
    surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1,
                           cmap=plt.cm.BuPu, edgecolor='k')
    ax.set_xlabel(names[target_feature[0]])
    ax.set_ylabel(names[target_feature[1]])
    ax.set_zlabel('Partial dependence')
    #  pretty init view
    ax.view_init(elev=22, azim=122)
    plt.colorbar(surf)
    plt.suptitle('Partial dependence of house value on median\n'
                 'age and average occupancy, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    plt.show()
Example #17
    def setUp(self):
        self.dataset = fetch_california_housing()
"""
print(__doc__)

import numpy as np
import pylab as pl

from mpl_toolkits.mplot3d import Axes3D

from sklearn.cross_validation import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble.partial_dependence import plot_partial_dependence
from sklearn.ensemble.partial_dependence import partial_dependence
from sklearn.datasets.california_housing import fetch_california_housing

# fetch California housing dataset
cal_housing = fetch_california_housing()

# split 80/20 train-test
X_train, X_test, y_train, y_test = train_test_split(cal_housing.data,
                                                    cal_housing.target,
                                                    test_size=0.2,
                                                    random_state=1)
names = cal_housing.feature_names

print('_' * 80)
print("Training GBRT...")
clf = GradientBoostingRegressor(n_estimators=100, max_depth=4,
                                learning_rate=0.1, loss='huber',
                                random_state=1)
clf.fit(X_train, y_train)
print("done.")
Example #19
import importlib

packages = ["pandas", "IPython", "statsmodels", "sklearn", "seaborn", "toolz", "bs4", "requests", "scipy", "tables"]

bad = []
for package in packages:
    try:
        importlib.import_module(package)
    except ImportError:
        bad.append("Can't import %s" % package)
else:
    if len(bad) > 0:
        print("\n".join(bad))
    else:
        from sklearn.datasets import california_housing

        print("Caching california_housing")
        data = california_housing.fetch_california_housing()
        print("All good. Enjoy the tutorial!")
Example #20
    def __init__(self):
        self.data = california_housing.fetch_california_housing()

        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(self.data.data, self.data.target,
                             random_state=42, test_size=0.2)
Example #21
"""
for stuff on techniques used for mnist

http://yann.lecun.com/exdb/mnist/
"""
from sklearn.datasets import load_digits
from sklearn.datasets.california_housing import fetch_california_housing
# load_digits returns the small built-in 8x8 digits dataset, not full MNIST
mnist = load_digits(return_X_y=True)

housing = fetch_california_housing(data_home='.')
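load_digits is the small built-in 8x8 digits dataset, not MNIST proper. A
sketch of fetching the real MNIST from OpenML instead (the as_frame argument
needs a recent scikit-learn; the download is roughly 130 MB):

from sklearn.datasets import fetch_openml

X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
print(X.shape, y.shape)   # (70000, 784) (70000,)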
Example #22
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
# @Company : CNC Center, School of Mechanical Engineering, Huazhong University of Science and Technology
# @version : V1.0
# @Author  : lizhaofu
# @contact : [email protected]  2018--2022
# @Time    : 2019/11/27 16:27
# @File    : decision_tree_regression_visualization.py
# @Software: PyCharm
"""

from sklearn.datasets.california_housing import fetch_california_housing
import pydotplus

housing = fetch_california_housing()  # load the dataset bundled with sklearn
# print(housing.DESCR)
print(housing.data.shape)
print(housing.data[1])
print(len(housing.data[1]))
print(housing.feature_names)
print(housing.target)

##### select the features used to build the decision tree
from sklearn import tree

dtr = tree.DecisionTreeRegressor(max_depth=4)
dtr.fit(housing.data[:, [3, 4, 5, 6, 7]], housing.target)  # fit on features 3-7 (AveBedrms, Population, AveOccup, Latitude, Longitude)
### print the basic parameters of the fitted decision tree model (some are defaults)
print(dtr)
Example #23
    def get_datas(self):
        """
        Fetch the California housing data.
        """
        housing = fetch_california_housing()
        return housing
Example #24
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 26 09:35:56 2019

@author: Morten Sahlertz
"""

from sklearn.datasets.california_housing import fetch_california_housing
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

cal_housing = fetch_california_housing()
X, y = cal_housing.data, cal_housing.target
names = cal_housing.feature_names

#%% New variable names
median_income = X[:, np.newaxis, 0]
median_house_value = y

#%% Histogram for median income
fig1 = plt.figure()
ax = fig1.add_subplot(111)
ax.hist(median_income, bins=100)  # histogram
ax.set_title('Median Income Histogram')
ax.set_xlabel('Median Income')
ax.set_ylabel('Occurrences')

#%% Standard deviation, mean and median
standard = np.std(median_income)
print('The standard deviation is: {0:.5f}'.format(standard))
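The scipy.stats.norm import above goes unused in this fragment. A sketch of
the likely intent, overlaying a fitted normal density on the income histogram
(the bin count mirrors the plot above):

#%% Fit a normal distribution and overlay its density on the histogram
mu, sigma = norm.fit(median_income.ravel())
fig2 = plt.figure()
ax2 = fig2.add_subplot(111)
ax2.hist(median_income.ravel(), bins=100, density=True)
x = np.linspace(median_income.min(), median_income.max(), 200)
ax2.plot(x, norm.pdf(x, mu, sigma))
ax2.set_title('Median Income with Fitted Normal Density')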
Example #25
    def read_data(self):
        self.__all = fetch_california_housing()
        self.__train_feature, self.__test_feature, self.__train_label, self.__test_label = train_test_split(
            self.__all.data, self.__all.target, test_size=0.2, random_state=1)