def compare_panorama_cubic(greenery_measure="vegetation", **kwargs):
    """ Compare/plot the segmentation results of panoramic and cubic
        images to each other. Also use linear regression to determine
        how they relate to each other.
    """

    green_kwargs = select_green_model(greenery_measure)

    panorama_tiler = TileManager(cubic_pictures=False, **kwargs, **green_kwargs)
    cubic_tiler = TileManager(cubic_pictures=True, **kwargs, **green_kwargs)

    panorama_green = panorama_tiler.green_direct()
    cubic_green = cubic_tiler.green_direct()

    _remove_missing(panorama_green, cubic_green)  # drop points missing from either result set
    x = np.arange(0, 0.8, 0.01)  # grid for drawing the fitted regression line

    x_pano = np.array(panorama_green["green"]).reshape(-1, 1)
    y_cubic = np.array(cubic_green["green"])
    reg = LinearRegression().fit(x_pano, y_cubic)
    print(reg.score(x_pano, y_cubic))  # R^2 of cubic vs. panorama greenery
    print(reg.coef_[0], reg.intercept_)  # slope and intercept of the fit
    plt.figure()
    plt.scatter(panorama_green["green"], cubic_green["green"])
    plt.plot(x, reg.predict(x.reshape(-1, 1)))
    plt.xlabel("panoramas")
    plt.ylabel("cubic")
    plt.xlim(0, max(0.001, max(panorama_green["green"])*1.1))
    plt.ylim(0, max(0.001, max(cubic_green["green"])*1.1))

    plot_greenery(panorama_green, show=False, title="panorama")
    plot_greenery(cubic_green, show=False, title="cubic")
    plt.show()
Example 2
import numpy as np
from numpy.testing import assert_almost_equal, assert_array_almost_equal
from scipy import linalg
from sklearn.linear_model import LinearRegression


def test_linear_regression_sample_weights():
    # TODO: loop over sparse data as well

    rng = np.random.RandomState(0)

    # This would not work for under-determined systems
    for n_samples, n_features in ((6, 5), ):

        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        for intercept in (True, False):

            # LinearRegression with explicit sample_weight
            reg = LinearRegression(fit_intercept=intercept)
            reg.fit(X, y, sample_weight=sample_weight)
            coefs1 = reg.coef_
            inter1 = reg.intercept_

            assert reg.coef_.shape == (X.shape[1], )  # sanity checks
            assert reg.score(X, y) > 0.5

            # Closed form of the weighted least square
            # theta = (X^T W X)^(-1) * X^T W y
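            # where W = diag(sample_weight); when an intercept is fit, X is
            # augmented with a leading column of ones, so coefs2[0] below is
            # the intercept estimate.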
            W = np.diag(sample_weight)
            if intercept is False:
                X_aug = X
            else:
                dummy_column = np.ones(shape=(n_samples, 1))
                X_aug = np.concatenate((dummy_column, X), axis=1)

            coefs2 = linalg.solve(
                X_aug.T.dot(W).dot(X_aug),
                X_aug.T.dot(W).dot(y))

            if intercept is False:
                assert_array_almost_equal(coefs1, coefs2)
            else:
                assert_array_almost_equal(coefs1, coefs2[1:])
                assert_almost_equal(inter1, coefs2[0])
Example 3
def test_score(self):
    # create some data
    x = np.array(list(range(0, 10)))
    y = x * 2
    df = pd.DataFrame({'x': x,
                       'y': y})
    X = df[['x']]
    Y = df[['y']]
    # put into Omega
    os.environ['DJANGO_SETTINGS_MODULE'] = ''
    om = Omega()
    om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
    om.datasets.put(X, 'datax')
    om.datasets.put(Y, 'datay')
    om.datasets.get('datax')
    om.datasets.get('datay')
    # create a model locally, fit it, store in Omega
    lr = LinearRegression()
    lr.fit(X, Y)
    scores = lr.score(X, Y)
    om.models.put(lr, 'mymodel')
Example 5
def test_linear_regression_sample_weights():
    rng = np.random.RandomState(0)

    for n_samples, n_features in ((6, 5), (5, 10)):
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        clf = LinearRegression()
        clf.fit(X, y, sample_weight)
        coefs1 = clf.coef_

        assert clf.coef_.shape == (X.shape[1], )
        assert clf.score(X, y) > 0.9
        assert_array_almost_equal(clf.predict(X), y)
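        # the fit is exact here because there are at least as many parameters
        # as samples in both cases: (6, 5) plus an intercept, and (5, 10)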

        # Sample weight can be implemented via a simple rescaling
        # for the square loss.
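        # since sum_i w_i * (y_i - x_i @ beta)**2
        #   == sum_i (sqrt(w_i)*y_i - sqrt(w_i)*x_i @ beta)**2,
        # fitting on the rescaled data must recover the same coefficients.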
        scaled_y = y * np.sqrt(sample_weight)
        scaled_X = X * np.sqrt(sample_weight)[:, np.newaxis]
        clf.fit(scaled_X, scaled_y)
        coefs2 = clf.coef_

        assert_array_almost_equal(coefs1, coefs2)
Example 7
#print(boston.target)
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                    boston.target,
                                                    test_size=0.2,
                                                    random_state=2)

# Add polynomial features so the linear regression model fits the data better.
# Raising the polynomial degree keeps improving the fit on the training set,
# but easily leads to overfitting.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)  # transform only: reuse the mapping fitted on X_train
# Polynomial linear regression ('normalize' was removed from LinearRegression
# in scikit-learn 1.2; scale the features separately if needed)
model2 = LinearRegression()
model2.fit(X_train_poly, y_train)
multiScore = model2.score(X_test_poly, y_test)
print(multiScore)

# Evaluate the model on the test set using the mean squared error (MSE)
# Model predictions
y_pred = model2.predict(X_test_poly)
print("MSE:", metrics.mean_squared_error(y_test, y_pred))

# Cross-validation
predicted = cross_val_predict(model2, boston.data, boston.target, cv=10)
print("MSE:", metrics.mean_squared_error(boston.target, predicted))

# Plot
import matplotlib.pyplot as plt
plt.scatter(boston.target, predicted, color="y", marker="o")
plt.scatter(boston.target, boston.target, color="g", marker="+")
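A minimal Pipeline sketch of the same degree-2 model for recent scikit-learn,
where the 'normalize' option no longer exists; the StandardScaler step and the
poly_model name are assumptions standing in for the removed normalization:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression

poly_model = make_pipeline(
    PolynomialFeatures(degree=2, include_bias=False),
    StandardScaler(),
    LinearRegression(),
)
poly_model.fit(X_train, y_train)         # every step is fit on the training split only
print(poly_model.score(X_test, y_test))  # the fitted transforms are reapplied to X_test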
Example 8
boston = load_boston()  # note: load_boston was removed in scikit-learn 1.2

#print(boston)
# The DESCR attribute documents the dataset: there are 14 columns, where the
# first 13 are features and the last one is the target.
#print(boston.DESCR)
# boston.data and boston.target hold the features and the target respectively
#print(boston.data)
#print(boston.target)
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target,
                                                    test_size=0.2, random_state=2)

# Plain linear regression ('normalize' was removed from LinearRegression in
# scikit-learn 1.2)
model1 = LinearRegression()
model1.fit(X_train, y_train)
# Goodness of fit on the test set
simpleScore = model1.score(X_test, y_test)
print(simpleScore)
# Regression coefficients
#print(model1.coef_)
# Intercept
#print(model1.intercept_)
#print(simpleScore)

# Evaluate the model on the test set using the mean squared error (MSE)
# Model predictions
y_pred = model1.predict(X_test)
print("MSE:", metrics.mean_squared_error(y_test, y_pred))

# Cross-validation
predicted = cross_val_predict(model1, boston.data, boston.target, cv=10)
print("MSE:", metrics.mean_squared_error(boston.target, predicted))
Example 9
for feature, target in zip(feature_test, target_test):
    plt.scatter(feature, target, color=test_color)
for feature, target in zip(feature_train, target_train):
    plt.scatter(feature, target, color=train_color)

### labels for the legend
plt.scatter(feature_test[0], target_test[0], color=test_color, label="test")
plt.scatter(feature_train[0], target_train[0], color=train_color, label="train")


from sklearn.linear_model import LinearRegression  # linear_model.base is a private/removed path

reg = LinearRegression()
reg.fit(feature_train, target_train)
print("Slope %s" % reg.coef_)
print("Intercept %s" % reg.intercept_)

print("Score = ", reg.score(feature_test, target_test))
### draw the regression line, once it's coded
try:
    plt.plot( feature_test, reg.predict(feature_test) )
except NameError:
    pass
# refit on the test points to see how strongly a few points change the slope
reg.fit(feature_test, target_test)
plt.plot(feature_train, reg.predict(feature_train), color="b")
plt.xlabel(features_list[1])
plt.ylabel(features_list[0])
plt.legend()
plt.show()
print("Slope2 %s" % reg.coef_)
print("Intercept2 %s" % reg.intercept_)
Example 10

### reshape into 2D numpy arrays: (n_rows, n_columns), where
### n_columns is the number of features
ages       = numpy.reshape( numpy.array(ages), (len(ages), 1))
net_worths = numpy.reshape( numpy.array(net_worths), (len(net_worths), 1))
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in 0.20
ages_train, ages_test, net_worths_train, net_worths_test = train_test_split(
    ages, net_worths, test_size=0.1, random_state=42)

### fill in a regression here!  Name the regression object reg so that
### the plotting code below works, and you can see what your regression looks like
from sklearn.linear_model import LinearRegression

reg = LinearRegression()
reg.fit(ages_train, net_worths_train)
print("Slope %s" % reg.coef_)
print("Intercept %s" % reg.intercept_)

print("Score = ", reg.score(ages_test, net_worths_test))






try:
    plt.plot(ages, reg.predict(ages), color="blue")
except NameError:
    pass
plt.scatter(ages, net_worths)
plt.show()


### identify and remove the most outlier-y points
Example 11

# labels
y = np.array(df['label'])
"""
This next section jumbles the rows, but keeps the relationship between X and y.
This is so that we can train the linearRegression model, and then test it on different
data so that we know that it is now able to get the answers right!
"""
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2)  # sklearn.cross_validation was removed in 0.20

# Create and train a regressor
clf = LinearRegression(n_jobs=-1)
# training data
clf.fit(X_train, y_train)
# test the data
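# (note: LinearRegression.score returns the R^2 coefficient of determination,
# not classification accuracy, despite the variable name below)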
accuracy = clf.score(X_test, y_test)

# predict future <forecast_col> values
forecast_set = clf.predict(X_lately)

df['Forecast'] = np.nan

# set up dates to use on the graph
last_date = df.iloc[-1].name
last_unix = last_date.timestamp()
one_day = 86400
next_unix = last_unix + one_day

for i in forecast_set:
    next_date = datetime.datetime.fromtimestamp(next_unix)
    next_unix += one_day
    # append each forecast as a new row: NaN for every existing column,
    # with the predicted value in 'Forecast'
    df.loc[next_date] = [np.nan for _ in range(len(df.columns) - 1)] + [i]
Example 12
# Splitting data into test and train sets
# train_set, test_set = train_test_split(data_df, test_size=0.01, random_state=np.random.randint(1, 1000))
# Remove unused variables for memory management

# Separate output from inputs
y_train = data_df['time_to_failure']
x_train_seg = data_df['segment_id']
x_train = data_df.drop(['time_to_failure', 'segment_id'], axis=1)

# y_test = test_set['time_to_failure']
# x_test_seg = test_set['segment_id']
# x_test = test_set.drop(['time_to_failure'], axis=1)
# x_test = x_test.drop(['segment_id'], axis=1)

model = LinearRegression(n_jobs=4) 
model.fit(x_train, y_train)

mh = ModelHolder(model, most_dependent_columns)
mh.save(model_name)
model = None
mh_new = load_model(model_name)
model, most_dependent_columns = mh_new.get()

print('Evaluating on the training data (the test split above is commented out) ... ')
print('Calculating score and error ... ')
y_pred = model.predict(x_train)
print('Score', model.score(x_train, y_train))

mae = mean_absolute_error(y_train, y_pred)
print('Mean Absolute Error', mae)
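The score and error above are computed on the very rows the model was trained
on, so they are optimistic. A minimal holdout sketch for a fairer estimate (the
split and the names x_tr/x_val below are assumptions, not part of the original
pipeline):

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

x_tr, x_val, y_tr, y_val = train_test_split(x_train, y_train,
                                            test_size=0.2, random_state=0)
holdout = LinearRegression(n_jobs=4).fit(x_tr, y_tr)
print('Holdout score', holdout.score(x_val, y_val))  # R^2 on unseen rows
print('Holdout MAE', mean_absolute_error(y_val, holdout.predict(x_val)))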