Code example #1
# The imports below are one reasonable way to satisfy this example's
# dependencies. RidgeRegression and k_fold_cross_validation_sets are assumed
# to come from the accompanying project code rather than from scikit-learn.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.metrics import mean_squared_error


def main():
    # Load the diabetes dataset
    diabetes = datasets.load_diabetes()

    # Use only one feature
    X = np.expand_dims(diabetes.data[:, 2], 1)

    # Split the data into training/testing sets
    X_train, X_test = np.array(X[:-20]), np.array(X[-20:])

    # Split the targets into training/testing sets
    y_train, y_test = np.array(diabetes.target[:-20]), np.array(
        diabetes.target[-20:])

    # Find the regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    # Build the folds once (materialized as a list) so every candidate
    # regularization factor is evaluated on the same splits
    cross_validation_sets = list(k_fold_cross_validation_sets(X_train,
                                                              y_train,
                                                              k=k))
    for regularization_factor in np.arange(0, 0.5, 0.0001):
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = RidgeRegression(delta=regularization_factor)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print("\tMean Squared Error: %s (regularization: %s)" % (
            mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Refit on the full training set with the best factor and evaluate
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (
        mse, best_reg_factor))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.show()


if __name__ == "__main__":
    main()
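
The examples in this section rely on a k_fold_cross_validation_sets helper that is not shown. The sketch below is one minimal way such a helper could work, assuming it shuffles the data and yields one (X_train, X_test, y_train, y_test) tuple per fold; the real project helper may differ in shuffling and fold-size handling.

import numpy as np


def k_fold_cross_validation_sets(X, y, k=10):
    # Shuffle the sample indices, then split them into k roughly equal folds
    indices = np.random.permutation(len(y))
    folds = np.array_split(indices, k)
    for i in range(k):
        # Fold i is the held-out test set; the remaining folds form the
        # training set
        test_idx = folds[i]
        train_idx = np.concatenate([folds[j] for j in range(k) if j != i])
        yield X[train_idx], X[test_idx], y[train_idx], y[test_idx]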
Code example #2
# The imports below are one reasonable way to satisfy this example's
# dependencies. RidgeRegression and k_fold_cross_validation_sets are assumed
# to come from the accompanying project code rather than from scikit-learn.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


def main():
    # Generate a synthetic regression dataset
    X, y = datasets.make_regression(n_features=1,
                                    n_samples=100,
                                    bias=3,
                                    noise=10)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Find the regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    # Build the folds once (materialized as a list) so every candidate
    # regularization factor is evaluated on the same splits
    cross_validation_sets = list(k_fold_cross_validation_sets(X_train,
                                                              y_train,
                                                              k=k))
    for regularization_factor in np.arange(0, 0.3, 0.001):
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = RidgeRegression(delta=regularization_factor)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print("\tMean Squared Error: %s (regularization: %s)" %
              (mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Refit on the full training set with the best factor and evaluate
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" %
          (mse, best_reg_factor))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Ridge Regression (%.2f MSE)" % mse)
    plt.show()


if __name__ == "__main__":
    main()
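
RidgeRegression is likewise assumed to come from the project code. As a reference point, here is a minimal closed-form ridge regressor in which delta is taken to be the L2 penalty strength, solving w = (X^T X + delta * I)^-1 X^T y on a bias-augmented design matrix; this is a sketch under those assumptions, not the project's actual class.

import numpy as np


class RidgeRegression:
    def __init__(self, delta=0.1):
        # delta is treated as the L2 regularization strength (an assumption
        # about the parameter used in the examples above)
        self.delta = delta
        self.w = None

    def _add_bias(self, X):
        # Prepend a column of ones so the first weight acts as the intercept
        return np.hstack([np.ones((X.shape[0], 1)), X])

    def fit(self, X, y):
        Xb = self._add_bias(X)
        # Closed-form ridge solution: w = (X^T X + delta * I)^-1 X^T y
        A = Xb.T @ Xb + self.delta * np.eye(Xb.shape[1])
        self.w = np.linalg.solve(A, Xb.T @ y)
        return self

    def predict(self, X):
        return self._add_bias(X) @ self.w

Note that this sketch penalizes the bias weight along with the others; many implementations exclude the intercept from the regularization term.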