def test_load_diabetes():
    res = load_diabetes()
    assert_equal(res.data.shape, (442, 10))
    assert_equal(res.target.size, 442)

    # test return_X_y option
    X_y_tuple = load_diabetes(return_X_y=True)
    bunch = load_diabetes()
    assert_true(isinstance(X_y_tuple, tuple))
    assert_array_equal(X_y_tuple[0], bunch.data)
    assert_array_equal(X_y_tuple[1], bunch.target)
def ModelSelectionTest01():
	from sklearn import datasets, svm
	import numpy as np
	digits = datasets.load_digits()
	X_digits = digits.data
	Y_digits = digits.target
	svc = svm.SVC(C = 1, kernel = 'linear')
	score = svc.fit(X_digits[:-100], Y_digits[:-100]).score(X_digits[-100:], Y_digits[-100:])

	#print score

	X_folds = np.array_split(X_digits, 3)
	Y_folds = np.array_split(Y_digits, 3)

	#print len(X_folds[0])

	scores = list()

	for k in range(3):
		X_train = list(X_folds) # X_folds is a list with 3 folds
		X_test = X_train.pop(k) # the test set is the k-th fold
		X_train = np.concatenate(X_train) # the remaining folds form the training set
		#print len(X_train)
		Y_train = list(Y_folds)
		Y_test = Y_train.pop(k)
		Y_train = np.concatenate(Y_train)

		scores.append(svc.fit(X_train, Y_train).score(X_test, Y_test))

	#print scores


	from sklearn import cross_validation
	k_fold = cross_validation.KFold(n = 6, n_folds = 3)
	for train_indices, test_indices in k_fold:
		print train_indices, test_indices

	k_fold = cross_validation.KFold(len(X_digits), n_folds = 3)
	scores = [svc.fit(X_digits[train], Y_digits[train]).score(X_digits[test], Y_digits[test]) for train , test in k_fold]

	#print scores

	scores = cross_validation.cross_val_score(svc, X_digits, Y_digits, cv = k_fold, n_jobs = 1)
	#print scores

	from sklearn.grid_search import GridSearchCV
	gammas = np.logspace(-6, -1, 10)
	clf = GridSearchCV(estimator = svc, param_grid = dict(gamma = gammas), n_jobs = 1)
	clf.fit(X_digits[:1000], Y_digits[:1000])
	print clf.best_score_
	print clf.best_estimator_.gamma

	from sklearn import linear_model, datasets
	lasso = linear_model.LassoCV()    # How does LassoCV differ from Lasso? It picks alpha by cross-validation; see the sketch after this function
	diabetes = datasets.load_diabetes()
	X_diabetes = diabetes.data
	Y_diabetes = diabetes.target
	lasso.fit(X_diabetes, Y_diabetes)

	print lasso.alpha_
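# To answer the question above: Lasso uses a fixed, user-chosen regularization
# strength alpha, while LassoCV cross-validates over a grid of alphas and keeps
# the best one. A minimal standalone sketch:
from sklearn import datasets, linear_model

X, y = datasets.load_diabetes(return_X_y=True)

lasso = linear_model.Lasso(alpha=0.1).fit(X, y)    # alpha fixed by hand
lasso_cv = linear_model.LassoCV(cv=5).fit(X, y)    # alpha chosen by cross-validation
print(lasso_cv.alpha_)                             # the selected alpha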
def supervisedTest02():
	import numpy as np
	from sklearn import datasets

	diabetes = datasets.load_diabetes()
	diabetes_X_train = diabetes.data[:-20]
	diabetes_X_test  = diabetes.data[-20:]
	diabetes_Y_train = diabetes.target[:-20]
	diabetes_Y_test  = diabetes.target[-20:]

	from sklearn import linear_model
	regr = linear_model.LinearRegression(copy_X=True, fit_intercept=True, normalize=False)
	regr.fit(diabetes_X_train, diabetes_Y_train)

	#print regr.coef_  # coef_ has one entry per feature (10 for diabetes); the intercept is stored separately in intercept_

	mean_err = np.mean((regr.predict(diabetes_X_test) - diabetes_Y_test) ** 2)
	score = regr.score(diabetes_X_test, diabetes_Y_test) # R^2 score on the test data

	print mean_err
	print score


	print len(diabetes.data)    # number of samples
	print len(diabetes.data[0]) # number of features
Example #4
    def load_diabetes_data():
        """
        Load the diabetes data set from scikit learn

        Args:
            None

        Returns:
            diabetes_X_train: Training features for diabetes data set
            diabetes_X_test: Test set features for diabetes data set
            diabetes_y_train: Target variables of the training set
            diabetes_y_test: Target variables of the test set
        """
        diabetes = datasets.load_diabetes()
        diabetes_X, diabetes_y = diabetes.data, diabetes.target

        # Split the data set as
        # 70 % -> Training set
        # 30 % -> Test set

        limit = int(0.7 * len(diabetes_y))  # slice indices must be integers
        diabetes_X_train = diabetes_X[:limit]
        diabetes_X_test = diabetes_X[limit:]
        diabetes_y_train = diabetes_y[:limit]
        diabetes_y_test = diabetes_y[limit:]
        return diabetes_X_train, diabetes_X_test, diabetes_y_train, diabetes_y_test
Example #5
File: test_grnn.py Project: itdxer/neupy
    def test_simple_grnn(self):
        dataset = datasets.load_diabetes()
        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, train_size=0.7
        )

        x_train_before = x_train.copy()
        x_test_before = x_test.copy()
        y_train_before = y_train.copy()

        grnnet = algorithms.GRNN(std=0.1, verbose=False)
        grnnet.train(x_train, y_train)
        result = grnnet.predict(x_test)
        error = metrics.mean_absolute_error(result, y_test)

        old_result = result.copy()
        self.assertAlmostEqual(error, 46.3358, places=4)

        # Test problem with variable links
        np.testing.assert_array_equal(x_train, x_train_before)
        np.testing.assert_array_equal(x_test, x_test_before)
        np.testing.assert_array_equal(y_train, y_train_before)

        x_train[:, :] = 0
        result = grnnet.predict(x_test)

        np.testing.assert_array_almost_equal(result, old_result)
Example #6
    def test_levenberg_marquardt(self):
        dataset = datasets.load_diabetes()
        data, target = dataset.data, dataset.target

        data_scaler = preprocessing.MinMaxScaler()
        target_scaler = preprocessing.MinMaxScaler()

        x_train, x_test, y_train, y_test = train_test_split(
            data_scaler.fit_transform(data),
            target_scaler.fit_transform(target.reshape(-1, 1)),
            train_size=0.85
        )

        # Network
        lmnet = algorithms.LevenbergMarquardt(
            connection=[
                layers.SigmoidLayer(10),
                layers.SigmoidLayer(40),
                layers.OutputLayer(1),
            ],
            mu_increase_factor=2,
            mu=0.1,
            show_epoch=10,
            use_bias=False,
            verbose=False,
        )
        lmnet.train(x_train, y_train, epochs=100)
        y_predict = lmnet.predict(x_test)

        error = rmsle(target_scaler.inverse_transform(y_test),
                      target_scaler.inverse_transform(y_predict).round())

        self.assertAlmostEqual(0.4372, error, places=4)
Example #7
def linearReg():
    from sklearn import datasets
    diabetes = datasets.load_diabetes()
    diabetes_X_train = diabetes.data[:-20]
    diabetes_X_test = diabetes.data[-20:]
    diabetes_y_train = diabetes.target[:-20]
    diabetes_y_test = diabetes.target[-20:]
    from sklearn import linear_model
    regr = linear_model.LinearRegression()
    regr.fit(diabetes_X_train, diabetes_y_train)
    print(regr.coef_)
    import numpy as np
    np.mean((regr.predict(diabetes_X_test) - diabetes_y_test) ** 2)
    regr.score(diabetes_X_test, diabetes_y_test)

    X = np.c_[.5, 1].T
    y = [.5, 1]
    test = np.c_[0, 2].T
    regr = linear_model.LinearRegression()

    import pylab as pl
    pl.figure()
    np.random.seed(0)
    for _ in range(6):
        this_X = .1 * np.random.normal(size=(2, 1)) + X
        regr.fit(this_X, y)
        pl.plot(test, regr.predict(test))
        pl.scatter(this_X, y, s=3)
Example #8
def test_linearsvr_fit_sampleweight():
    # check correct result when sample_weight is 1
    # check that SVR(kernel='linear') and LinearSVC() give
    # comparable results
    diabetes = datasets.load_diabetes()
    n_samples = len(diabetes.target)
    unit_weight = np.ones(n_samples)
    lsvr = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target,
                                    sample_weight=unit_weight)
    score1 = lsvr.score(diabetes.data, diabetes.target)

    lsvr_no_weight = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target)
    score2 = lsvr_no_weight.score(diabetes.data, diabetes.target)

    assert_allclose(np.linalg.norm(lsvr.coef_),
                    np.linalg.norm(lsvr_no_weight.coef_), 1, 0.0001)
    assert_almost_equal(score1, score2, 2)

    # check that fit(X)  = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where
    # X = X1 repeated n1 times, X2 repeated n2 times and so forth
    random_state = check_random_state(0)
    random_weight = random_state.randint(0, 10, n_samples)
    lsvr_unflat = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target,
                                           sample_weight=random_weight)
    score3 = lsvr_unflat.score(diabetes.data, diabetes.target,
                               sample_weight=random_weight)

    X_flat = np.repeat(diabetes.data, random_weight, axis=0)
    y_flat = np.repeat(diabetes.target, random_weight, axis=0)
    lsvr_flat = svm.LinearSVR(C=1e3).fit(X_flat, y_flat)
    score4 = lsvr_flat.score(X_flat, y_flat)

    assert_almost_equal(score3, score4, 2)
Example #9
    def test_Lasso_Path(self):
        diabetes = datasets.load_diabetes()
        X = diabetes.data
        y = diabetes.target
        X /= X.std(axis=0)

        df = pdml.ModelFrame(diabetes)
        df.data /= df.data.std(axis=0, ddof=False)

        self.assert_numpy_array_almost_equal(df.data.values, X)

        eps = 5e-3
        expected = lm.lasso_path(X, y, eps, fit_intercept=False)
        result = df.lm.lasso_path(eps=eps, fit_intercept=False)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])

        expected = lm.enet_path(X, y, eps=eps, l1_ratio=0.8, fit_intercept=False)
        result = df.lm.enet_path(eps=eps, l1_ratio=0.8, fit_intercept=False)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])

        expected = lm.enet_path(X, y, eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)
        result = df.lm.enet_path(eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])

        expected = lm.lars_path(X, y, method='lasso', verbose=True)
        result = df.lm.lars_path(method='lasso', verbose=True)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])
Example #10
    def test_hessian_diagonal(self):
        dataset = datasets.load_diabetes()
        data, target = dataset.data, dataset.target

        input_scaler = preprocessing.StandardScaler()
        target_scaler = preprocessing.StandardScaler()

        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            input_scaler.fit_transform(data),
            target_scaler.fit_transform(target.reshape(-1, 1)),
            train_size=0.8
        )

        nw = algorithms.HessianDiagonal(
            connection=[
                layers.SigmoidLayer(10),
                layers.SigmoidLayer(20),
                layers.OutputLayer(1)
            ],
            step=1.5,
            shuffle_data=False,
            verbose=False,
            min_eigenvalue=1e-10
        )
        nw.train(x_train, y_train, epochs=10)
        y_predict = nw.predict(x_test)

        error = rmsle(target_scaler.inverse_transform(y_test),
                      target_scaler.inverse_transform(y_predict).round())

        self.assertAlmostEqual(0.5032, error, places=4)
Example #11
    def test_mixture_of_experts(self):
        dataset = datasets.load_diabetes()
        data, target = asfloat(dataset.data), asfloat(dataset.target)
        insize, outsize = data.shape[1], 1

        input_scaler = preprocessing.MinMaxScaler((-1, 1))
        output_scaler = preprocessing.MinMaxScaler()
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            input_scaler.fit_transform(data),
            output_scaler.fit_transform(target.reshape(-1, 1)),
            train_size=0.8
        )

        n_epochs = 10
        scaled_y_test = output_scaler.inverse_transform(y_test)
        scaled_y_test = scaled_y_test.reshape((y_test.size, 1))

        # -------------- Train single GradientDescent -------------- #

        bpnet = algorithms.GradientDescent(
            (insize, 20, outsize),
            step=0.1,
            verbose=False
        )
        bpnet.train(x_train, y_train, epochs=n_epochs)
        network_output = bpnet.predict(x_test)
        network_error = rmsle(output_scaler.inverse_transform(network_output),
                              scaled_y_test)

        # -------------- Train ensemble -------------- #

        moe = algorithms.MixtureOfExperts(
            networks=[
                algorithms.Momentum(
                    (insize, 20, outsize),
                    step=0.1,
                    batch_size=1,
                    verbose=False
                ),
                algorithms.Momentum(
                    (insize, 20, outsize),
                    step=0.1,
                    batch_size=1,
                    verbose=False
                ),
            ],
            gating_network=algorithms.Momentum(
                layers.Softmax(insize) > layers.Output(2),
                step=0.1,
                verbose=False
            )
        )
        moe.train(x_train, y_train, epochs=n_epochs)
        ensemble_output = moe.predict(x_test)
        ensemble_error = rmsle(
            output_scaler.inverse_transform(ensemble_output),
            scaled_y_test
        )

        self.assertGreater(network_error, ensemble_error)
Example #12
    def test_pipeline(self):
        dataset = datasets.load_diabetes()
        target_scaler = preprocessing.MinMaxScaler()
        target = dataset.target.reshape(-1, 1)

        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data,
            target_scaler.fit_transform(target),
            train_size=0.85
        )

        network = algorithms.GradientDescent(
            connection=[
                layers.Input(10),
                layers.Sigmoid(25),
                layers.Sigmoid(1),
            ],
            show_epoch=100,
            verbose=False,
        )
        pipeline = Pipeline([
            ('min_max_scaler', preprocessing.MinMaxScaler()),
            ('gd', network),
        ])
        pipeline.fit(x_train, y_train, gd__epochs=50)
        y_predict = pipeline.predict(x_test)

        error = rmsle(target_scaler.inverse_transform(y_test),
                      target_scaler.inverse_transform(y_predict).round())
        self.assertAlmostEqual(0.48, error, places=2)
def gmm_clustering():
    # GMM component indices are arbitrary; remap them onto the iris class labels
    conversion = {
        0: 2,
        1: 0,
        2: 1,
    }

    g = mixture.GMM(n_components=3)

    iris_data = datasets.load_iris()
    diabetes_data = datasets.load_diabetes()
    data = iris_data

    # Generate random observations with two modes centered on 0 and 10
    # (note: obs is built but unused below; the model is fit on the iris data)
    np.random.seed(0)
    obs = np.concatenate((np.random.randn(100, 1), 10 + np.random.randn(300, 1)))
    g.fit(data.data)

    print("Target classification")
    print(data.target)
    results = g.predict(data.data)
    results = [conversion[item] for item in results]

    print("\nResults")
    print(np.array(results))
    compare = [results[i] == data.target[i] for i in range(len(results))]

    accuracy_count = [item for item in compare if item]

    print("\nAccuracy: {:.0%}".format(float(len(accuracy_count)) / len(compare)))
    print(max(data.target))
def main():
    diabetes = datasets.load_diabetes()
    # Use only one feature
    diabetes_X = diabetes.data[:, np.newaxis, 2]
    diabetes_X = scale(diabetes_X)
    diabetes_y = scale(diabetes.target)

    diabetes_X_train = diabetes_X[:-20]
    diabetes_X_test = diabetes_X[-20:]
    # diabetes_y_train = diabetes.target[:-20]
    # diabetes_y_test = diabetes.target[-20:]
    diabetes_y_train = diabetes_y[:-20]
    diabetes_y_test = diabetes_y[-20:]

    # regr = linear_model.LinearRegression()
    regr = LinearRegression(n_iter=50, fit_alg="batch")
    # regr = LinearRegressionNormal()
    regr.fit(diabetes_X_train, diabetes_y_train)

    # regr.fit(np.array([[0, 0], [1, 1], [2, 2]]), np.array([0, 1, 2]))
    # print(regr.predict(np.array([[3, 3]])))

    # print('Coefficients: \n', regr.coef_)
    # print("Residual sum of squares: %.2f"
    #       % np.mean((regr.predict(diabetes_X_test) - diabetes_y_test) ** 2))
    print("Variance score: %.2f" % regr.score(diabetes_X_test, diabetes_y_test))
Example #15
    def test_grid_search(self):
        def scorer(network, X, y):
            result = network.predict(X)
            return rmsle(result[:, 0], y)

        dataset = datasets.load_diabetes()
        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, train_size=0.7
        )

        grnnet = algorithms.GRNN(std=0.5, verbose=False)
        grnnet.train(x_train, y_train)
        error = scorer(grnnet, x_test, y_test)

        self.assertAlmostEqual(0.513, error, places=3)

        random_search = model_selection.RandomizedSearchCV(
            grnnet,
            param_distributions={'std': np.arange(1e-2, 0.1, 1e-4)},
            n_iter=10,
            scoring=scorer,
            random_state=self.random_seed
        )
        random_search.fit(dataset.data, dataset.target)
        scores = random_search.cv_results_

        best_score = min(scores['mean_test_score'])
        self.assertAlmostEqual(0.4266, best_score, places=3)
Example #16
    def test_grid_search(self):
        def scorer(network, X, y):
            result = network.predict(X)
            return rmsle(result, y)

        dataset = datasets.load_diabetes()
        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, train_size=0.7
        )

        grnnet = algorithms.GRNN(std=0.5, verbose=False)
        grnnet.train(x_train, y_train)
        error = scorer(grnnet, x_test, y_test)

        self.assertAlmostEqual(0.513, error, places=3)

        random_search = grid_search.RandomizedSearchCV(
            grnnet,
            param_distributions={'std': np.arange(1e-2, 1, 1e-4)},
            n_iter=10,
            scoring=scorer
        )
        random_search.fit(dataset.data, dataset.target)
        scores = random_search.grid_scores_

        best_score = min(scores, key=itemgetter(1))
        self.assertAlmostEqual(0.452, best_score[1], places=3)
Example #17
    def test_pipeline(self):
        dataset = datasets.load_diabetes()
        target_scaler = preprocessing.MinMaxScaler()
        target = dataset.target.reshape(-1, 1)

        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data,
            target_scaler.fit_transform(target),
            train_size=0.85
        )

        network = algorithms.Backpropagation(
            connection=[
                layers.SigmoidLayer(10),
                layers.SigmoidLayer(40),
                layers.OutputLayer(1),
            ],
            use_bias=True,
            show_epoch=100,
            verbose=False,
        )
        pipeline = Pipeline([
            ('min_max_scaler', preprocessing.MinMaxScaler()),
            ('backpropagation', network),
        ])
        pipeline.fit(x_train, y_train, backpropagation__epochs=1000)
        y_predict = pipeline.predict(x_test)

        error = rmsle(target_scaler.inverse_transform(y_test),
                      target_scaler.inverse_transform(y_predict).round())
        self.assertAlmostEqual(0.4481, error, places=4)
def get_data(n_clients):
    """
    Import the dataset via sklearn, shuffle and split train/test.
    Return training, target lists for `n_clients` and a holdout test set
    """
    print("Loading data")
    diabetes = load_diabetes()
    y = diabetes.target
    X = diabetes.data
    # Add constant to emulate intercept
    X = np.c_[X, np.ones(X.shape[0])]

    # The features are already preprocessed
    # Shuffle
    perm = np.random.permutation(X.shape[0])
    X, y = X[perm, :], y[perm]

    # Select test at random
    test_size = 50
    test_idx = np.random.choice(X.shape[0], size=test_size, replace=False)
    train_idx = np.ones(X.shape[0], dtype=bool)
    train_idx[test_idx] = False
    X_test, y_test = X[test_idx, :], y[test_idx]
    X_train, y_train = X[train_idx, :], y[train_idx]

    # Split train among multiple clients.
    # The selection is not at random. We simulate the fact that each client
    # sees a potentially very different sample of patients.
    X, y = [], []
    step = int(X_train.shape[0] / n_clients)
    for c in range(n_clients):
        X.append(X_train[step * c: step * (c + 1), :])
        y.append(y_train[step * c: step * (c + 1)])

    return X, y, X_test, y_test
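# Hypothetical usage (not in the original): three simulated clients plus a
# holdout test set.
X_clients, y_clients, X_test, y_test = get_data(n_clients=3)
print(len(X_clients), X_clients[0].shape, X_test.shape)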
Example #19
File: test_grnn.py Project: Neocher/neupy
    def test_simple_grnn(self):
        dataset = datasets.load_diabetes()
        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, train_size=0.7
        )

        x_train_before = x_train.copy()
        x_test_before = x_test.copy()
        y_train_before = y_train.copy()

        grnnet = algorithms.GRNN(std=0.1, verbose=False)
        grnnet.train(x_train, y_train)
        result = grnnet.predict(x_test)
        error = rmsle(result, y_test)

        old_result = result.copy()
        self.assertAlmostEqual(error, 0.4245, places=4)

        # Test problem with variable links
        np.testing.assert_array_equal(x_train, x_train_before)
        np.testing.assert_array_equal(x_test, x_test_before)
        np.testing.assert_array_equal(y_train, y_train_before)

        x_train[:, :] = 0
        result = grnnet.predict(x_test)
        total_classes_prob = np.round(result.sum(axis=1), 10)
        np.testing.assert_array_almost_equal(result, old_result)
Example #20
def test_ElasticnetWeights():
    """Test elastic net with different weight for each predictor
    alpha: a vector of weight, small # means prior knowledge
            1 : means no prior knowledge
    """

    # Has 10 features
    diabetes = datasets.load_diabetes()
    # pprint(diabetes)
    print("Size of data:{}".format(diabetes.data.shape))
    X = diabetes.data
    y = diabetes.target

    X /= X.std(axis=0)  # Standardize data (easier to set the l1_ratio parameter)

    eps = 5e-3   # the smaller it is the longer is the path
    alphas = np.arange(2, 4, 0.2)
    alphas = np.append(alphas, 2.27889) # best alpha from cv

    # Computing regularization path using the lasso
    alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, fit_intercept=False,
                                              alphas=alphas)

    # Computing regularization path using the elastic net
    alphas_enet, coefs_enet, _ = enet_path(
        X, y, eps=eps, l1_ratio=0.8, fit_intercept=False, alphas=alphas)


    # ElasticnetCV
    num_predict = X.shape[1]
    alphas = np.zeros(num_predict)
    alphas.fill(1)
    val = 0.1
    alphas[2] = val
    alphas[3] = val
    alphas[6] = val
    enetCV_alpha, enetCV_coef = runPrintResults(X,y, None, "EnetCV")
    runPrintResults(X,y, alphas, "EnetCVWeight 1")

    # print("coefs_enet: {}".format(coefs_enet[:, -1]))
    # print("coefs_lasso: {}".format(coefs_lasso[:, -1]))

    # Display results
    plt.figure(1)
    ax = plt.gca()
    ax.set_color_cycle(2 * ['b', 'r', 'g', 'c', 'k'])
    l1 = plt.plot(alphas_lasso, coefs_lasso.T)
    l2 = plt.plot(alphas_enet, coefs_enet.T, linestyle='--')

    # repeat alpha for x-axis values for plotting
    enetCV_alphaVect = [enetCV_alpha] * num_predict
    l3 = plt.scatter(enetCV_alphaVect, enetCV_coef, marker='x')

    plt.xlabel('alpha')
    plt.ylabel('coefficients')
    plt.title('Lasso and Elastic-Net Paths')
    plt.legend((l1[-1], l2[-1]), ('Lasso', 'Elastic-Net'),
                loc='upper right')
    plt.axis('tight')
    plt.savefig("fig/lassoEnet")
def bagging_regression():
	digits = load_diabetes()
	x = digits.data
	y = digits.target

	sample_parameter = {
		'n_jobs': -1,
		'min_samples_leaf': 2.0,
		'n_estimators': 500,
		'max_features': 0.55,
		'criterion': 'mse',
		'min_samples_split': 4.0,
		'model': 'RFREG',
		'max_depth': 4.0
	}

	x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

	clf_layer = mlc.layer.layer.RegressionLayer()
	print "single prediction"
	#y_train_predict,y_test_predict = clf_layer.predict(x_train,y_train,x_test,sample_parameter)
	#print y_test_predict
	y_train_predict_proba,y_test_predict_proba = clf_layer.predict(x_train,y_train,x_test,sample_parameter)
	#print y_test_predict_proba
	print evaluate_function(y_test,y_test_predict_proba,'mean_squared_error')

	print "multi ensamble prediction"

	multi_bagging_clf = mlc.layer.layer.RegressionBaggingLayer()
	y_train_predict_proba,y_test_predict_proba = multi_bagging_clf.predict(x_train,y_train,x_test,sample_parameter,times=5)

	print evaluate_function(y_test,y_test_predict_proba,'mean_squared_error')
Example #22
 def load_data(self, shuffled=True):
     samples = load_diabetes()
     if shuffled:
         self.X = shuffle(samples.data, random_state=self.SEED)
         self.y = shuffle(samples.target, random_state=self.SEED)
     else:
         self.X, self.y = samples.data, samples.target
     self.n_features = len(self.X[0])
Example #23
    def test_regression_plot_3d(self):
        df = pdml.ModelFrame(datasets.load_diabetes())
        df.data = df.data[[0, 2]]
        df.fit(df.linear_model.LinearRegression())
        ax = df.plot_estimator()

        from mpl_toolkits.mplot3d import Axes3D
        self.assertIsInstance(ax, Axes3D)
Example #24
    def test_gridsearch(self):
        import sklearn.grid_search as gs
        tuned_parameters = {'statsmodel': [sm.OLS, sm.GLS]}
        diabetes = datasets.load_diabetes()

        cv = gs.GridSearchCV(base.StatsModelsRegressor(sm.OLS), tuned_parameters, cv=5, scoring=None)
        fitted = cv.fit(diabetes.data, diabetes.target)
        self.assertTrue(fitted.best_estimator_.statsmodel is sm.OLS)
Example #25
def cross_validated_estimators():
    lasso = linear_model.LassoCV()
    diabetes = datasets.load_diabetes()
    X_diabetes = diabetes.data
    y_diabetes = diabetes.target
    print(lasso.fit(X_diabetes, y_diabetes))

    # The estimator automatically chose its regularization parameter (alpha):
    print(lasso.alpha_)
def feature_correlation_pearson(
        path="images/feature_correlation_pearson.png"):
    data = datasets.load_diabetes()
    X, y = data['data'], data['target']
    feature_names = np.array(data['feature_names'])

    visualizer = FeatureCorrelation(labels=feature_names)
    visualizer.fit(X, y)
    visualizer.poof(outpath=path, clear_figure=True)
Example #27
def test_regression_scores():
    diabetes = load_diabetes()
    X, y = diabetes.data, diabetes.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = Ridge()
    clf.fit(X_train, y_train)
    score1 = SCORERS['r2'](clf, X_test, y_test)
    score2 = r2_score(y_test, clf.predict(X_test))
    assert_almost_equal(score1, score2)
Example #28
def get_data():
    diabetes = datasets.load_diabetes()
    x = diabetes.data
    y = diabetes.target
    cases_num = 447  # note: diabetes has only 442 samples, so this slice keeps all of them
    x = x[:cases_num, :]
    y = y[:cases_num]
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
    return x_train, y_train, x_test, y_test
Example #29
def test_load_diabetes():
    res = load_diabetes()
    assert_equal(res.data.shape, (442, 10))
    assert_equal(res.target.size, 442)
    assert_equal(len(res.feature_names), 10)
    assert_true(res.DESCR)

    # test return_X_y option
    check_return_X_y(res, partial(load_diabetes))
Example #30
def create_diabetes():
    diabetes_data = datasets.load_diabetes()
    x = diabetes_data.data
    y = diabetes_data.target
    for i in range(x.shape[1]):
        xi = array_functions.normalize(x[:, i])
        yi = array_functions.normalize(y)
        array_functions.plot_2d(xi, yi)
        pass
    assert False
Example #31
#         Virgile Fritsch <*****@*****.**>
#
# License: BSD 3 clause

import numpy as np
import pytest
from sklearn import datasets
from sklearn.covariance import empirical_covariance, EmpiricalCovariance, \
    ShrunkCovariance, shrunk_covariance, \
    LedoitWolf, ledoit_wolf, ledoit_wolf_shrinkage, OAS, oas
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_warns

X, _ = datasets.load_diabetes(return_X_y=True)
X_1d = X[:, 0]
n_samples, n_features = X.shape


def test_covariance():
    # Tests Covariance module on a simple dataset.
    # test covariance fit from data
    cov = EmpiricalCovariance()
    cov.fit(X)
    emp_cov = empirical_covariance(X)
    assert_array_almost_equal(emp_cov, cov.covariance_, 4)
    assert_almost_equal(cov.error_norm(emp_cov), 0)
    assert_almost_equal(cov.error_norm(emp_cov, norm='spectral'), 0)
    assert_almost_equal(cov.error_norm(emp_cov, norm='frobenius'), 0)
    assert_almost_equal(cov.error_norm(emp_cov, scaling=False), 0)
Example #32
    assert_raises(ValueError, clf.predict, sparse.lil_matrix(X))

    Xt = np.array(X).T
    clf.fit(np.dot(X, Xt), Y)
    assert_raises(ValueError, clf.predict, X)

    clf = svm.SVC()
    clf.fit(X, Y)
    assert_raises(ValueError, clf.predict, Xt)


@pytest.mark.parametrize(
    'Estimator, data',
    [(svm.SVC, datasets.load_iris(return_X_y=True)),
     (svm.NuSVC, datasets.load_iris(return_X_y=True)),
     (svm.SVR, datasets.load_diabetes(return_X_y=True)),
     (svm.NuSVR, datasets.load_diabetes(return_X_y=True)),
     (svm.OneClassSVM, datasets.load_iris(return_X_y=True))])
def test_svm_gamma_error(Estimator, data):
    X, y = data
    est = Estimator(gamma='auto_deprecated')
    err_msg = "When 'gamma' is a string, it should be either 'scale' or 'auto'"
    with pytest.raises(ValueError, match=err_msg):
        est.fit(X, y)


def test_unicode_kernel():
    # Test that a unicode kernel name does not cause a TypeError
    clf = svm.SVC(kernel='linear', probability=True)
    clf.fit(X, Y)
    clf.predict_proba(T)
Example #33
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import datasets
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the dataset
diabetes = datasets.load_diabetes()  # diabetes is a dict-like Bunch (data, target, feature_names, ...)
# Unpack the data
data = diabetes['data']
target = diabetes['target']
feature_names = diabetes['feature_names']
# print(data.shape)
# print(target.shape)
# print(feature_names)
print(data)
df = pd.DataFrame(data, columns=feature_names)
# print(df.head())
# print(df.info())
train_X, test_X, train_Y, test_Y = train_test_split(data,
                                                    target,
                                                    train_size=0.8,
                                                    test_size=0.2)
model = LinearRegression()
model.fit(train_X, train_Y)

# plt.figure(figsize=(12,25))
# for i,col in enumerate(df.columns):
#     train_X = df.loc[:,col].reshape(-1,1)
Example #34
import scipy
from sklearn.datasets import load_diabetes

from sklearn.metrics import make_scorer
from julearn.scoring import register_scorer

from julearn import run_cross_validation
from julearn.utils import configure_logging

###############################################################################
# Set the logging level to info to see extra information
configure_logging(level='INFO')

###############################################################################
# load the diabetes data from sklearn as a pandas dataframe
features, target = load_diabetes(return_X_y=True, as_frame=True)

###############################################################################
# The dataset contains ten variables (age, sex, body mass index, average blood
# pressure, and six blood serum measurements, s1-s6) for diabetes patients, plus
# a quantitative measure of disease progression one year after baseline, which
# is the target we are interested in predicting.

print('Features: \n', features.head())  # type: ignore
print('Target: \n', target.describe())  # type: ignore

###############################################################################
# Let's combine features and target together in one dataframe and define X
# and y
data_diabetes = pd.concat([features, target], axis=1)  # type: ignore
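# The example is cut off here; based on julearn's documented usage it
# presumably continues along these lines (the column names are assumptions):
X = list(features.columns)  # feature column names
y = 'target'                # load_diabetes(as_frame=True) names the target column 'target'
scores = run_cross_validation(X=X, y=y, data=data_diabetes,
                              model='ridge', problem_type='regression')
print(scores['test_score'].mean())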
Example #35
 def setUp(self):
     self.v = verbosity
     self.clf = Feat(verbosity=verbosity, n_threads=1)
     diabetes = load_diabetes()
     self.X = diabetes.data
     self.y = diabetes.target
Example #36
import pandas as pd
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt

columns = "age sex bmi map tc ldl hdl tch ltg glu".split(
)  # Declare the columns names

# datasets="/home/hadoop/hadoop/hadoop_working/DataScience/code/sklearn/diabetic.txt"

diabetes = datasets.load_diabetes()  # Call the diabetes dataset from sklearn

df = pd.DataFrame(diabetes.data,
                  columns=columns)  # load the dataset as a pandas data frame

y = diabetes.target  # define the target variable (dependent variable) as y

# create training and testing vars
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.2)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

# fit a model
lm = linear_model.LinearRegression()
model = lm.fit(X_train, y_train)
predictions = lm.predict(X_test)

predictions[0:5]

## The line / model
plt.scatter(y_test, predictions)
Example #37
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error

diabetes_data = datasets.load_diabetes()

# ['data', 'target', 'DESCR', 'feature_names', 'data_filename', 'target_filename']
# print(diabetes_data.keys())
# print(diabetes_data.data) # This prints the entire data's arrays
# print(diabetes_data.DESCR)

# Below we're selecting one label and one feature
# This would take the column at index 2 and keep it as a 2-D numpy array (an array of arrays)
# diabetes_X = diabetes_data.data[:, np.newaxis, 2]

diabetes_X = diabetes_data.data  # This selects all the features

# print(diabetes_X)

# Now we're doing train test splitting
diabetes_X_train = diabetes_X[:-20]  # Here we're selecting all samples except the last 20
diabetes_X_test = diabetes_X[-20:]  # Here we're selecting the last 20 samples

diabetes_y_train = diabetes_data.target[:
                                        -20]  # The corresponding label for the X Train features
diabetes_y_test = diabetes_data.target[-20:]  # Same for the X test

model = linear_model.LinearRegression()

model.fit(diabetes_X_train, diabetes_y_train)
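# A minimal continuation (not part of the original snippet): evaluate on the
# held-out samples with the mean_squared_error imported above.
diabetes_y_pred = model.predict(diabetes_X_test)
print("Mean squared error:", mean_squared_error(diabetes_y_test, diabetes_y_pred))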
# Importing the packages
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

from sklearn.datasets import load_diabetes

# Importing the data
# Ten variables related to age, sex, body mass index, average blood pressure
# and six serum measurements were obtained for each of the 442 diabetes patients.

df = load_diabetes()

# Viewing the dataset features:
df.feature_names

# Viewing the data:
df.data[0:5, ]

# Defining the dependent/independent variables.
X = df.data
y = df.target

# Splitting the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
Example #39
# 
# The input dataset used for this example is diabetes. Background on this dataset is available at [sklearn diabetes dataset](https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset).
# 
# The model's goal is to take 10 input variables describing each patient, namely `age, sex, body mass index, average blood pressure` and 6 `blood serum` measurements, and predict a quantitative measure of disease progression one year after treatment.

# In[1]:


import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge

from sklearn.datasets import load_diabetes
X,y = load_diabetes(return_X_y=True)
features = load_diabetes()['feature_names']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)


# In[2]:


import numpy as np
import matplotlib.pyplot as plt

# Vary alpha from 0.1 to 100
n_alphas = 200
alphas = 1/np.logspace(1, -2, n_alphas)
coefs = []
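# The snippet is cut off after initializing coefs; the standard continuation
# fits Ridge once per alpha and records the coefficient path (a sketch):
for a in alphas:
    ridge = Ridge(alpha=a, fit_intercept=False)
    ridge.fit(X_train, y_train)
    coefs.append(ridge.coef_)

plt.plot(alphas, coefs)
plt.xscale('log')
plt.xlabel('alpha')
plt.ylabel('coefficients')
plt.show()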
Example #40
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
# 1. Data
dataset = load_diabetes()
x_train, x_test, y_train, y_test = train_test_split(dataset.data,
                                                    dataset.target,
                                                    train_size=0.8,
                                                    random_state=32)

model = RandomForestRegressor(max_depth=4)

model.fit(x_train, y_train)

#4. Evaluate, predict
acc = model.score(x_test, y_test)

print(model.feature_importances_)
print("acc :", acc)

df = pd.DataFrame(dataset.data, columns=dataset.feature_names)
new_data = []
feature = []
a = np.percentile(model.feature_importances_, q=25)

for i in range(len(dataset.data[0])):
    if model.feature_importances_[i] > a:
        new_data.append(df.iloc[:, i])
Example #41
import matplotlib.pyplot as plot
import numpy
from sklearn import datasets, linear_model, metrics
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Loading diabetes Dataset
diabetesdataset = datasets.load_diabetes()

X = diabetesdataset.data
Y = diabetesdataset.target

# Training and Testing Data

X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=21)

#Cross-Validation
#C = 1.0 --> SVM Regularization Parameter

#Poly Kernel

clf = SVC(kernel='poly', degree=4, C=1.0, gamma=0.1).fit(X_train, Y_train)
# note: the diabetes target is continuous, so SVC treats each distinct value as a class label
y_pred = clf.predict(X_test)
print("  poly Kernel Accuracy :", metrics.accuracy_score(Y_test, y_pred) * 100)

# Accuracy varies with the random_state of the split
Example #42
import numpy as np

from sklearn.datasets import load_diabetes
from sklearn.utils.testing import assert_almost_equal

from lightning.ranking import PRank
from lightning.ranking import KernelPRank

bunch = load_diabetes()
X, y = bunch.data, bunch.target
y = np.round(y, decimals=-2)  # round targets to the nearest 100 to form ordinal ranks


def test_prank():
    est = PRank(n_iter=10, shuffle=False, random_state=0)
    est.fit(X, y)
    assert_almost_equal(est.score(X, y), 41.86, 2)

    est = PRank(n_iter=10, shuffle=True, random_state=0)
    est.fit(X, y)
    assert_almost_equal(est.score(X, y), 71.04, 2)


def test_prank_linear_kernel():
    est = KernelPRank(kernel="linear",
                      n_iter=10,
                      shuffle=False,
                      random_state=0)
    est.fit(X, y)
    assert_almost_equal(est.score(X, y), 41.86, 2)
Example #43
def train_data():
    bunch = load_diabetes()
    X, y = bunch.data, bunch.target
    y = np.round(y, decimals=-2)  # round targets to the nearest 100 to form ordinal ranks
    return X, y
Example #44
File: scores.py Project: kelicht/dace
                          n_neighbors=1,
                          mode='distance',
                          metric='euclidean',
                          include_self=False,
                          n_jobs=-1)
    closest_distances = kg.toarray()[np.where(kg.toarray() > 0)]
    eps = closest_distances.max()
    clustering = DBSCAN(eps=eps, min_samples=2, leaf_size=30,
                        n_jobs=-1).fit(ball_allies_adv)
    labels = clustering.labels_
    # justif = int(labels[0] == labels[1])
    # return justif
    return int(labels[0] == labels[1])


if (__name__ == '__main__'):
    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import RandomForestClassifier
    X, y = load_diabetes().data, load_diabetes().target
    y = 2 * ((y > y.mean()).astype(int)) - 1
    clf = RandomForestClassifier(n_estimators=10)
    clf = clf.fit(X[:100], y[:100])
    print(evaluation_test(X[0], clf, y[0], X[1:], y[1:]))
    # from sklearn.model_selection import train_test_split
    # X = load_diabetes().data
    # X_tr, X_ts = train_test_split(X, test_size=0.1, random_state=0)
    # lof = myLocalOutlierFactor(n_neighbors=1, metric='minkowski', p=2)
    # lof = lof.fit(X_tr)
    # print('LocalReachabilityDensity: \n {}'.format(lof.local_reachability_density(X_tr[:10])))
    # print('LocalOutlierFactor: \n {}'.format(lof.local_outlier_factor(X_ts)))
    # Estimate the score after chained imputation of the missing values
    estimator = make_pipeline(
        make_union(ChainedImputer(missing_values=0, random_state=0),
                   MissingIndicator(missing_values=0)),
        RandomForestRegressor(random_state=0, n_estimators=100))
    chained_impute_scores = cross_val_score(estimator, X_missing, y_missing,
                                            scoring='neg_mean_squared_error')

    return ((full_scores.mean(), full_scores.std()),
            (zero_impute_scores.mean(), zero_impute_scores.std()),
            (mean_impute_scores.mean(), mean_impute_scores.std()),
            (chained_impute_scores.mean(), chained_impute_scores.std()))


results_diabetes = np.array(get_results(load_diabetes()))
mses_diabetes = results_diabetes[:, 0] * -1
stds_diabetes = results_diabetes[:, 1]

results_boston = np.array(get_results(load_boston()))
mses_boston = results_boston[:, 0] * -1
stds_boston = results_boston[:, 1]

n_bars = len(mses_diabetes)
xval = np.arange(n_bars)

x_labels = ['Full data',
            'Zero imputation',
            'Mean Imputation',
            'Chained Imputation']
colors = ['r', 'g', 'b', 'orange']
Example #46
between the observed responses in the dataset and the responses predicted by
the linear approximation.

The coefficients, the residual sum of squares and the variance score are also
calculated.

"""
print(__doc__)

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load the diabetes dataset.
benhTieuDuong = datasets.load_diabetes()

# Use only one feature.
benhTieuDuong_X = benhTieuDuong.data[:, np.newaxis, 2]

# Split the data into training/testing sets.
benhTieuDuong_X_train = benhTieuDuong_X[:-20]
benhTieuDuong_X_test = benhTieuDuong_X[-20:]

# Split the targets into training/testing sets.
benhTieuDuong_y_train = benhTieuDuong.target[:-20]  # 0 -> size - 20
benhTieuDuong_y_test = benhTieuDuong.target[-20:]  # size - 20 -> size

# Create a linear regression object.
linearRegression = linear_model.LinearRegression()
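# A minimal continuation (not in the original snippet): fit, predict, evaluate.
linearRegression.fit(benhTieuDuong_X_train, benhTieuDuong_y_train)
y_pred = linearRegression.predict(benhTieuDuong_X_test)
print("Mean squared error: %.2f" % mean_squared_error(benhTieuDuong_y_test, y_pred))
print("r2 score: %.2f" % r2_score(benhTieuDuong_y_test, y_pred))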
Example #47
import numpy as np

from sklearn.datasets import load_diabetes
from sklearn.tree import DecisionTreeRegressor
from sklearn.utils.testing import assert_almost_equal

from ivalice.ranking import LambdaMART

data = load_diabetes()
X, y = data.data, data.target
y /= (y.max() - y.min())


def test_lambda_mart_ndcg():
    for gains in ("linear", "exponential"):
        reg = DecisionTreeRegressor()
        lm = LambdaMART(reg, n_estimators=10, max_rank=10, gains=gains)
        lm.fit(X, y)
        ndcg = lm.score(X, y)
        assert_almost_equal(ndcg, 1.0)
Example #48
def Main():
    import argparse
    import numpy as np
    from sklearn.datasets import load_diabetes
    from chainer import cuda, Variable, FunctionSet, optimizers
    import chainer.functions as F

    parser = argparse.ArgumentParser(description='Chainer example: regression')
    parser.add_argument('--gpu',
                        '-g',
                        default=-1,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    batchsize = 13
    n_epoch = 100
    n_units = 30

    # Prepare dataset
    print 'fetch diabetes dataset'
    diabetes = load_diabetes()
    data = diabetes['data'].astype(np.float32)
    target = diabetes['target'].astype(np.float32).reshape(
        len(diabetes['target']), 1)

    N = batchsize * 30  #Number of training data
    x_train, x_test = np.split(data, [N])
    y_train, y_test = np.split(target, [N])
    N_test = y_test.size

    print 'Num of samples for train:', len(y_train)
    print 'Num of samples for test:', len(y_test)
    # Dump data for plot:
    fp1 = file('/tmp/smpl_train.dat', 'w')
    for x, y in zip(x_train, y_train):
        fp1.write('%s #%i# %s\n' %
                  (' '.join(map(str, x)), len(x) + 1, ' '.join(map(str, y))))
    fp1.close()
    # Dump data for plot:
    fp1 = file('/tmp/smpl_test.dat', 'w')
    for x, y in zip(x_test, y_test):
        fp1.write('%s #%i# %s\n' %
                  (' '.join(map(str, x)), len(x) + 1, ' '.join(map(str, y))))
    fp1.close()

    # Prepare multi-layer perceptron model
    model = FunctionSet(l1=F.Linear(10, n_units),
                        l2=F.Linear(n_units, n_units),
                        l3=F.Linear(n_units, 1))
    if args.gpu >= 0:
        cuda.init(args.gpu)
        model.to_gpu()

    # Neural net architecture
    def forward(x_data, y_data, train=True):
        x, t = Variable(x_data), Variable(y_data)
        h1 = F.dropout(F.relu(model.l1(x)), train=train)
        h2 = F.dropout(F.relu(model.l2(h1)), train=train)
        y = model.l3(h2)
        return F.mean_squared_error(y, t), y

    # Setup optimizer
    optimizer = optimizers.AdaDelta(rho=0.9)
    optimizer.setup(model.collect_parameters())

    # Learning loop
    for epoch in xrange(1, n_epoch + 1):
        print 'epoch', epoch

        # training
        perm = np.random.permutation(N)
        sum_loss = 0

        for i in xrange(0, N, batchsize):
            x_batch = x_train[perm[i:i + batchsize]]
            y_batch = y_train[perm[i:i + batchsize]]
            if args.gpu >= 0:
                x_batch = cuda.to_gpu(x_batch)
                y_batch = cuda.to_gpu(y_batch)

            optimizer.zero_grads()
            loss, pred = forward(x_batch, y_batch)
            loss.backward()
            optimizer.update()

            sum_loss += float(cuda.to_cpu(loss.data)) * batchsize

        print 'train mean loss={}'.format(sum_loss / N)
        '''
    # testing per batch
    sum_loss     = 0
    preds = []
    for i in xrange(0, N_test, batchsize):
      x_batch = x_test[i:i+batchsize]
      y_batch = y_test[i:i+batchsize]
      if args.gpu >= 0:
        x_batch = cuda.to_gpu(x_batch)
        y_batch = cuda.to_gpu(y_batch)

      loss, pred = forward(x_batch, y_batch, train=False)
      preds.extend(cuda.to_cpu(pred.data))
      sum_loss     += float(cuda.to_cpu(loss.data)) * batchsize
    pearson = np.corrcoef(np.asarray(preds).reshape(len(preds),), np.asarray(y_test).reshape(len(preds),))
    #'''

        #'''
        # testing all data
        preds = []
        x_batch = x_test[:]
        y_batch = y_test[:]
        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)
        loss, pred = forward(x_batch, y_batch, train=False)
        preds = cuda.to_cpu(pred.data)
        sum_loss = float(cuda.to_cpu(loss.data)) * len(y_test)
        pearson = np.corrcoef(
            np.asarray(preds).reshape(len(preds), ),
            np.asarray(y_test).reshape(len(preds), ))
        #'''

        print 'test  mean loss={}, corrcoef={}'.format(sum_loss / N_test,
                                                       pearson[0][1])

        # Dump data for plot:
        fp1 = file('/tmp/nn_test%04i.dat' % epoch, 'w')
        for x, y in zip(x_test, preds):
            fp1.write(
                '%s #%i# %s\n' %
                (' '.join(map(str, x)), len(x) + 1, ' '.join(map(str, y))))
        fp1.close()
Example #49
from sklearn.metrics import mean_absolute_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

from ELM import ELMRegressor

test_maes_dictionary = dict()

plt.style.use('ggplot')
sns.set_context("talk")
np.random.seed(0)

## DATA PREPROCESSING
X, y = load_diabetes(return_X_y=True)  # .values() on a Bunch would unpack more than two items
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=2)

stdScaler_data = StandardScaler()
X_train = stdScaler_data.fit_transform(X_train)
X_test = stdScaler_data.transform(X_test)

stdScaler_target = StandardScaler()
y_train = stdScaler_target.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test = stdScaler_target.transform(y_test.reshape(-1, 1)).ravel()
max_y_train = max(abs(y_train))
y_train = y_train / max_y_train
y_test = y_test / max_y_train
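# The snippet stops before any model is trained; presumably it goes on to fill
# test_maes_dictionary. A sketch using one of the models already imported:
knn = KNeighborsRegressor()
knn.fit(X_train, y_train)
test_maes_dictionary['KNN'] = mean_absolute_error(y_test, knn.predict(X_test))
print(test_maes_dictionary)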
Example #50
def main():
    EPSILON = 1e-4

    X, y = datasets.load_diabetes(return_X_y=True)

    rng = np.random.RandomState(42)
    X = np.c_[X, rng.randn(X.shape[0], 14)]  # add some bad features

    # normalize data as done by Lars to allow for comparison
    X /= np.sqrt(np.sum(X ** 2, axis=0))

    # #############################################################################
    # LassoLarsIC: least angle regression with BIC/AIC criterion

    model_bic = LassoLarsIC(criterion='bic')
    t1 = time.time()
    model_bic.fit(X, y)
    t_bic = time.time() - t1
    alpha_bic_ = model_bic.alpha_

    model_aic = LassoLarsIC(criterion='aic')
    model_aic.fit(X, y)
    alpha_aic_ = model_aic.alpha_

    def plot_ic_criterion(model, name, color):
        criterion_ = model.criterion_
        plt.semilogx(model.alphas_ + EPSILON, criterion_, '--', color=color,
                     linewidth=3, label='%s criterion' % name)
        plt.axvline(model.alpha_ + EPSILON, color=color, linewidth=3,
                    label='alpha: %s estimate' % name)
        plt.xlabel(r'$\alpha$')
        plt.ylabel('criterion')

    plt.figure()
    plot_ic_criterion(model_aic, 'AIC', 'b')
    plot_ic_criterion(model_bic, 'BIC', 'r')
    plt.legend()
    plt.title('Information-criterion for model selection (training time %.3fs)'
              % t_bic)

    # #############################################################################
    # LassoCV: coordinate descent

    # Compute paths
    print("Computing regularization path using the coordinate descent lasso...")
    t1 = time.time()
    model = LassoCV(cv=20).fit(X, y)
    t_lasso_cv = time.time() - t1

    # Display results
    plt.figure()
    ymin, ymax = 2300, 3800
    plt.semilogx(model.alphas_ + EPSILON, model.mse_path_, ':')
    plt.plot(model.alphas_ + EPSILON, model.mse_path_.mean(axis=-1), 'k',
             label='Average across the folds', linewidth=2)
    plt.axvline(model.alpha_ + EPSILON, linestyle='--', color='k',
                label='alpha: CV estimate')

    plt.legend()

    plt.xlabel(r'$\alpha$')
    plt.ylabel('Mean square error')
    plt.title('Mean square error on each fold: coordinate descent '
              '(train time: %.2fs)' % t_lasso_cv)
    plt.axis('tight')
    plt.ylim(ymin, ymax)

    # #############################################################################
    # LassoLarsCV: least angle regression

    # Compute paths
    print("Computing regularization path using the Lars lasso...")
    t1 = time.time()
    model = LassoLarsCV(cv=20).fit(X, y)
    t_lasso_lars_cv = time.time() - t1

    # Display results
    plt.figure()
    plt.semilogx(model.cv_alphas_ + EPSILON, model.mse_path_, ':')
    plt.semilogx(model.cv_alphas_ + EPSILON, model.mse_path_.mean(axis=-1), 'k',
                 label='Average across the folds', linewidth=2)
    plt.axvline(model.alpha_, linestyle='--', color='k',
                label='alpha CV')
    plt.legend()

    plt.xlabel(r'$\alpha$')
    plt.ylabel('Mean square error')
    plt.title('Mean square error on each fold: Lars (train time: %.2fs)'
              % t_lasso_lars_cv)
    plt.axis('tight')
    plt.ylim(ymin, ymax)

    plt.show()
Example #51
def diabetes():
    return load_diabetes()
Training a pipeline
+++++++++++++++++++
"""
from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
import numpy
from onnxruntime import InferenceSession
from sklearn.datasets import load_diabetes
from sklearn.ensemble import (GradientBoostingRegressor, RandomForestRegressor,
                              VotingRegressor)
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from skl2onnx import to_onnx
from mlprodict.onnxrt import OnnxInference

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Train classifiers
reg1 = GradientBoostingRegressor(random_state=1, n_estimators=5)
reg2 = RandomForestRegressor(random_state=1, n_estimators=5)
reg3 = LinearRegression()

ereg = Pipeline(steps=[
    ('voting', VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])),
])
ereg.fit(X_train, y_train)

#################################
# Converts the model
# ++++++++++++++++++
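# The example is truncated here; based on the imports it presumably continues
# by converting the pipeline with skl2onnx and running it with onnxruntime
# (a sketch; 'X' is skl2onnx's default input name):
onx = to_onnx(ereg, X_train[:1].astype(numpy.float32))
sess = InferenceSession(onx.SerializeToString())
pred_onx = sess.run(None, {'X': X_test.astype(numpy.float32)})[0]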
Example #53
 def test_predict(self):
     diabetes = datasets.load_diabetes()
     estimator = base.StatsModelsRegressor(sm.OLS)
     with self.assertRaisesRegexp(
             ValueError, 'StatsModelsRegressor is not fitted to data'):
         estimator.predict(diabetes.data)
from sys import platform
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier, plot_importance

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from xgboost.sklearn import XGBRegressor

datasets = load_diabetes()  # note: this rebinds `datasets`, shadowing the sklearn module imported above

x_train, x_test, y_train, y_test = train_test_split(datasets.data,
                                                    datasets.target,
                                                    train_size=0.8,
                                                    random_state=104)

#2

# model  = GradientBoostingRegressor(max_depth=4)
model = XGBRegressor(n_jobs=-1, use_label_encoder=False)

#3

model.fit(x_train, y_train, eval_metric='rmse')  # 'mlogloss' is a classification metric; regression needs e.g. 'rmse'

#4
Example #55
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error

diabetes = datasets.load_diabetes()  # loading dataset of diabetes
print(diabetes.keys())  # it shows keys from dataset
# (['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])

print(diabetes.data)  # it shows data in numpy array form
print(diabetes.DESCR)  # it gives description of dataset

# diabetes_X = diabetes.data[:, np.newaxis, 3]   # ':' takes all rows of the column at index 3 and keeps a 2-D numpy array
# print(diabetes_X)                                  # simple linear regression
diabetes_X = diabetes.data  # use all features, i.e. multiple regression (we can't plot this directly)

diabetes_X_train = diabetes_X[:
                              -30]  # getting feature values except last 30 values to train
diabetes_X_test = diabetes_X[
    -30:]  # getting last 30 feature values to test our programme

diabetes_Y_train = diabetes.target[:
                                   -30]  # these are the labels for train model
diabetes_Y_test = diabetes.target[-30:]  # these are labels for test model

model = linear_model.LinearRegression()  # making model of linear regression

model.fit(diabetes_X_train, diabetes_Y_train)  # giving value to model to train

diabetes_Y_predicted = model.predict(
    diabetes_X_test)  # giving values to model to predict
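# A minimal continuation (not in the original snippet): evaluate the fit with
# the mean_squared_error imported above.
print("Mean squared error:", mean_squared_error(diabetes_Y_test, diabetes_Y_predicted))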
Example #56
from sklearn.datasets import load_diabetes
from sklearn import linear_model
import matplotlib.pyplot as plt
import pandas

diabetes = load_diabetes()
diabetes.keys()

print(diabetes.DESCR)

tabela = pandas.DataFrame(diabetes.data)
tabela.columns = diabetes.feature_names
tabela.head(10)
tabela['Taxa'] = diabetes.target
print(tabela.head(10))
X = tabela[["bmi", "s3"]]

X_t = X[:-20]
X_v = X[-20:]
print(X_t["bmi"])
y_t = tabela["Taxa"][:-20]
y_v = tabela["Taxa"][-20:]

regr = linear_model.LinearRegression()

# train the model
regr.fit(X_t, y_t)

# make the prediction
y_pred = regr.predict(X_v)
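# A quick visual check of the fit (a sketch, not in the original snippet):
plt.scatter(y_v, y_pred)
plt.xlabel('observed')
plt.ylabel('predicted')
plt.show()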
Example #57
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model  # we use built in data set diabetes from sklearn
from sklearn.metrics import mean_squared_error

diabetes = datasets.load_diabetes()  # this load data set from sklearn

# print(diabetes.keys())   # it will show the keys of the data set
# above line show this ---->
# dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])

# print(diabetes.data)

# print(diabetes.DESCR) # it shows the dataset description

diabetes_x = diabetes.data[:, np.newaxis,
                           2]  # ':' selects all rows; we keep only the feature at index 2

#for x axis

diabetes_x_Train = diabetes_x[:-30]  # all samples except the last 30, for training
diabetes_x_Test = diabetes_x[-30:]  # the last 30 samples, for testing
# any other split works too, e.g. keeping 10 samples each for test and train

#for y axis

diabetes_y_train = diabetes.target[:-30]
diabetes_y_test = diabetes.target[-30:]

#for linear model
model = linear_model.LinearRegression()  # create the linear regression model
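# A minimal continuation (not in the original snippet): train and evaluate.
model.fit(diabetes_x_Train, diabetes_y_train)
predicted = model.predict(diabetes_x_Test)
print("Mean squared error:", mean_squared_error(diabetes_y_test, predicted))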
 def __init__(self):
     self._diabetes = datasets.load_diabetes()
     self._shrink_x = np.c_[ .5, 1].T
     self._shrink_y = [.5, 1]
     self._shrink_t = np.c_[ 0, 2].T
     self._alphas   = np.logspace(-4, -1, 6)
Example #59
The coefficients, the residual sum of squares and the variance score are also
calculated.

"""
print(__doc__)

# Code source: Jaques Grobler
# License: BSD 3 clause

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load the diabetes dataset
diabetes = datasets.load_diabetes()

# Use only one feature
diabetes_X = diabetes.data[:, np.newaxis, 2]

# Split the data into training/testing sets
diabetes_X_train = diabetes_X[:-20]
diabetes_X_test = diabetes_X[-20:]

# Split the targets into training/testing sets
diabetes_y_train = diabetes.target[:-20]
diabetes_y_test = diabetes.target[-20:]

# Create linear regression object
regr = linear_model.LinearRegression()
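# The standard continuation of this classic example (a sketch): train, predict,
# and report the metrics imported above.
regr.fit(diabetes_X_train, diabetes_y_train)
diabetes_y_pred = regr.predict(diabetes_X_test)
print('Coefficients:', regr.coef_)
print('Mean squared error: %.2f' % mean_squared_error(diabetes_y_test, diabetes_y_pred))
print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred))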
Example #60
import numpy as np
from sklearn import linear_model, metrics
from sklearn import datasets
from sklearn.metrics import r2_score

diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
diabetes_X_train = diabetes_X[:-20]
diabetes_y_train = diabetes_y[:-20]
diabetes_X_test = diabetes_X[-20:]
diabetes_y_test = diabetes_y[-20:]

regr = linear_model.LinearRegression()
regr.fit(diabetes_X_train, diabetes_y_train)

print(regr.coef_)
diabetes_y_pred = regr.predict(diabetes_X_test)
mean_square_error = metrics.mean_squared_error(diabetes_y_test,
                                               diabetes_y_pred)
print('mean square error:{}'.format(mean_square_error))
print('r2 score: {}'.format(r2_score(diabetes_y_test, diabetes_y_pred)))