Example #1
import numpy as np
from sklearn import datasets


def FriedmanDataset_2():
    # Note: with the same random_state, the first 240 test rows coincide
    # with the (pre-noise) training rows.
    d_train = datasets.make_friedman2(240, random_state=0)
    d_test = datasets.make_friedman2(1000, random_state=0)

    features_train = d_train[0]
    # Perturb each training sample with zero-mean Gaussian noise whose scale
    # grows with the feature magnitude (heteroscedastic input noise).
    for i in range(240):
        features_train[i] += np.random.normal(0, features_train[i] / 3)

    target_train = d_train[1]
    features_test = d_test[0]
    target_test = d_test[1]

    return features_train, target_train, features_test, target_test
Example #2
# numpy.testing provides assert helpers compatible with this test
from numpy.testing import assert_array_almost_equal, assert_equal
from sklearn.datasets import make_friedman2


def test_make_friedman2():
    X, y = make_friedman2(n_samples=5, noise=0.0, random_state=0)

    assert_equal(X.shape, (5, 4), "X shape mismatch")
    assert_equal(y.shape, (5,), "y shape mismatch")

    assert_array_almost_equal(y, (X[:, 0] ** 2 + (X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5)
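The assertion above encodes the noiseless Friedman #2 target, y = sqrt(X0^2 + (X1*X2 - 1/(X1*X3))^2). For reference, the four inputs are drawn uniformly from fixed ranges; a minimal sanity-check sketch, assuming only numpy and scikit-learn:

import numpy as np
from sklearn.datasets import make_friedman2

# Documented input ranges: x0 in [0, 100], x1 in [40*pi, 560*pi],
# x2 in [0, 1], x3 in [1, 11].
X, _ = make_friedman2(n_samples=10000, random_state=0)
lo = np.array([0.0, 40 * np.pi, 0.0, 1.0])
hi = np.array([100.0, 560 * np.pi, 1.0, 11.0])
assert ((X >= lo) & (X <= hi)).all()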
Example #3
def friedman2(n_samples=100,
              noise=0.0,
              random_state=None):

    return datasets.make_friedman2(n_samples=n_samples,
                                   noise=noise,
                                   random_state=random_state)
def test_regression_synthetic():
    """Test on synthetic regression datasets used in Leo Breiman,
    `Bagging Predictors?. Machine Learning 24(2): 123-140 (1996). """
    random_state = check_random_state(1)
    regression_params = {'n_estimators': 100, 'max_depth': 4,
                         'min_samples_split': 1, 'learning_rate': 0.1,
                         'loss': 'ls'}

    # Friedman1
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=random_state, noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]
    clf = GradientBoostingRegressor()
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 5.0, "Failed on Friedman1 with mse = %.4f" % mse

    # Friedman2
    X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]
    clf = GradientBoostingRegressor(**regression_params)
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 1700.0, "Failed on Friedman2 with mse = %.4f" % mse

    # Friedman3
    X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]
    clf = GradientBoostingRegressor(**regression_params)
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 0.015, "Failed on Friedman3 with mse = %.4f" % mse
Example #6
from numpy.testing import assert_array_almost_equal
from sklearn.datasets import make_friedman2


def test_make_friedman2():
    X, y = make_friedman2(n_samples=5, noise=0.0, random_state=0)

    assert X.shape == (5, 4), "X shape mismatch"
    assert y.shape == (5, ), "y shape mismatch"

    assert_array_almost_equal(y, (X[:, 0]**2 + (X[:, 1] * X[:, 2] - 1 /
                                                (X[:, 1] * X[:, 3]))**2)**0.5)
Example #7
def friedman_2(n, noise):
    """Generate the Friedman #2 data set.

    Args:
        n (int): number of data samples
        noise (float): standard deviation of the added Gaussian noise

    Returns:
        Friedman #2 data set as an (X, y) tuple

    """
    return make_friedman2(n_samples=n, noise=noise)
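A minimal call for reference; Friedman #2 always has exactly 4 features, so only the sample count and noise level vary:

X, y = friedman_2(200, 0.5)  # X: (200, 4), y: (200,)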
Example #8
def generateDatas(N, F, choice):

    if choice == 'f1':
        X, Y = datasets.make_friedman1(N, F, noise=1)
    elif choice == 'f2':
        # NB: the second positional argument of make_friedman2/3 is noise,
        # not a feature count (see the note after this function)
        X, Y = datasets.make_friedman2(N, F)
    elif choice == 'f3':
        X, Y = datasets.make_friedman3(N, F)  # was '==': a comparison, not an assignment
    elif choice == 'boston':
        boston = datasets.load_boston()
        X, Y = boston.data, boston.target
    else:
        raise ValueError('unknown dataset choice: %s' % choice)

    return X, Y
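In the 'f2' and 'f3' branches above, F lands in the noise slot. If F was meant as a feature count, a keyword-style call (a sketch for the 'f2' branch) makes the intent explicit; friedman2/3 expose no n_features knob:

X, Y = datasets.make_friedman2(n_samples=N, noise=1.0)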
Example #9
    def genFriedman(self, i=1, N=240, D=10):
        if i not in range(1, 4):
            raise ValueError('not a correct dataset')

        if i == 1:
            X, Y = datasets.make_friedman1(N, D)

        # NB: for friedman2/3 the second positional argument is noise, not a
        # feature count; both generators always produce 4 features.
        if i == 2:
            X, Y = datasets.make_friedman2(N, D)

        if i == 3:
            X, Y = datasets.make_friedman3(N, D)
        return X, Y
Example #11
def define_tested_reg_datasets():

    gDatasets = {}
    gDatasets["diabetes"] = datasets.load_diabetes()
    gDatasets["boston"] = datasets.load_boston()
    gDatasets["freidman1"] = datasets.make_friedman1(random_state=1960)
    gDatasets["freidman2"] = datasets.make_friedman2(random_state=1960)
    gDatasets["freidman3"] = datasets.make_friedman3(random_state=1960)
    gDatasets["RandomReg_10"] = datasets.make_regression(n_features=10, random_state=1960)
    gDatasets["RandomReg_100"] = datasets.make_regression(n_features=100, random_state=1960)
    gDatasets["RandomReg_500"] = datasets.make_regression(n_features=500, random_state=1960)

    return gDatasets
Example #12
def generate_Dataset(name_dataset):
    # f maps the dataset name to an index (defined elsewhere in the project);
    # the Boston set is the default fallback
    ind = f(name_dataset)
    if ind == 1:
        x, y = datasets.make_friedman1(n_samples=100, n_features=10, noise=0.0, random_state=None)
    elif ind == 2:
        x, y = datasets.make_friedman2(n_samples=100, noise=0.0, random_state=None)
    elif ind == 3:
        x, y = datasets.make_friedman3(n_samples=100, noise=0.0, random_state=None)
    else:
        x, y = datasets.load_boston(return_X_y=True)

    x = x.tolist()
    return x, y
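load_boston was removed in scikit-learn 1.2. On current releases the usual stand-in for the fallback branch is the California housing data:

x, y = datasets.fetch_california_housing(return_X_y=True)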
Example #13
def test_regression_synthetic():
    # Test on synthetic regression datasets used in Leo Breiman,
    # "Bagging Predictors". Machine Learning 24(2): 123-140 (1996).
    random_state = check_random_state(1)
    regression_params = {
        'n_estimators': 100,
        'max_depth': 4,
        'min_samples_split': 2,
        'learning_rate': 0.1,
        'loss': 'ls'
    }

    # Friedman1
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=random_state,
                                   noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    for presort in True, False:
        clf = GradientBoostingRegressor(presort=presort)
        clf.fit(X_train, y_train)
        mse = mean_squared_error(y_test, clf.predict(X_test))
        assert_less(mse, 5.0)

    # Friedman2
    X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    for presort in True, False:
        regression_params['presort'] = presort
        clf = GradientBoostingRegressor(**regression_params)
        clf.fit(X_train, y_train)
        mse = mean_squared_error(y_test, clf.predict(X_test))
        assert_less(mse, 1700.0)

    # Friedman3
    X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    for presort in True, False:
        regression_params['presort'] = presort
        clf = GradientBoostingRegressor(**regression_params)
        clf.fit(X_train, y_train)
        mse = mean_squared_error(y_test, clf.predict(X_test))
        assert_less(mse, 0.015)
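presort and the 'ls' loss alias were deprecated and later removed from GradientBoostingRegressor (the variant further down already uses loss='squared_error'), so on current scikit-learn the presort loop collapses to a single fit:

clf = GradientBoostingRegressor(n_estimators=100, max_depth=4,
                                min_samples_split=2, learning_rate=0.1,
                                loss='squared_error')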
Example #15
def main():
    X, y = make_friedman2(
        n_samples=500, noise=0,
        random_state=0)  # synthetic data the model is fit on
    print(type(X))
    print(np.shape(X))  # 500 samples, each with 4 features
    print(type(y))
    print(np.shape(y))  # 500 targets

    kernel = DotProduct() + WhiteKernel()
    gpr = GaussianProcessRegressor(kernel=kernel,
                                   optimizer='fmin_l_bfgs_b',
                                   random_state=0).fit(X, y)
    gpr_score = gpr.score(X, y)  # R^2 of the prediction on the training data
    print('R^2 of the prediction: %f' % gpr_score)

    print("Shape of the query points:", np.shape(X[:2, :]))
    print("Query points:", X[:2, :])
    gpr_predict = gpr.predict(X[:2, :], return_std=True)
    print(gpr_predict)
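With return_std=True, GaussianProcessRegressor.predict returns the posterior mean and the per-point predictive standard deviation, so the result above unpacks naturally:

y_mean, y_std = gpr.predict(X[:2, :], return_std=True)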
def test_regression_synthetic():
    # Test on synthetic regression datasets used in Leo Breiman,
    # "Bagging Predictors". Machine Learning 24(2): 123-140 (1996).
    random_state = check_random_state(1)
    regression_params = {
        "n_estimators": 100,
        "max_depth": 4,
        "min_samples_split": 2,
        "learning_rate": 0.1,
        "loss": "squared_error",
    }

    # Friedman1
    X, y = datasets.make_friedman1(n_samples=1200, random_state=random_state, noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    clf = GradientBoostingRegressor()
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 5.0

    # Friedman2
    X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    clf = GradientBoostingRegressor(**regression_params)
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 1700.0

    # Friedman3
    X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    clf = GradientBoostingRegressor(**regression_params)
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 0.015
Example #17
def test_all_regressors():
    x, y = make_friedman2(10000)
    x_train, y_train, x_test, y_test = test_helpers.split_dataset(x, y)
    # print(y_test[:100])
    ols = LinearRegression()
    ols.fit(x_train, y_train)
    ols_pred = ols.predict(x_test)
    # print(ols_pred[:100])
    ols_mse = mean_square_error(y_test, ols_pred)

    for fn in regressors:

        print(fn)
        model = fn(x_train, y_train)
        print(model)
        pred = model.predict(x_test)
        # print(pred[:100])
        mse = mean_square_error(y_test, pred)

        print("OLS MSE:", ols_mse, " Current MSE:", mse)
        print("Ratio:", mse / ols_mse)
        # every regressor must beat the OLS baseline by at least 10%
        assert ols_mse > 1.1 * mse
Example #19
def friedman2(n_samples=20000):
    """Generated data."""
    (data, target) = datasets.make_friedman2(n_samples=n_samples)
    return DatasetFactory.Dataset(data=data, target=target)
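DatasetFactory is project-specific; assuming its Dataset simply keeps the arrays on data/target attributes, a call would look like:

ds = friedman2(n_samples=1000)  # ds.data: (1000, 4), ds.target: (1000,)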
Example #20
def uniform_dataset(args):
    X = np.random.random(size=(args.num_examples, args.num_features))
    y = np.random.choice([-1, 1], size=args.num_examples)
    return (X, y)


DATASETS = {
    "uniform":
    uniform_dataset,
    "hastie":
    lambda args: datasets.make_hastie_10_2(n_samples=args.num_examples),
    "friedman1":
    lambda args: datasets.make_friedman1(n_samples=args.num_examples,
                                         n_features=args.num_features),
    "friedman2":
    lambda args: datasets.make_friedman2(n_samples=args.num_examples,
                                         noise=args.noise),
    "friedman3":
    lambda args: datasets.make_friedman3(n_samples=args.num_examples,
                                         noise=args.noise),
    "make_regression":
    lambda args: datasets.make_regression(n_samples=args.num_examples,
                                          n_features=args.num_features,
                                          n_informative=args.num_informative)
}
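Each DATASETS entry is called with a parsed-argument object; a minimal sketch using argparse.Namespace with the attribute names assumed above (only num_examples and noise matter for friedman2):

import argparse

args = argparse.Namespace(num_examples=1000, num_features=10,
                          noise=1.0, num_informative=5)
X, y = DATASETS["friedman2"](args)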

ENSEMBLE_REGRESSORS = [
    ("GB-D1", with_depth(ensemble.GradientBoostingRegressor, 1)),
    ("GB-D3", with_depth(ensemble.GradientBoostingRegressor, 3)),
    ("GB-B10", with_best_first(ensemble.GradientBoostingRegressor, 10)),
    ("RF-D1", with_depth(ensemble.RandomForestRegressor, 1)),
    ("RF-D3", with_depth(ensemble.RandomForestRegressor, 3)),
Example #21
# filename = input('Enter the path of the file to write to: ')
# Define the color maps for plots
color_map = plt.cm.get_cmap('RdYlBu')
color_map_discrete = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ["red", "cyan", "magenta", "blue"])
fig = plt.figure(figsize=(18, 5))

x, y = dt.make_friedman1(n_samples=1000, n_features=5, random_state=rand_state)
dataset_x1 = x
dataset_y1 = y
ax = fig.add_subplot(131, projection='3d')
my_scatter_plot = ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=y, cmap=color_map)
fig.colorbar(my_scatter_plot)
plt.title('make_friedman1')

x, y = dt.make_friedman2(n_samples=1000, random_state=rand_state)
dataset_x2 = x
dataset_y2 = y
ax = fig.add_subplot(132, projection='3d')
my_scatter_plot = ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=y, cmap=color_map)
fig.colorbar(my_scatter_plot)
plt.title('make_friedman2')

x, y = dt.make_friedman3(n_samples=1000, random_state=rand_state)
dataset_x3 = x
dataset_y3 = y
ax = fig.add_subplot(133, projection='3d')
my_scatter_plot = ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=y, cmap=color_map)
fig.colorbar(my_scatter_plot)
plt.suptitle('make_friedman?() for Non-Linear Data', fontsize=20)
plt.title('make_friedman3')
Example #22
'''Friedman #2 data example'''

import rvm
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_friedman2
from sklearn.datasets import make_friedman3

print('Friedman data set')

np.random.seed(2)
n_exp = 1
rel_vec = 0
RMSE = 0
for i in range(n_exp):
    train_x, train_y = make_friedman2(240, noise=0)
    cl = rvm.RVMRegression(kernel="rbf", gamma=0.00001)
    cl.fit(np.matrix(train_x), np.matrix(train_y.reshape(240, 1)))
    rel_vec += len(cl.rel_ind)

    valid_x, valid_y = make_friedman2(240)
    pred_y = cl.predict(valid_x)

    RMSE += mean_squared_error(valid_y, pred_y) ** 0.5

print("Vectors: ", rel_vec / n_exp)
print("Root mean square = ", RMSE / n_exp)
Example #23
def getSKData(style='timeseries', n_samples=1, **kwargs):
    if isinstance(style, str):
        style = Style(style.lower())
    if style == Style.REGRESSION:
        return make_regression(
            n_samples, kwargs.get('n_features', RegressionArgs.n_features),
            kwargs.get('n_informative', RegressionArgs.n_informative),
            kwargs.get('n_targets', RegressionArgs.n_targets),
            kwargs.get('bias', RegressionArgs.bias),
            kwargs.get('effective_rank', RegressionArgs.effective_rank),
            kwargs.get('tail_strength', RegressionArgs.tail_strength),
            kwargs.get('noise', RegressionArgs.noise),
            kwargs.get('shuffle', RegressionArgs.shuffle),
            kwargs.get('coef', RegressionArgs.coef),
            kwargs.get('random_state', RegressionArgs.random_state))
    elif style == Style.BLOBS:
        return make_blobs(n_samples,
                          kwargs.get('n_features', BlobsArgs.n_features),
                          kwargs.get('centers', BlobsArgs.centers),
                          kwargs.get('cluster_std', BlobsArgs.cluster_std),
                          kwargs.get('center_box', BlobsArgs.center_box),
                          kwargs.get('shuffle', BlobsArgs.shuffle),
                          kwargs.get('random_state', BlobsArgs.random_state))
    elif style == Style.CLASSIFICATION:
        return make_classification(
            n_samples, kwargs.get('n_features', ClassificationArgs.n_features),
            kwargs.get('n_informative', ClassificationArgs.n_informative),
            kwargs.get('n_redundant', ClassificationArgs.n_redundant),
            kwargs.get('n_repeated', ClassificationArgs.n_repeated),
            kwargs.get('n_classes', ClassificationArgs.n_classes),
            kwargs.get('n_clusters_per_class',
                       ClassificationArgs.n_clusters_per_class),
            kwargs.get('weights', ClassificationArgs.weights),
            kwargs.get('flip_y', ClassificationArgs.flip_y),
            kwargs.get('class_sep', ClassificationArgs.class_sep),
            kwargs.get('hypercube', ClassificationArgs.hypercube),
            kwargs.get('shift', ClassificationArgs.shift),
            kwargs.get('scale', ClassificationArgs.scale),
            kwargs.get('shuffle', ClassificationArgs.shuffle),
            kwargs.get('random_state', ClassificationArgs.random_state))
    elif style == Style.MULTILABEL:
        return make_multilabel_classification(
            n_samples,
            kwargs.get('n_features', MultilabelClassificationArgs.n_features),
            kwargs.get('n_classes', MultilabelClassificationArgs.n_classes),
            kwargs.get('n_labels', MultilabelClassificationArgs.n_labels),
            kwargs.get('length', MultilabelClassificationArgs.length),
            kwargs.get('allow_unlabeled',
                       MultilabelClassificationArgs.allow_unlabeled),
            kwargs.get('sparse', MultilabelClassificationArgs.sparse),
            kwargs.get('return_indicator',
                       MultilabelClassificationArgs.return_indicator),
            kwargs.get('return_distributions',
                       MultilabelClassificationArgs.return_distributions),
            kwargs.get('random_state',
                       MultilabelClassificationArgs.random_state))
    elif style == Style.GAUSSIAN:
        return make_gaussian_quantiles(
            n_samples=n_samples,
            n_features=kwargs.get('n_features', GaussianArgs.n_features),
            mean=kwargs.get('mean', GaussianArgs.mean),
            cov=kwargs.get('cov', GaussianArgs.cov),
            n_classes=kwargs.get('n_classes', GaussianArgs.n_classes),
            shuffle=kwargs.get('shuffle', GaussianArgs.shuffle),
            random_state=kwargs.get('random_state', GaussianArgs.random_state))
    elif style == Style.HASTIE:
        return make_hastie_10_2(n_samples,
                                random_state=kwargs.get(
                                    'random_state', HastieArgs.random_state))
    elif style == Style.CIRCLES:
        return make_circles(
            n_samples, kwargs.get('shuffle', CirclesArgs.shuffle),
            kwargs.get('noise', CirclesArgs.noise),
            kwargs.get('random_state', CirclesArgs.random_state),
            kwargs.get('factor', CirclesArgs.factor))
    elif style == Style.MOONS:
        return make_moons(n_samples, kwargs.get('shuffle', MoonsArgs.shuffle),
                          kwargs.get('noise', MoonsArgs.noise),
                          kwargs.get('random_state', MoonsArgs.random_state))
    elif style == Style.BICLUSTERS:
        return make_biclusters(
            kwargs.get('shape', BiclusterArgs.shape),
            kwargs.get('n_clusters', BiclusterArgs.n_clusters),
            kwargs.get('noise', BiclusterArgs.noise),
            kwargs.get('minval', BiclusterArgs.minval),
            kwargs.get('maxval', BiclusterArgs.maxval),
            kwargs.get('shuffle', BiclusterArgs.shuffle),
            kwargs.get('random_state', BiclusterArgs.random_state))
    elif style == Style.SCURVE:
        return make_s_curve(
            n_samples, kwargs.get('noise', SCurveArgs.noise),
            kwargs.get('random_state', SCurveArgs.random_state))
    elif style == Style.CHECKER:
        return make_checkerboard(
            kwargs.get('shape', CheckerArgs.shape),
            kwargs.get('n_clusters', CheckerArgs.n_clusters),
            kwargs.get('noise', CheckerArgs.noise),
            kwargs.get('minval', CheckerArgs.minval),
            kwargs.get('maxval', CheckerArgs.maxval),
            kwargs.get('shuffle', CheckerArgs.shuffle),
            kwargs.get('random_state', CheckerArgs.random_state))
    elif style == Style.FRIEDMAN:
        return make_friedman1(
            n_samples, kwargs.get('n_features', FriedmanArgs.n_features),
            kwargs.get('noise', FriedmanArgs.noise),
            kwargs.get('random_state', FriedmanArgs.random_state))
    elif style == Style.FRIEDMAN2:
        return make_friedman2(
            n_samples, kwargs.get('noise', Friedman2Args.noise),
            kwargs.get('random_state', Friedman2Args.random_state))
    elif style == Style.FRIEDMAN3:
        return make_friedman3(
            n_samples, kwargs.get('noise', Friedman3Args.noise),
            kwargs.get('random_state', Friedman3Args.random_state))
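A sketch of a call into this dispatcher, assuming the Style enum and the *Args default holders defined elsewhere in the project:

X, y = getSKData('friedman2', n_samples=200, noise=1.0)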
def prep_data_sklearn(dataset_name, test_size=0.2, model_class='realkd', downsample_size=None, norm_mean=False,
                      random_seed=None, pos_class=None):

    target_name, without = dataset_signature(dataset_name)

    if dataset_name == 'tic-tac-toe':
        bunch = ds.fetch_openml(dataset_name)
        df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
        df.rename(lambda s: s[:-7], axis='columns', inplace=True)
        df.replace(0, 'b', inplace=True)
        df.replace(1, 'o', inplace=True)
        df.replace(2, 'x', inplace=True)
        data_rf = pd.get_dummies(df)
        target = pd.Series(where(bunch.target == 'positive', 1, -1))
    elif dataset_name == 'kr - vs - kp':
        bunch = ds.fetch_openml(data_id=3)
        df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
        data_rf = pd.get_dummies(df)
        target = pd.Series(where(bunch.target == 'won', 1, -1))
    elif dataset_name == 'breast_cancer':
        bunch = ds.load_breast_cancer()
        df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
        data_rf = pd.get_dummies(df)
        target = pd.Series(where(bunch.target == 1, 1, -1))
    elif dataset_name == 'iris':
        bunch = ds.load_iris()
        df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
        data_rf = pd.get_dummies(df)
        target = pd.Series(where(bunch.target == 1, 1, -1))
    elif dataset_name == 'make_friedman1':
        global_friedman_cols = 10
        data, target = ds.make_friedman1(n_samples=2000, n_features=10, noise=0.1, random_state=random_seed) # 1
        no_cols = np.size(data, 1)
        col_names = ['X' + str(i+1) for i in range(no_cols)]
        data_rf = pd.DataFrame(data, columns=col_names)
        target = pd.Series(target)
    elif dataset_name == 'make_friedman2':
        data, target = ds.make_friedman2(n_samples=2000, noise=0.1, random_state=random_seed) # 1
        no_cols = np.size(data, 1)
        col_names = ['X' + str(i+1) for i in range(no_cols)]
        data_rf = pd.DataFrame(data, columns=col_names)
        target = pd.Series(target)
    elif dataset_name == 'make_friedman3':
        data, target = ds.make_friedman3(n_samples=2000, noise=0.1, random_state=random_seed)
        no_cols = np.size(data, 1)
        col_names = ['X' + str(i+1) for i in range(no_cols)]
        data_rf = pd.DataFrame(data, columns=col_names)
        target = pd.Series(target)
    elif dataset_name == 'make_classification2':
        data, target = ds.make_classification(n_samples=2000, n_features=8, n_classes=2,
                                              hypercube=True, n_clusters_per_class=3,
                                              n_informative=3, n_redundant=3, n_repeated=0,
                                              random_state=random_seed)
        no_cols = np.size(data, 1)
        col_names = ['X' + str(i+1) for i in range(no_cols)]
        data_rf = pd.DataFrame(data, columns=col_names)
        target = pd.Series(where(target == 1, 1, -1))
    elif dataset_name == 'make_classification3':
        data, target = ds.make_classification(n_samples=2000, n_features=15, n_classes=3,
                                              hypercube=True, n_clusters_per_class=3,
                                              n_informative=5, n_redundant=5, n_repeated=0,
                                              random_state=random_seed)
        no_cols = np.size(data, 1)
        col_names = ['X' + str(i + 1) for i in range(no_cols)]
        data_rf = pd.DataFrame(data, columns=col_names)
        target = pd.Series(where(target == 1, 1, -1))
    elif dataset_name == 'load_wine':
        bunch = ds.load_wine()
        df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
        data_rf = pd.get_dummies(df)
        target = pd.Series(where(bunch.target == 1, 1, -1))
    elif dataset_name == 'make_hastie_10_2':
        data, target = ds.make_hastie_10_2(n_samples=12000, random_state=random_seed)
        no_cols = np.size(data, 1)
        col_names = ['X' + str(i+1) for i in range(no_cols)]
        data_rf = pd.DataFrame(data, columns=col_names)
        target = pd.Series(where(target == 1, 1, -1))
    elif dataset_name == 'load_diabetes':
        bunch = ds.load_diabetes()
        df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
        data_rf = pd.get_dummies(df)
        target = pd.Series(bunch.target)
    elif dataset_name[:-1] == 'noisy_pairity_':
        d = int(dataset_name[-1])
        data, target_name, random_seed = prep_noisy_pairity(d=d, random_seed=random_seed)
        return data, target_name, random_seed
    elif dataset_name == 'digits5':
        data_rf, target = prep_digits()

    x_train, x_test, y_train, y_test = train_test_split(data_rf, target, test_size=test_size, random_state=random_seed)

    if downsample_size is not None:
        x_train[target_name] = y_train
        sampled_train = x_train.sample(n=min(downsample_size, len(y_train)), random_state=random_seed)
        x_train.reset_index(inplace=True, drop=True)  # this may be unnecessary
        y_train = sampled_train[target_name]
        x_train = sampled_train.drop([target_name], axis='columns')

    if norm_mean:  # scikitlearn transformer.
        target_train_mean = sum(y_train) / len(y_train)
        y_train -= target_train_mean
        y_test -= target_train_mean

        y_train = [y_train, target_train_mean]
        y_test = [y_test, target_train_mean]


    data = [x_train, y_train, x_test, y_test]

    n = (len(y_train), len(y_test))

    return data, target_name, random_seed
Example #25
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.datasets import make_friedman2
import matplotlib.pyplot as plt
from sklearn.gaussian_process.kernels import Matern
import numpy as np

N = 10000
addns = 2
X, _ = make_friedman2(n_samples=N, noise=0, random_state=0)
Xtrain = X[:int(0.7 * N)]
Xpool = X[int(0.7 * N):int(0.8 * N)]
Xtest = X[int(0.8 * N):]
Xtrain = (Xtrain - np.mean(Xtrain)) / np.std(Xtrain)
Xtest = (Xtest - np.mean(Xtest)) / np.std(Xtest)

y = np.random.randint(2, size=N)
ytrain = y[:int(0.7 * N)]
ypool = y[int(0.7 * N):int(0.8 * N)]  # was X[...]; pool labels must come from y
ytest = y[int(0.8 * N):]  # was y[int(0.7 * N):]; aligned with Xtest

model = GaussianProcessClassifier()
gp = model.fit(Xtrain, ytrain)
preds = gp.predict_proba(Xtrain)

preds = np.max(preds, axis=1)  # confidence of the predicted class per sample
newXs = (1 - preds).argsort()[-addns:][::-1]  # the addns least-confident indices
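The last two lines are uncertainty sampling: keep each sample's predicted-class confidence, then take the addns least-confident indices. The usual next step, sketched here, is to score the pool set (rather than Xtrain as above), move the queried points into the training data, and refit:

pool_conf = np.max(gp.predict_proba(Xpool), axis=1)
query = (1 - pool_conf).argsort()[-addns:][::-1]  # least-confident pool points
Xtrain = np.vstack([Xtrain, Xpool[query]])
ytrain = np.concatenate([ytrain, ypool[query]])
gp = model.fit(Xtrain, ytrain)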
Example #26
    @logger.catch
    def predict_proba(self, X, **kwargs):
        prediction = self.model.predict_proba(X, **kwargs)
        return prediction

    @logger.catch
    def partial_fit(self, X, y, **kwargs):
        self.changed = True
        self.model.partial_fit(X, y, **kwargs)

    def fit(self, X, y, **kwargs):
        self.changed = True
        self.model.fit(X, y, **kwargs)


class CustomSklearnGaussianProcedure(SklearnProcedure):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        kernel = DotProduct() + WhiteKernel()
        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        self.dictionary.model = gpr


if __name__ == "__main__":
    general_procedure = CustomSklearnGaussianProcedure()
    X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    general_procedure.fit(X, y)
    print(general_procedure.predict(X[:2, :], return_std=True))
    # print(general_procedure.get_params())
    print(general_procedure.extract())
Example #27
import numpy as np
import sklearn.datasets as Datasets
import seaborn as sn
import matplotlib.pyplot as plt

X, y = Datasets.make_friedman2(200, 0.3)
index = np.random.choice(range(len(X)), 3)
center = X[index, :]
print(center)
print("the output of load_wine() :: ", datasets.load_wine())

#make_blobs() excuted
print("the output of make_blobs() :: ", datasets.make_blobs())

#make_circles() executed
print("the output of make_circles() :: ", datasets.make_circles())

#make_classification() executed
print("the output of make_classification() :: ",
      datasets.make_classification())
#make_friedman1() executed
print("the output of make_friedman1() :: ", datasets.make_friedman1())

#make_friedman2() executed
print("the output of make_friedman2() :: ", datasets.make_friedman2())

#make_friedman3() executed
print("the output of make_friedman3() :: ", datasets.make_friedman3())

#make_gaussian_quantiles() executed
print("the output of make_gaussian_quantiles() :: ",
      datasets.make_gaussian_quantiles())

#make_hastie_10_2() executed
print("the output of make_hastie_10_2() :: ", datasets.make_hastie_10_2())

#make_moons() executed
print("the output of make_moons() :: ", datasets.make_moons())

#make_multilabel_classification() executed
from sklearn import datasets
import matplotlib.pyplot as plt

# make_friedman2 data
X, y = datasets.make_friedman2(n_samples=100, noise=0.0, random_state=None)
print(X)
print(y)
Example #30
File: tf_test.py (project: nsaura/ML)
try:
    reload  # Python 2.7
except NameError:
    try:
        from importlib import reload  # Python 3.4+
    except ImportError:
        from imp import reload  # Python 3.0 - 3.3

NNC = reload(NNC)

import tensorflow as tf

boston = sdata.load_boston()
diabetes = sdata.load_diabetes()
mf1 = sdata.make_friedman1(n_samples=2500)
mf2 = sdata.make_friedman2(n_samples=2500)

datas =\
[   [boston.data, boston.target, "boston"],
    [diabetes.data, diabetes.target, "diabetes"],
    [mf1[0], mf1[1], "friedman1"],
    [mf2[0], mf2[1], "friedman2"],
]

dict_layers = lambda x, size: \
{   "I" : x,\
    "N1" : size,\
    "N2" : size,\
    "N3" : size,\
    "N4" : size,\
    "N5" : size,\
Example #31
def getSKData(style='timeseries', as_dataframe=False, n_samples=10, **kwargs):
    if style == 'regression':
        return make_regression(n_samples,
                               kwargs.get('n_features', RegressionArgs.n_features),
                               kwargs.get('n_informative', RegressionArgs.n_informative),
                               kwargs.get('n_targets', RegressionArgs.n_targets),
                               kwargs.get('bias', RegressionArgs.bias),
                               kwargs.get('effective_rank', RegressionArgs.effective_rank),
                               kwargs.get('tail_strength', RegressionArgs.tail_strength),
                               kwargs.get('noise', RegressionArgs.noise),
                               kwargs.get('shuffle', RegressionArgs.shuffle),
                               kwargs.get('coef', RegressionArgs.coef),
                               kwargs.get('random_state', RegressionArgs.random_state))
    elif style == 'blobs':
        return make_blobs(n_samples,
                          kwargs.get('n_features', BlobsArgs.n_features),
                          kwargs.get('centers', BlobsArgs.centers),
                          kwargs.get('cluster_std', BlobsArgs.cluster_std),
                          kwargs.get('center_box', BlobsArgs.center_box),
                          kwargs.get('shuffle', BlobsArgs.shuffle),
                          kwargs.get('random_state', BlobsArgs.random_state))
    elif style == 'classification':
        return make_classification(n_samples,
                                   kwargs.get('n_features', ClassificationArgs.n_features),
                                   kwargs.get('n_informative', ClassificationArgs.n_informative),
                                   kwargs.get('n_redundant', ClassificationArgs.n_redundant),
                                   kwargs.get('n_repeated', ClassificationArgs.n_repeated),
                                   kwargs.get('n_classes', ClassificationArgs.n_classes),
                                   kwargs.get('n_clusters_per_class', ClassificationArgs.n_clusters_per_class),
                                   kwargs.get('weights', ClassificationArgs.weights),
                                   kwargs.get('flip_y', ClassificationArgs.flip_y),
                                   kwargs.get('class_sep', ClassificationArgs.class_sep),
                                   kwargs.get('hypercube', ClassificationArgs.hypercube),
                                   kwargs.get('shift', ClassificationArgs.shift),
                                   kwargs.get('scale', ClassificationArgs.scale),
                                   kwargs.get('shuffle', ClassificationArgs.shuffle),
                                   kwargs.get('random_state', ClassificationArgs.random_state))
    elif style == 'multilabel':
        return make_multilabel_classification(n_samples,
                                              kwargs.get('n_features', MultilabelClassificationArgs.n_features),
                                              kwargs.get('n_classes', MultilabelClassificationArgs.n_classes),
                                              kwargs.get('n_labels', MultilabelClassificationArgs.n_labels),
                                              kwargs.get('length', MultilabelClassificationArgs.length),
                                              kwargs.get('allow_unlabeled', MultilabelClassificationArgs.allow_unlabeled),
                                              kwargs.get('sparse', MultilabelClassificationArgs.sparse),
                                              kwargs.get('return_indicator', MultilabelClassificationArgs.return_indicator),
                                              kwargs.get('return_distributions', MultilabelClassificationArgs.return_distributions),
                                              kwargs.get('random_state', MultilabelClassificationArgs.random_state))
    elif style == 'gaussian':
        return make_gaussian_quantiles(n_samples=n_samples,
                                       n_features=kwargs.get('n_features', GaussianArgs.n_features),
                                       mean=kwargs.get('mean', GaussianArgs.mean),
                                       cov=kwargs.get('cov', GaussianArgs.cov),
                                       n_classes=kwargs.get('n_classes', GaussianArgs.n_classes),
                                       shuffle=kwargs.get('shuffle', GaussianArgs.shuffle),
                                       random_state=kwargs.get('random_state', GaussianArgs.random_state))
    elif style == 'hastie':
        return make_hastie_10_2(n_samples,
                                random_state=kwargs.get('random_state', HastieArgs.random_state))
    elif style == 'circles':
        return make_circles(n_samples,
                            kwargs.get('shuffle', CirclesArgs.shuffle),
                            kwargs.get('noise', CirclesArgs.noise),
                            kwargs.get('random_state', CirclesArgs.random_state),
                            kwargs.get('factor', CirclesArgs.factor))
    elif style == 'moons':
        return make_moons(n_samples,
                          kwargs.get('shuffle', MoonsArgs.shuffle),
                          kwargs.get('noise', MoonsArgs.noise),
                          kwargs.get('random_state', MoonsArgs.random_state))
    elif style == 'biclusters':
        x = make_biclusters(kwargs.get('shape', BiclusterArgs.shape),
                            kwargs.get('n_clusters', BiclusterArgs.n_clusters),
                            kwargs.get('noise', BiclusterArgs.noise),
                            kwargs.get('minval', BiclusterArgs.minval),
                            kwargs.get('maxval', BiclusterArgs.maxval),
                            kwargs.get('shuffle', BiclusterArgs.shuffle),
                            kwargs.get('random_state', BiclusterArgs.random_state))
        if as_dataframe:
            return pd.concat([pd.DataFrame(x[0]), pd.DataFrame(x[1].T)], axis=1)
        else:
            return x

    elif style == 'scurve':
        return make_s_curve(n_samples,
                            kwargs.get('noise', SCurveArgs.noise),
                            kwargs.get('random_state', SCurveArgs.random_state))
    elif style == 'checker':
        return make_checkerboard(kwargs.get('shape', CheckerArgs.shape),
                                 kwargs.get('n_clusters', CheckerArgs.n_clusters),
                                 kwargs.get('noise', CheckerArgs.noise),
                                 kwargs.get('minval', CheckerArgs.minval),
                                 kwargs.get('maxval', CheckerArgs.maxval),
                                 kwargs.get('shuffle', CheckerArgs.shuffle),
                                 kwargs.get('random_state', CheckerArgs.random_state))
    elif style == 'friedman':
        return make_friedman1(n_samples,
                              kwargs.get('n_features', FriedmanArgs.n_features),
                              kwargs.get('noise', FriedmanArgs.noise),
                              kwargs.get('random_state', FriedmanArgs.random_state))
    elif style == 'friedman2':
        return make_friedman2(n_samples,
                              kwargs.get('noise', Friedman2Args.noise),
                              kwargs.get('random_state', Friedman2Args.random_state))
    elif style == 'friedman3':
        return make_friedman3(n_samples,
                              kwargs.get('noise', Friedman3Args.noise),
                              kwargs.get('random_state', Friedman3Args.random_state))
def with_best_first(cls, max_leaf_nodes):
    return partial(cls, max_leaf_nodes=max_leaf_nodes)


def uniform_dataset(args):
    X = np.random.random(size=(args.num_examples, args.num_features))
    y = np.random.choice([-1, 1], size=args.num_examples)
    return (X, y)

DATASETS = {
    "uniform": uniform_dataset,
    "hastie": lambda args: datasets.make_hastie_10_2(
        n_samples=args.num_examples),
    "friedman1": lambda args: datasets.make_friedman1(
        n_samples=args.num_examples, n_features=args.num_features),
    "friedman2": lambda args: datasets.make_friedman2(
        n_samples=args.num_examples, noise=args.noise),
    "friedman3": lambda args: datasets.make_friedman3(
        n_samples=args.num_examples, noise=args.noise),
    "make_regression": lambda args: datasets.make_regression(
        n_samples=args.num_examples,
        n_features=args.num_features,
        n_informative=args.num_informative)
}

ENSEMBLE_REGRESSORS = [
    ("GB-D1", with_depth(ensemble.GradientBoostingRegressor, 1)),
    ("GB-D3", with_depth(ensemble.GradientBoostingRegressor, 3)),
    ("GB-B10", with_best_first(ensemble.GradientBoostingRegressor, 10)),
    ("RF-D1", with_depth(ensemble.RandomForestRegressor, 1)),
    ("RF-D3", with_depth(ensemble.RandomForestRegressor, 3)),
    ("RF-D5", with_depth(ensemble.RandomForestRegressor, 5)),