import numpy as np
from sklearn import datasets

def FriedmanDataset_2():
    d_train = datasets.make_friedman2(240, random_state=0)
    d_test = datasets.make_friedman2(1000, random_state=0)

    features_train = d_train[0]
    # Add heteroscedastic noise: each entry is perturbed with a standard
    # deviation proportional to its own magnitude (|x| / 3).
    for i in range(240):
        features_train[i] += np.random.normal(0, features_train[i] / 3)
    target_train = d_train[1]

    features_test = d_test[0]
    target_test = d_test[1]
    return features_train, target_train, features_test, target_test
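# A vectorized sketch of the same scale-proportional noise injection as in
# FriedmanDataset_2 above: one array-valued draw replaces the per-row loop.
# The seeded default_rng is an assumption added for reproducibility (the
# loop above uses the global np.random state instead).
import numpy as np
from sklearn import datasets

rng = np.random.default_rng(0)
X_train, y_train = datasets.make_friedman2(240, random_state=0)
# all friedman2 features are non-negative, so X_train / 3 is a valid scale
X_train = X_train + rng.normal(0.0, X_train / 3)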
# assert helpers taken from numpy.testing (an assumption; older sklearn
# versions shipped equivalents in sklearn.utils.testing)
from numpy.testing import assert_array_almost_equal, assert_equal
from sklearn.datasets import make_friedman2

def test_make_friedman2():
    X, y = make_friedman2(n_samples=5, noise=0.0, random_state=0)

    assert_equal(X.shape, (5, 4), "X shape mismatch")
    assert_equal(y.shape, (5,), "y shape mismatch")

    assert_array_almost_equal(
        y,
        (X[:, 0] ** 2 + (X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5,
    )
def friedman2(n_samples=100, noise=0.0, random_state=None):
    return datasets.make_friedman2(n_samples=n_samples, noise=noise,
                                   random_state=random_state)
def test_regression_synthetic():
    """Test on synthetic regression datasets used in Leo Breiman,
    "Bagging Predictors". Machine Learning 24(2): 123-140 (1996).
    """
    random_state = check_random_state(1)
    regression_params = {'n_estimators': 100, 'max_depth': 4,
                         'min_samples_split': 2,  # must be at least 2
                         'learning_rate': 0.1, 'loss': 'ls'}

    # Friedman1
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=random_state,
                                   noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    clf = GradientBoostingRegressor()
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 5.0, "Failed on Friedman1 with mse = %.4f" % mse

    # Friedman2
    X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    clf = GradientBoostingRegressor(**regression_params)
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 1700.0, "Failed on Friedman2 with mse = %.4f" % mse

    # Friedman3
    X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    clf = GradientBoostingRegressor(**regression_params)
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 0.015, "Failed on Friedman3 with mse = %.4f" % mse
def test_make_friedman2():
    X, y = make_friedman2(n_samples=5, noise=0.0, random_state=0)

    assert X.shape == (5, 4), "X shape mismatch"
    assert y.shape == (5,), "y shape mismatch"

    assert_array_almost_equal(
        y,
        (X[:, 0] ** 2 + (X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5,
    )
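# Beyond the shape and formula checks above, the four friedman2 inputs are
# drawn uniformly from documented ranges: x0 in [0, 100], x1 in
# [40*pi, 560*pi], x2 in [0, 1], x3 in [1, 11]. A small sketch verifying
# those bounds (the sample size is an arbitrary assumption):
import numpy as np
from sklearn.datasets import make_friedman2

X, _ = make_friedman2(n_samples=10000, random_state=0)
lo = np.array([0.0, 40 * np.pi, 0.0, 1.0])
hi = np.array([100.0, 560 * np.pi, 1.0, 11.0])
assert np.all(X >= lo) and np.all(X <= hi)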
def friedman_2(n, noise):
    """Generate the Friedman #2 data set.

    Args:
        n (int): number of data samples
        noise (float): standard deviation of the Gaussian noise added to
            the output

    Returns:
        Friedman #2 data set as an (X, y) tuple
    """
    return make_friedman2(n_samples=n, noise=noise)
def generateDatas(N, F, choice):
    if choice == 'f1':
        X, Y = datasets.make_friedman1(N, F, noise=1)
    elif choice == 'f2':
        # friedman2 has a fixed 4 features; its second positional argument
        # is `noise`, so F must not be passed here
        X, Y = datasets.make_friedman2(N)
    elif choice == 'f3':
        # was `X, Y == ...`: a comparison instead of an assignment
        X, Y = datasets.make_friedman3(N)
    elif choice == 'boston':
        boston = datasets.load_boston()
        X, Y = boston.data, boston.target
    return X, Y
def genFriedman(self, i=1, N=240, D=10):
    if i not in range(1, 4):
        raise ValueError('not a correct dataset')
    if i == 1:
        X, Y = datasets.make_friedman1(N, D)
    if i == 2:
        # friedman2 and friedman3 always have 4 features; passing D as the
        # second positional argument would silently set the noise level
        X, Y = datasets.make_friedman2(N)
    if i == 3:
        X, Y = datasets.make_friedman3(N)
    return X, Y
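# A keyword-argument variant of genFriedman above that makes the
# friedman1-only feature count explicit; the `noise` and `seed` parameters
# are assumptions added for illustration, not part of the original API.
from sklearn import datasets

def gen_friedman(i=1, n=240, d=10, noise=0.0, seed=None):
    if i == 1:
        return datasets.make_friedman1(n_samples=n, n_features=d,
                                       noise=noise, random_state=seed)
    if i == 2:
        # friedman2 and friedman3 always have exactly 4 features
        return datasets.make_friedman2(n_samples=n, noise=noise,
                                       random_state=seed)
    if i == 3:
        return datasets.make_friedman3(n_samples=n, noise=noise,
                                       random_state=seed)
    raise ValueError('i must be 1, 2, or 3')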
def define_tested_reg_datasets():
    gDatasets = {}
    gDatasets["diabetes"] = datasets.load_diabetes()
    gDatasets["boston"] = datasets.load_boston()  # removed in scikit-learn 1.2
    gDatasets["friedman1"] = datasets.make_friedman1(random_state=1960)
    gDatasets["friedman2"] = datasets.make_friedman2(random_state=1960)
    gDatasets["friedman3"] = datasets.make_friedman3(random_state=1960)
    gDatasets["RandomReg_10"] = datasets.make_regression(n_features=10, random_state=1960)
    gDatasets["RandomReg_100"] = datasets.make_regression(n_features=100, random_state=1960)
    gDatasets["RandomReg_500"] = datasets.make_regression(n_features=500, random_state=1960)
    return gDatasets
def generate_Dataset(name_dataset):
    ind = f(name_dataset)  # map the dataset name to an index (default handling)
    if ind == 1:
        x, y = datasets.make_friedman1(n_samples=100, n_features=10,
                                       noise=0.0, random_state=None)
    elif ind == 2:
        x, y = datasets.make_friedman2(n_samples=100, noise=0.0,
                                       random_state=None)
    elif ind == 3:
        x, y = datasets.make_friedman3(n_samples=100, noise=0.0,
                                       random_state=None)
    else:
        x, y = datasets.load_boston(return_X_y=True)
    x = x.tolist()
    return x, y
def test_regression_synthetic():
    # Test on synthetic regression datasets used in Leo Breiman,
    # "Bagging Predictors". Machine Learning 24(2): 123-140 (1996).
    random_state = check_random_state(1)
    regression_params = {
        'n_estimators': 100,
        'max_depth': 4,
        'min_samples_split': 2,
        'learning_rate': 0.1,
        'loss': 'ls'
    }

    # Friedman1
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=random_state,
                                   noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]
    for presort in True, False:
        clf = GradientBoostingRegressor(presort=presort)
        clf.fit(X_train, y_train)
        mse = mean_squared_error(y_test, clf.predict(X_test))
        assert_less(mse, 5.0)

    # Friedman2
    X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]
    for presort in True, False:
        regression_params['presort'] = presort
        clf = GradientBoostingRegressor(**regression_params)
        clf.fit(X_train, y_train)
        mse = mean_squared_error(y_test, clf.predict(X_test))
        assert_less(mse, 1700.0)

    # Friedman3
    X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]
    for presort in True, False:
        regression_params['presort'] = presort
        clf = GradientBoostingRegressor(**regression_params)
        clf.fit(X_train, y_train)
        mse = mean_squared_error(y_test, clf.predict(X_test))
        assert_less(mse, 0.015)
import numpy as np
from sklearn.datasets import make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

def main():
    X, y = make_friedman2(n_samples=500, noise=0, random_state=0)  # data to fit
    print(type(X))
    print(np.shape(X))  # 500 samples, each with 4 features
    print(type(y))
    print(np.shape(y))  # 500 outputs

    kernel = DotProduct() + WhiteKernel()
    gpr = GaussianProcessRegressor(kernel=kernel,
                                   optimizer='fmin_l_bfgs_b',
                                   random_state=0).fit(X, y)
    gpr_score = gpr.score(X, y)  # R^2 of the prediction, on the training data
    print('R^2 of the prediction: %f' % gpr_score)

    print("Shape of the slice:", np.shape(X[:2, :]))
    print("Slice:", X[:2, :])
    gpr_predict = gpr.predict(X[:2, :], return_std=True)
    print(gpr_predict)
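# The snippet above scores the GP on its own training data, so the reported
# R^2 is optimistic. A minimal held-out variant, assuming an 80/20 split
# (the split ratio and seed are assumptions):
from sklearn.datasets import make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
from sklearn.model_selection import train_test_split

X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)
gpr = GaussianProcessRegressor(kernel=DotProduct() + WhiteKernel(),
                               random_state=0).fit(X_tr, y_tr)
print('held-out R^2: %.3f' % gpr.score(X_te, y_te))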
def test_regression_synthetic():
    # Test on synthetic regression datasets used in Leo Breiman,
    # "Bagging Predictors". Machine Learning 24(2): 123-140 (1996).
    random_state = check_random_state(1)
    regression_params = {
        "n_estimators": 100,
        "max_depth": 4,
        "min_samples_split": 2,
        "learning_rate": 0.1,
        "loss": "squared_error",
    }

    # Friedman1
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=random_state,
                                   noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    clf = GradientBoostingRegressor()
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 5.0

    # Friedman2
    X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    clf = GradientBoostingRegressor(**regression_params)
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 1700.0

    # Friedman3
    X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    clf = GradientBoostingRegressor(**regression_params)
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    assert mse < 0.015
def test_all_regressors():
    x, y = make_friedman2(10000)
    x_train, y_train, x_test, y_test = test_helpers.split_dataset(x, y)

    # ordinary least squares as the baseline
    ols = LinearRegression()
    ols.fit(x_train, y_train)
    ols_pred = ols.predict(x_test)
    ols_mse = mean_square_error(y_test, ols_pred)

    for fn in regressors:
        print(fn)
        model = fn(x_train, y_train)
        print(model)
        pred = model.predict(x_test)
        mse = mean_square_error(y_test, pred)
        print("OLS MSE:", ols_mse, " Current MSE:", mse)
        print("Ratio:", mse / ols_mse)
        # every regressor should beat the linear baseline by at least 10%
        assert ols_mse > 1.1 * mse
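# The same OLS-baseline comparison with stock scikit-learn utilities, for
# readers without the `test_helpers` module; the RandomForestRegressor
# stand-in for the `regressors` list and the seeds are assumptions.
from sklearn.datasets import make_friedman2
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

X, y = make_friedman2(10000, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
ols_mse = mean_squared_error(
    y_te, LinearRegression().fit(X_tr, y_tr).predict(X_te))
rf_mse = mean_squared_error(
    y_te, RandomForestRegressor(random_state=0).fit(X_tr, y_tr).predict(X_te))
print("OLS MSE:", ols_mse, " RF MSE:", rf_mse, " Ratio:", rf_mse / ols_mse)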
def friedman2(n_samples=20000):
    """Generated data."""
    data, target = datasets.make_friedman2(n_samples=n_samples)
    return DatasetFactory.Dataset(data=data, target=target)
# filename = input('Enter the path of the file to write: ')

# Define the color maps for plots
color_map = plt.cm.get_cmap('RdYlBu')
color_map_discrete = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ["red", "cyan", "magenta", "blue"])

fig = plt.figure(figsize=(18, 5))

x, y = dt.make_friedman1(n_samples=1000, n_features=5, random_state=rand_state)
dataset_x1 = x
dataset_y1 = y
ax = fig.add_subplot(131, projection='3d')
my_scatter_plot = ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=y, cmap=color_map)
fig.colorbar(my_scatter_plot)
plt.title('make_friedman1')

x, y = dt.make_friedman2(n_samples=1000, random_state=rand_state)
dataset_x2 = x
dataset_y2 = y
ax = fig.add_subplot(132, projection='3d')
my_scatter_plot = ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=y, cmap=color_map)
fig.colorbar(my_scatter_plot)
plt.title('make_friedman2')

x, y = dt.make_friedman3(n_samples=1000, random_state=rand_state)
dataset_x3 = x
dataset_y3 = y
ax = fig.add_subplot(133, projection='3d')
my_scatter_plot = ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=y, cmap=color_map)
fig.colorbar(my_scatter_plot)

plt.suptitle('make_friedman?() for Non-Linear Data', fontsize=20)
plt.title('make_friedman3')
'''Friedman #2 data example'''
import rvm
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_friedman2
from sklearn.datasets import make_friedman3

print('Friedman data set')
np.random.seed(2)

n_exp = 1
rel_vec = 0
RMSE = 0
for i in range(n_exp):
    train_x, train_y = make_friedman2(240, noise=0)
    cl = rvm.RVMRegression(kernel="rbf", gamma=0.00001)
    cl.fit(np.matrix(train_x), np.matrix(train_y.reshape(240, 1)))
    rel_vec += len(cl.rel_ind)

    valid_x, valid_y = make_friedman2(240)
    pred_y = cl.predict(valid_x)
    RMSE += mean_squared_error(valid_y, pred_y) ** 0.5

print("Vectors: ", rel_vec / n_exp)
print("Root mean square = ", RMSE / n_exp)
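# A baseline sketch for readers without the external `rvm` module: a stock
# Gaussian process regressor on the same 240-sample task. The kernel choice
# and seeds are assumptions, not tuned to match the RVM above.
from sklearn.datasets import make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
from sklearn.metrics import mean_squared_error

train_x, train_y = make_friedman2(240, noise=0, random_state=2)
valid_x, valid_y = make_friedman2(240, random_state=3)
gp = GaussianProcessRegressor(kernel=RBF() + WhiteKernel(), random_state=0)
gp.fit(train_x, train_y)
print("GP RMSE:", mean_squared_error(valid_y, gp.predict(valid_x)) ** 0.5)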
def getSKData(style='timeseries', n_samples=1, **kwargs):
    if isinstance(style, str):
        style = Style(style.lower())
    if style == Style.REGRESSION:
        return make_regression(
            n_samples,
            kwargs.get('n_features', RegressionArgs.n_features),
            kwargs.get('n_informative', RegressionArgs.n_informative),
            kwargs.get('n_targets', RegressionArgs.n_targets),
            kwargs.get('bias', RegressionArgs.bias),
            kwargs.get('effective_rank', RegressionArgs.effective_rank),
            kwargs.get('tail_strength', RegressionArgs.tail_strength),
            kwargs.get('noise', RegressionArgs.noise),
            kwargs.get('shuffle', RegressionArgs.shuffle),
            kwargs.get('coef', RegressionArgs.coef),
            kwargs.get('random_state', RegressionArgs.random_state))
    elif style == Style.BLOBS:
        return make_blobs(
            n_samples,
            kwargs.get('n_features', BlobsArgs.n_features),
            kwargs.get('centers', BlobsArgs.centers),
            kwargs.get('cluster_std', BlobsArgs.cluster_std),
            kwargs.get('center_box', BlobsArgs.center_box),
            kwargs.get('shuffle', BlobsArgs.shuffle),
            kwargs.get('random_state', BlobsArgs.random_state))
    elif style == Style.CLASSIFICATION:
        return make_classification(
            n_samples,
            kwargs.get('n_features', ClassificationArgs.n_features),
            kwargs.get('n_informative', ClassificationArgs.n_informative),
            kwargs.get('n_redundant', ClassificationArgs.n_redundant),
            kwargs.get('n_repeated', ClassificationArgs.n_repeated),
            kwargs.get('n_classes', ClassificationArgs.n_classes),
            kwargs.get('n_clusters_per_class', ClassificationArgs.n_clusters_per_class),
            kwargs.get('weights', ClassificationArgs.weights),
            kwargs.get('flip_y', ClassificationArgs.flip_y),
            kwargs.get('class_sep', ClassificationArgs.class_sep),
            kwargs.get('hypercube', ClassificationArgs.hypercube),
            kwargs.get('shift', ClassificationArgs.shift),
            kwargs.get('scale', ClassificationArgs.scale),
            kwargs.get('shuffle', ClassificationArgs.shuffle),
            kwargs.get('random_state', ClassificationArgs.random_state))
    elif style == Style.MULTILABEL:
        return make_multilabel_classification(
            n_samples,
            kwargs.get('n_features', MultilabelClassificationArgs.n_features),
            kwargs.get('n_classes', MultilabelClassificationArgs.n_classes),
            kwargs.get('n_labels', MultilabelClassificationArgs.n_labels),
            kwargs.get('length', MultilabelClassificationArgs.length),
            kwargs.get('allow_unlabeled', MultilabelClassificationArgs.allow_unlabeled),
            kwargs.get('sparse', MultilabelClassificationArgs.sparse),
            kwargs.get('return_indicator', MultilabelClassificationArgs.return_indicator),
            kwargs.get('return_distributions', MultilabelClassificationArgs.return_distributions),
            kwargs.get('random_state', MultilabelClassificationArgs.random_state))
    elif style == Style.GAUSSIAN:
        return make_gaussian_quantiles(
            n_samples=n_samples,
            n_features=kwargs.get('n_features', GaussianArgs.n_features),
            mean=kwargs.get('mean', GaussianArgs.mean),
            cov=kwargs.get('cov', GaussianArgs.cov),
            n_classes=kwargs.get('n_classes', GaussianArgs.n_classes),
            shuffle=kwargs.get('shuffle', GaussianArgs.shuffle),
            random_state=kwargs.get('random_state', GaussianArgs.random_state))
    elif style == Style.HASTIE:
        return make_hastie_10_2(
            n_samples,
            random_state=kwargs.get('random_state', HastieArgs.random_state))
    elif style == Style.CIRCLES:
        return make_circles(
            n_samples,
            kwargs.get('shuffle', CirclesArgs.shuffle),
            kwargs.get('noise', CirclesArgs.noise),
            kwargs.get('random_state', CirclesArgs.random_state),
            kwargs.get('factor', CirclesArgs.factor))
    elif style == Style.MOONS:
        return make_moons(
            n_samples,
            kwargs.get('shuffle', MoonsArgs.shuffle),
            kwargs.get('noise', MoonsArgs.noise),
            kwargs.get('random_state', MoonsArgs.random_state))
    elif style == Style.BICLUSTERS:
        return make_biclusters(
            kwargs.get('shape', BiclusterArgs.shape),
            kwargs.get('n_clusters', BiclusterArgs.n_clusters),
            kwargs.get('noise', BiclusterArgs.noise),
            kwargs.get('minval', BiclusterArgs.minval),
            kwargs.get('maxval', BiclusterArgs.maxval),
            kwargs.get('shuffle', BiclusterArgs.shuffle),
            kwargs.get('random_state', BiclusterArgs.random_state))
    elif style == Style.SCURVE:
        return make_s_curve(
            n_samples,
            kwargs.get('noise', SCurveArgs.noise),
            kwargs.get('random_state', SCurveArgs.random_state))
    elif style == Style.CHECKER:
        return make_checkerboard(
            kwargs.get('shape', CheckerArgs.shape),
            kwargs.get('n_clusters', CheckerArgs.n_clusters),
            kwargs.get('noise', CheckerArgs.noise),
            kwargs.get('minval', CheckerArgs.minval),
            kwargs.get('maxval', CheckerArgs.maxval),
            kwargs.get('shuffle', CheckerArgs.shuffle),
            kwargs.get('random_state', CheckerArgs.random_state))
    elif style == Style.FRIEDMAN:
        return make_friedman1(
            n_samples,
            kwargs.get('n_features', FriedmanArgs.n_features),
            kwargs.get('noise', FriedmanArgs.noise),
            kwargs.get('random_state', FriedmanArgs.random_state))
    elif style == Style.FRIEDMAN2:
        return make_friedman2(
            n_samples,
            kwargs.get('noise', Friedman2Args.noise),
            kwargs.get('random_state', Friedman2Args.random_state))
    elif style == Style.FRIEDMAN3:
        return make_friedman3(
            n_samples,
            kwargs.get('noise', Friedman3Args.noise),
            kwargs.get('random_state', Friedman3Args.random_state))
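# Usage sketch for the dispatcher above; it assumes the Style enum members
# carry these lowercase strings as their values, as the str branch at the
# top of getSKData suggests.
X, y = getSKData('friedman2', n_samples=200, noise=0.5, random_state=0)
print(X.shape, y.shape)  # expected: (200, 4) (200,)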
def prep_data_sklearn(dataset_name, test_size=0.2, model_class='realkd',
                      downsample_size=None, norm_mean=False,
                      random_seed=None, pos_class=None):
    target_name, without = dataset_signature(dataset_name)
    if dataset_name == 'tic-tac-toe':
        bunch = ds.fetch_openml(dataset_name)
        df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
        df.rename(lambda s: s[:-7], axis='columns', inplace=True)
        df.replace(0, 'b', inplace=True)
        df.replace(1, 'o', inplace=True)
        df.replace(2, 'x', inplace=True)
        data_rf = pd.get_dummies(df)
        target = pd.Series(where(bunch.target == 'positive', 1, -1))
    elif dataset_name == 'kr-vs-kp':
        bunch = ds.fetch_openml(data_id=3)
        df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
        data_rf = pd.get_dummies(df)
        target = pd.Series(where(bunch.target == 'won', 1, -1))
    elif dataset_name == 'breast_cancer':
        bunch = ds.load_breast_cancer()
        df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
        data_rf = pd.get_dummies(df)
        target = pd.Series(where(bunch.target == 1, 1, -1))
    elif dataset_name == 'iris':
        bunch = ds.load_iris()
        df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
        data_rf = pd.get_dummies(df)
        target = pd.Series(where(bunch.target == 1, 1, -1))
    elif dataset_name == 'make_friedman1':
        global_friedman_cols = 10
        data, target = ds.make_friedman1(n_samples=2000, n_features=10,
                                         noise=0.1, random_state=random_seed)
        no_cols = np.size(data, 1)
        col_names = ['X' + str(i + 1) for i in range(no_cols)]
        data_rf = pd.DataFrame(data, columns=col_names)
        target = pd.Series(target)
    elif dataset_name == 'make_friedman2':
        data, target = ds.make_friedman2(n_samples=2000, noise=0.1,
                                         random_state=random_seed)
        no_cols = np.size(data, 1)
        col_names = ['X' + str(i + 1) for i in range(no_cols)]
        data_rf = pd.DataFrame(data, columns=col_names)
        target = pd.Series(target)
    elif dataset_name == 'make_friedman3':
        data, target = ds.make_friedman3(n_samples=2000, noise=0.1,
                                         random_state=random_seed)
        no_cols = np.size(data, 1)
        col_names = ['X' + str(i + 1) for i in range(no_cols)]
        data_rf = pd.DataFrame(data, columns=col_names)
        target = pd.Series(target)
    elif dataset_name == 'make_classification2':
        data, target = ds.make_classification(n_samples=2000, n_features=8,
                                              n_classes=2, hypercube=True,
                                              n_clusters_per_class=3,
                                              n_informative=3, n_redundant=3,
                                              n_repeated=0,
                                              random_state=random_seed)
        no_cols = np.size(data, 1)
        col_names = ['X' + str(i + 1) for i in range(no_cols)]
        data_rf = pd.DataFrame(data, columns=col_names)
        target = pd.Series(where(target == 1, 1, -1))
    elif dataset_name == 'make_classification3':
        data, target = ds.make_classification(n_samples=2000, n_features=15,
                                              n_classes=3, hypercube=True,
                                              n_clusters_per_class=3,
                                              n_informative=5, n_redundant=5,
                                              n_repeated=0,
                                              random_state=random_seed)
        no_cols = np.size(data, 1)
        col_names = ['X' + str(i + 1) for i in range(no_cols)]
        data_rf = pd.DataFrame(data, columns=col_names)
        target = pd.Series(where(target == 1, 1, -1))
    elif dataset_name == 'load_wine':
        bunch = ds.load_wine()
        df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
        data_rf = pd.get_dummies(df)
        target = pd.Series(where(bunch.target == 1, 1, -1))
    elif dataset_name == 'make_hastie_10_2':
        data, target = ds.make_hastie_10_2(n_samples=12000,
                                           random_state=random_seed)
        no_cols = np.size(data, 1)
        col_names = ['X' + str(i + 1) for i in range(no_cols)]
        data_rf = pd.DataFrame(data, columns=col_names)
        target = pd.Series(where(target == 1, 1, -1))
    elif dataset_name == 'load_diabetes':
        bunch = ds.load_diabetes()
        df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
        data_rf = pd.get_dummies(df)
        target = pd.Series(bunch.target)
    elif dataset_name[:-1] == 'noisy_pairity_':
        d = int(dataset_name[-1])
        data, target_name, random_seed = prep_noisy_pairity(
            d=d, random_seed=random_seed)
        return data, target_name, random_seed
    elif dataset_name == 'digits5':
        data_rf, target = prep_digits()

    x_train, x_test, y_train, y_test = train_test_split(
        data_rf, target, test_size=test_size, random_state=random_seed)

    if downsample_size is not None:
        x_train[target_name] = y_train
        sampled_train = x_train.sample(n=min(downsample_size, len(y_train)),
                                       random_state=random_seed)
        x_train.reset_index(inplace=True, drop=True)  # this may be unnecessary
        y_train = sampled_train[target_name]
        x_train = sampled_train.drop([target_name], axis='columns')

    if norm_mean:
        # center the target on the training mean (cf. scikit-learn transformers)
        target_train_mean = sum(y_train) / len(y_train)
        y_train -= target_train_mean
        y_test -= target_train_mean
        y_train = [y_train, target_train_mean]
        y_test = [y_test, target_train_mean]

    data = [x_train, y_train, x_test, y_test]
    n = (len(y_train), len(y_test))
    return data, target_name, random_seed
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.datasets import make_friedman2
import matplotlib.pyplot as plt
from sklearn.gaussian_process.kernels import Matern
import numpy as np

N = 10000
addns = 2  # number of pool points to add per step

X, _ = make_friedman2(n_samples=N, noise=0, random_state=0)
Xtrain = X[:int(0.7 * N)]
Xpool = X[int(0.7 * N):int(0.8 * N)]
Xtest = X[int(0.8 * N):]
Xtrain = (Xtrain - np.mean(Xtrain)) / np.std(Xtrain)
Xtest = (Xtest - np.mean(Xtest)) / np.std(Xtest)

y = np.random.randint(2, size=N)
ytrain = y[:int(0.7 * N)]
ypool = y[int(0.7 * N):int(0.8 * N)]  # was sliced from X by mistake
ytest = y[int(0.8 * N):]              # was offset to match the 70% mark

model = GaussianProcessClassifier()
gp = model.fit(Xtrain, ytrain)
preds = gp.predict_proba(Xtrain)
preds = np.max(preds, axis=1)
# indices of the `addns` least-confident predictions
newXs = (1 - preds).argsort()[-addns:][::-1]
# imports assumed for this fragment: loguru's logger (for @logger.catch)
# plus the scikit-learn pieces used below; SklearnProcedure is external
from loguru import logger
from sklearn.datasets import make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

@logger.catch
def predict_proba(self, X, **kwargs):
    prediction = self.model.predict_proba(X, **kwargs)
    return prediction

@logger.catch
def partial_fit(self, X, y, **kwargs):
    self.changed = True
    self.model.partial_fit(X, y, **kwargs)

def fit(self, X, y, **kwargs):
    self.changed = True
    self.model.fit(X, y, **kwargs)


class CustomSklearnGaussianProcedure(SklearnProcedure):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        kernel = DotProduct() + WhiteKernel()
        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        self.dictionary.model = gpr


if __name__ == "__main__":
    general_procedure = CustomSklearnGaussianProcedure()
    X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    general_procedure.fit(X, y)
    print(general_procedure.predict(X[:2, :], return_std=True))
    # print(general_procedure.get_params())
    print(general_procedure.extract())
import numpy as np
import sklearn.datasets as Datasets
import seaborn as sn
import matplotlib.pyplot as plt

# Draw 200 noisy samples and pick three random rows as candidate centers.
# (noise is keyword-only in current scikit-learn, so it must be named.)
X, y = Datasets.make_friedman2(200, noise=0.3)
index = np.random.choice(range(len(X)), 3)
center = X[index, :]
print(center)
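# np.random.choice with the default replace=True can return the same row
# twice; sampling without replacement guarantees three distinct centers.
# The seeded default_rng is an assumption added for reproducibility.
import numpy as np
from sklearn import datasets

X, y = datasets.make_friedman2(200, noise=0.3, random_state=0)
rng = np.random.default_rng(0)
center = X[rng.choice(len(X), size=3, replace=False)]
print(center)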
print("the output of load_wine() :: ", datasets.load_wine()) #make_blobs() excuted print("the output of make_blobs() :: ", datasets.make_blobs()) #make_circles() executed print("the output of make_circles() :: ", datasets.make_circles()) #make_classification() executed print("the output of make_classification() :: ", datasets.make_classification()) #make_friedman1() executed print("the output of make_friedman1() :: ", datasets.make_friedman1()) #make_friedman2() executed print("the output of make_friedman2() :: ", datasets.make_friedman2()) #make_friedman3() executed print("the output of make_friedman3() :: ", datasets.make_friedman3()) #make_gaussian_quantiles() executed print("the output of make_gaussian_quantiles() :: ", datasets.make_gaussian_quantiles()) #make_hastie_10_2() executed print("the output of make_hastie_10_2() :: ", datasets.make_hastie_10_2()) #make_moons() executed print("the output of make_moons() :: ", datasets.make_moons()) #make_multilabel_classification() executed
from sklearn import datasets
import matplotlib.pyplot as plt

# make_friedman2 data
X, y = datasets.make_friedman2(n_samples=100, noise=0.0, random_state=None)
print(X)
print(y)
try:
    reload  # Python 2.7
except NameError:
    try:
        from importlib import reload  # Python 3.4+
    except ImportError:
        from imp import reload  # Python 3.0 - 3.3

NNC = reload(NNC)
import tensorflow as tf

boston = sdata.load_boston()
diabetes = sdata.load_diabetes()
mf1 = sdata.make_friedman1(n_samples=2500)
mf2 = sdata.make_friedman2(n_samples=2500)

datas = [
    [boston.data, boston.target, "boston"],
    [diabetes.data, diabetes.target, "diabetes"],
    [mf1[0], mf1[1], "friedman1"],
    [mf2[0], mf2[1], "friedman2"],
]

dict_layers = lambda x, size: {
    "I": x,
    "N1": size,
    "N2": size,
    "N3": size,
    "N4": size,
    "N5": size,
def getSKData(style='timeseries', as_dataframe=False, n_samples=10, **kwargs):
    if style == 'regression':
        return make_regression(
            n_samples,
            kwargs.get('n_features', RegressionArgs.n_features),
            kwargs.get('n_informative', RegressionArgs.n_informative),
            kwargs.get('n_targets', RegressionArgs.n_targets),
            kwargs.get('bias', RegressionArgs.bias),
            kwargs.get('effective_rank', RegressionArgs.effective_rank),
            kwargs.get('tail_strength', RegressionArgs.tail_strength),
            kwargs.get('noise', RegressionArgs.noise),
            kwargs.get('shuffle', RegressionArgs.shuffle),
            kwargs.get('coef', RegressionArgs.coef),
            kwargs.get('random_state', RegressionArgs.random_state))
    elif style == 'blobs':
        return make_blobs(
            n_samples,
            kwargs.get('n_features', BlobsArgs.n_features),
            kwargs.get('centers', BlobsArgs.centers),
            kwargs.get('cluster_std', BlobsArgs.cluster_std),
            kwargs.get('center_box', BlobsArgs.center_box),
            kwargs.get('shuffle', BlobsArgs.shuffle),
            kwargs.get('random_state', BlobsArgs.random_state))
    elif style == 'classification':
        return make_classification(
            n_samples,
            kwargs.get('n_features', ClassificationArgs.n_features),
            kwargs.get('n_informative', ClassificationArgs.n_informative),
            kwargs.get('n_redundant', ClassificationArgs.n_redundant),
            kwargs.get('n_repeated', ClassificationArgs.n_repeated),
            kwargs.get('n_classes', ClassificationArgs.n_classes),
            kwargs.get('n_clusters_per_class', ClassificationArgs.n_clusters_per_class),
            kwargs.get('weights', ClassificationArgs.weights),
            kwargs.get('flip_y', ClassificationArgs.flip_y),
            kwargs.get('class_sep', ClassificationArgs.class_sep),
            kwargs.get('hypercube', ClassificationArgs.hypercube),
            kwargs.get('shift', ClassificationArgs.shift),
            kwargs.get('scale', ClassificationArgs.scale),
            kwargs.get('shuffle', ClassificationArgs.shuffle),
            kwargs.get('random_state', ClassificationArgs.random_state))
    elif style == 'multilabel':
        return make_multilabel_classification(
            n_samples,
            kwargs.get('n_features', MultilabelClassificationArgs.n_features),
            kwargs.get('n_classes', MultilabelClassificationArgs.n_classes),
            kwargs.get('n_labels', MultilabelClassificationArgs.n_labels),
            kwargs.get('length', MultilabelClassificationArgs.length),
            kwargs.get('allow_unlabeled', MultilabelClassificationArgs.allow_unlabeled),
            kwargs.get('sparse', MultilabelClassificationArgs.sparse),
            kwargs.get('return_indicator', MultilabelClassificationArgs.return_indicator),
            kwargs.get('return_distributions', MultilabelClassificationArgs.return_distributions),
            kwargs.get('random_state', MultilabelClassificationArgs.random_state))
    elif style == 'gaussian':
        return make_gaussian_quantiles(
            n_samples=n_samples,
            n_features=kwargs.get('n_features', GaussianArgs.n_features),
            mean=kwargs.get('mean', GaussianArgs.mean),
            cov=kwargs.get('cov', GaussianArgs.cov),
            n_classes=kwargs.get('n_classes', GaussianArgs.n_classes),
            shuffle=kwargs.get('shuffle', GaussianArgs.shuffle),
            random_state=kwargs.get('random_state', GaussianArgs.random_state))
    elif style == 'hastie':
        return make_hastie_10_2(
            n_samples,
            random_state=kwargs.get('random_state', HastieArgs.random_state))
    elif style == 'circles':
        return make_circles(
            n_samples,
            kwargs.get('shuffle', CirclesArgs.shuffle),
            kwargs.get('noise', CirclesArgs.noise),
            kwargs.get('random_state', CirclesArgs.random_state),
            kwargs.get('factor', CirclesArgs.factor))
    elif style == 'moons':
        return make_moons(
            n_samples,
            kwargs.get('shuffle', MoonsArgs.shuffle),
            kwargs.get('noise', MoonsArgs.noise),
            kwargs.get('random_state', MoonsArgs.random_state))
    elif style == 'biclusters':
        x = make_biclusters(
            kwargs.get('shape', BiclusterArgs.shape),
            kwargs.get('n_clusters', BiclusterArgs.n_clusters),
            kwargs.get('noise', BiclusterArgs.noise),
            kwargs.get('minval', BiclusterArgs.minval),
            kwargs.get('maxval', BiclusterArgs.maxval),
            kwargs.get('shuffle', BiclusterArgs.shuffle),
            kwargs.get('random_state', BiclusterArgs.random_state))
        if as_dataframe:
            return pd.concat([pd.DataFrame(x[0]), pd.DataFrame(x[1].T)], axis=1)
        else:
            return x
    elif style == 'scurve':
        return make_s_curve(
            n_samples,
            kwargs.get('noise', SCurveArgs.noise),
            kwargs.get('random_state', SCurveArgs.random_state))
    elif style == 'checker':
        return make_checkerboard(
            kwargs.get('shape', CheckerArgs.shape),
            kwargs.get('n_clusters', CheckerArgs.n_clusters),
            kwargs.get('noise', CheckerArgs.noise),
            kwargs.get('minval', CheckerArgs.minval),
            kwargs.get('maxval', CheckerArgs.maxval),
            kwargs.get('shuffle', CheckerArgs.shuffle),
            kwargs.get('random_state', CheckerArgs.random_state))
    elif style == 'friedman':
        return make_friedman1(
            n_samples,
            kwargs.get('n_features', FriedmanArgs.n_features),
            kwargs.get('noise', FriedmanArgs.noise),
            kwargs.get('random_state', FriedmanArgs.random_state))
    elif style == 'friedman2':
        return make_friedman2(
            n_samples,
            kwargs.get('noise', Friedman2Args.noise),
            kwargs.get('random_state', Friedman2Args.random_state))
    elif style == 'friedman3':
        return make_friedman3(
            n_samples,
            kwargs.get('noise', Friedman3Args.noise),
            kwargs.get('random_state', Friedman3Args.random_state))
def with_best_first(cls, max_leaf_nodes):
    return partial(cls, max_leaf_nodes=max_leaf_nodes)


def uniform_dataset(args):
    X = np.random.random(size=(args.num_examples, args.num_features))
    y = np.random.choice([-1, 1], size=args.num_examples)
    return (X, y)


DATASETS = {
    "uniform": uniform_dataset,
    "hastie": lambda args: datasets.make_hastie_10_2(
        n_samples=args.num_examples),
    "friedman1": lambda args: datasets.make_friedman1(
        n_samples=args.num_examples, n_features=args.num_features),
    "friedman2": lambda args: datasets.make_friedman2(
        n_samples=args.num_examples, noise=args.noise),
    "friedman3": lambda args: datasets.make_friedman3(
        n_samples=args.num_examples, noise=args.noise),
    "make_regression": lambda args: datasets.make_regression(
        n_samples=args.num_examples,
        n_features=args.num_features,
        n_informative=args.num_informative)
}

ENSEMBLE_REGRESSORS = [
    ("GB-D1", with_depth(ensemble.GradientBoostingRegressor, 1)),
    ("GB-D3", with_depth(ensemble.GradientBoostingRegressor, 3)),
    ("GB-B10", with_best_first(ensemble.GradientBoostingRegressor, 10)),
    ("RF-D1", with_depth(ensemble.RandomForestRegressor, 1)),
    ("RF-D3", with_depth(ensemble.RandomForestRegressor, 3)),
    ("RF-D5", with_depth(ensemble.RandomForestRegressor, 5)),
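# Sketch of how the DATASETS / ENSEMBLE_REGRESSORS tables above are meant to
# be consumed. The argparse.Namespace stand-in for `args` and its field
# values are assumptions, and `with_depth` is presumed to be defined
# analogously to `with_best_first`, e.g. partial(cls, max_depth=depth).
from argparse import Namespace

args = Namespace(num_examples=1000, num_features=10,
                 num_informative=5, noise=0.0)
X, y = DATASETS["friedman2"](args)
for name, factory in ENSEMBLE_REGRESSORS:
    model = factory(n_estimators=50).fit(X, y)
    print(name, model.score(X, y))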