Example #1

names = [
    "Random Forest",
    "Extra Trees",
    "AdaBoost",
    "Gaussian Process",
    "ARD",
    "Linear",
    "Passive Aggressive",
    "SGD",
    "Theil-Sen",
    "RANSAC",
    "K-Neighbors",
    "Radius Neighbors",
    "MLP",
    "Decision Tree",
    "Extra Tree",
    "SVR"
]

classifiers = [
    RandomForestRegressor(n_estimators=200, n_jobs=5,
                          random_state=randomstate),
    ExtraTreesRegressor(n_estimators=200, n_jobs=5, random_state=randomstate),
    # GradientBoostingRegressor(random_state=randomstate),    # learning_rate is a hyper-parameter in the range (0.0, 1.0]
    # HistGradientBoostingClassifier(random_state=randomstate),    # learning_rate is a hyper-parameter in the range (0.0, 1.0]
    AdaBoostRegressor(n_estimators=200, random_state=randomstate),
    GaussianProcessRegressor(normalize_y=True),
    ARDRegression(),
    # HuberRegressor(),   # epsilon:  greater than 1.0, default 1.35
    LinearRegression(n_jobs=5),
    PassiveAggressiveRegressor(
        random_state=randomstate),  # C: 0.25, 0.5, 1, 5, 10
    SGDRegressor(random_state=randomstate),
    TheilSenRegressor(n_jobs=5, random_state=randomstate),
    RANSACRegressor(random_state=randomstate),
    KNeighborsRegressor(
        weights='distance'),  # n_neighbors: 3, 6, 9, 12, 15, 20
    RadiusNeighborsRegressor(weights='distance'),  # radius: 1, 2, 5, 10, 15
    MLPRegressor(max_iter=10000000, random_state=randomstate),
    DecisionTreeRegressor(
        random_state=randomstate),  # max_depth = 2, 3, 4, 6, 8 (see the sketch after this list)
    ExtraTreeRegressor(random_state=randomstate),
    SVR()
]
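
These two parallel lists are presumably consumed together in a benchmarking
loop. A minimal sketch under that assumption; the make_regression toy data,
the 5-fold CV, and the error handling are mine, not the original's:

from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score

X_demo, y_demo = make_regression(n_samples=300, n_features=10, noise=0.5,
                                 random_state=0)

# Score every (name, estimator) pair with cross-validated R^2.
for name, reg in zip(names, classifiers):
    try:
        scores = cross_val_score(reg, X_demo, y_demo, cv=5, scoring='r2')
        print('%-20s R^2 = %.3f +/- %.3f' % (name, scores.mean(), scores.std()))
    except Exception as exc:  # e.g. RadiusNeighbors finding no points in radius
        print('%-20s failed: %s' % (name, exc))

The hyperparameter ranges noted in the inline comments (C, n_neighbors,
radius, max_depth, ...) could be swept the same way with GridSearchCV.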

Example #2

# (estimator, methods, problem) triples; the list's original name is not shown
cases = [
    (Ridge(), ['predict'], create_regression_problem_1()),
    (RidgeCV(), ['predict'], create_regression_problem_1()),
    (SGDRegressor(), ['predict'], create_regression_problem_1()),
    (Lasso(), ['predict'], create_regression_problem_1()),
    (Pipeline([('earth', Earth()), ('logistic', LogisticRegression())]),
     ['predict', 'predict_proba'], create_weird_classification_problem_1()),
    (FeatureUnion([('earth', Earth()), ('earth2', Earth(max_degree=2))],
                  transformer_weights={
                      'earth': 1,
                      'earth2': 2
                  }), ['transform'], create_weird_classification_problem_1()),
    (RandomForestRegressor(), ['predict'], create_regression_problem_1()),
    (CalibratedClassifierCV(LogisticRegression(), method='isotonic'),
     ['predict_proba'], create_weird_classification_problem_1()),
    (AdaBoostRegressor(), ['predict'], create_regression_problem_1()),
    (BaggingRegressor(), ['predict'], create_regression_problem_1()),
    (BaggingClassifier(), ['predict_proba'],
     create_weird_classification_problem_1()),
    (GradientBoostingRegressor(verbose=True), ['predict'],
     create_regression_problem_1(m=100000, n=200)),
    (XGBRegressor(), ['predict'], create_regression_problem_for_xgb_1())
]


# Create test cases for the numpy_flat language
from sklearn.base import clone  # needed below; the excerpt's top-level imports are elided


def create_case_numpy_flat(estimator, methods, fit_data, predict_data,
                           export_predict_data):
    def test_case(self):
        model = clone(estimator)
        model.fit(**fit_data)
        # the rest of the original test body is not shown in the excerpt

    return test_case
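
The factory breaks off mid-body above. Assuming it returns the inner
test_case function, the generated cases are typically attached to a
unittest.TestCase subclass; the class name and attribute scheme below are
hypothetical:

import unittest


class TestNumpyFlat(unittest.TestCase):
    """Host class for the generated per-estimator tests."""


for i, (estimator, methods, problem) in enumerate(cases):
    # Assumption: the problem object doubles as fit and predict data; the
    # real helpers may unpack it into separate dicts.
    test = create_case_numpy_flat(estimator, methods, problem, problem, problem)
    setattr(TestNumpyFlat, 'test_case_%02d' % i, test)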

Example #3

import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.ensemble import (AdaBoostRegressor, GradientBoostingRegressor,
                              RandomForestRegressor)
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.preprocessing import MinMaxScaler

# Note: load_boston was removed in scikit-learn 1.2, so this snippet needs an
# older release.
boston = datasets.load_boston()

X = MinMaxScaler().fit_transform(boston.data)
y = boston.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

models = []

models.append(RandomForestRegressor())
models.append(RandomForestRegressor(n_estimators=100))
models.append(GradientBoostingRegressor())
models.append(GradientBoostingRegressor(n_estimators=300))
models.append(AdaBoostRegressor())


def evaluation():
    for model in models:
        score = cross_val_score(estimator=model, X=X, y=y, cv=KFold(10), scoring='r2')
        print(score.mean(), score.std())


def visualization():
    for model in models:
        model.fit(X_train, y_train)
        y_predict = model.predict(X_test)
        x_axis = range(0, len(y_test))
        plt.plot(x_axis, y_test, 'r', label='test')
        plt.plot(x_axis, y_predict, 'b', label='predict')
        plt.legend()
        plt.show()  # one figure per model
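
Neither helper is called in the excerpt; presumably the original script
invokes them at module level, e.g.:

if __name__ == '__main__':
    evaluation()
    visualization()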
Example #4
from sklearn.cluster import SpectralClustering
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.manifold import SpectralEmbedding, TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import TheilSenRegressor
from sklearn.mixture import VBGMM  # VBGMM/DPGMM were removed in scikit-learn 0.20
from sklearn.feature_selection import VarianceThreshold

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)


clf_dict = {
    'ARDRegression': ARDRegression(),
    'AdaBoostClassifier': AdaBoostClassifier(),
    'AdaBoostRegressor': AdaBoostRegressor(),
    'AdditiveChi2Sampler': AdditiveChi2Sampler(),
    'AffinityPropagation': AffinityPropagation(),
    'AgglomerativeClustering': AgglomerativeClustering(),
    'BaggingClassifier': BaggingClassifier(),
    'BaggingRegressor': BaggingRegressor(),
    'BayesianGaussianMixture': BayesianGaussianMixture(),
    'BayesianRidge': BayesianRidge(),
    'BernoulliNB': BernoulliNB(),
    'BernoulliRBM': BernoulliRBM(),
    'Binarizer': Binarizer(),
    'Birch': Birch(),
    'CCA': CCA(),
    'CalibratedClassifierCV': CalibratedClassifierCV(),
    'DBSCAN': DBSCAN(),
    'DPGMM': DPGMM(),
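
The dictionary is cut off here. A mapping like this is typically iterated to
smoke-test every estimator; a hypothetical driver (the toy data and the
try/except are mine, not the original's):

from sklearn.datasets import make_classification

X_demo, y_demo = make_classification(n_samples=100, random_state=0)

for name, est in clf_dict.items():
    try:
        est.fit(X_demo, y_demo)  # clusterers and transformers ignore y
        print(name, 'fitted OK')
    except Exception as exc:  # some entries need differently shaped input
        print(name, 'failed:', exc)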
Example #5
import numpy as np

from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

# TrendLine is a project-specific data loader; its import is not shown in the
# excerpt.

tl = TrendLine(data_type='train')
data_df = tl.get()

# A random random_state makes this split non-reproducible between runs.
train_set, test_set = train_test_split(data_df,
                                       test_size=0.2,
                                       random_state=np.random.randint(1, 1000))

y_train = train_set['time_to_failure']
x_train_seg = train_set['segment_id']
x_train = train_set.drop(['time_to_failure', 'segment_id'], axis=1)

y_test = test_set['time_to_failure']
x_test_seg = test_set['segment_id']
x_test = test_set.drop(['time_to_failure', 'segment_id'], axis=1)

adbReg = AdaBoostRegressor(n_estimators=50,
                           learning_rate=1.0,
                           loss='linear',
                           random_state=42)

adbReg.fit(x_train, y_train)

y_pred = adbReg.predict(x_test)

# y_pred = x_train.mean(axis=1)

print('MAE score:', mean_absolute_error(y_test, y_pred))
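
The commented-out line above hints at a mean-prediction baseline for
comparison; a small sketch of that check (the variable names are mine):

# Naive baseline: predict the training-set mean for every test sample.
baseline = np.full(len(y_test), y_train.mean())
print('MAE score for mean baseline:', mean_absolute_error(y_test, baseline))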