Example No. 1
import numpy as np
from keras.utils.np_utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier
from ionyx.contrib.keras_builder import KerasBuilder
from ionyx.datasets import DataSetLoader

print('Beginning keras builder test...')

data, X, y = DataSetLoader.load_forest_cover()
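# The +1 below appears to account for class labels that start at 1 rather than 0,
# so to_categorical receives num_classes equal to the maximum label value + 1.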
n_classes = len(np.unique(y)) + 1

model = KerasBuilder.build_dense_model(input_size=X.shape[1],
                                       output_size=n_classes,
                                       loss='categorical_crossentropy',
                                       metrics=['accuracy'])
model.fit(X, to_categorical(y, n_classes))
score = model.evaluate(X, to_categorical(y, n_classes))
print('Model score = {0}'.format(score[1]))

estimator = KerasClassifier(build_fn=KerasBuilder.build_dense_model,
                            input_size=X.shape[1],
                            output_size=n_classes,
                            loss='categorical_crossentropy',
                            metrics=['accuracy'])
estimator.fit(X, to_categorical(y, n_classes))
score = estimator.score(X, to_categorical(y, n_classes))
print('Estimator score = {0}'.format(score))

print('Done.')
Example No. 2
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV, cross_val_score
from ionyx.contrib import ProphetRegressor
from ionyx.contrib import SuppressOutput
from ionyx.contrib import TimeSeriesSplit
from ionyx.datasets import DataSetLoader

print('Beginning prophet regressor test...')

data, X, y = DataSetLoader.load_time_series()
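# n_changepoints=0 disables Prophet's trend changepoints, which keeps the fit fast
# for this smoke test.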
prophet = ProphetRegressor(n_changepoints=0)
with SuppressOutput():
    prophet.fit(X, y)
print('Model score = {0}'.format(mean_absolute_error(y, prophet.predict(X))))

cv = TimeSeriesSplit(n_splits=3)
with SuppressOutput():
    score = cross_val_score(prophet, X, y, cv=cv)
print('Cross-validation score = {0}'.format(score))

param_grid = [
    {
        'n_changepoints': [0, 25]
    }
]
grid = GridSearchCV(prophet, param_grid=param_grid, cv=cv, return_train_score=True)
with SuppressOutput():
    grid.fit(X, y)
results = pd.DataFrame(grid.cv_results_)
results = results.sort_values(by='mean_test_score', ascending=False)
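# The sorted grid-search results are computed but never displayed; print the key
# columns so the output can be inspected.
print(results[['params', 'mean_test_score', 'mean_train_score']].head())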
Example No. 3
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle
from ionyx import Blender
from ionyx.datasets import DataSetLoader

print('Beginning blender test...')

_, X, y = DataSetLoader.load_otto_group()
X, y = shuffle(X, y, n_samples=10000)
X = X[:, :30]
models = [('logistic', LogisticRegression()), ('svc', SVC()), ('decision_tree', DecisionTreeClassifier())]
cv = KFold()
blender = Blender(models=models, scoring_metric='neg_mean_squared_error')
blender.build_ensemble(X, y, cv, retrain=True)
print(blender.ensemble_)

_, X, y = DataSetLoader.load_otto_group()
X, y = shuffle(X, y, n_samples=10000)
X = X[:, :30]
models = [('logistic', LogisticRegression()), ('svc', SVC()), ('decision_tree', DecisionTreeClassifier()),
          ('stacker', LogisticRegression())]
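# layer_mask presumably assigns each model to a stacking layer: the first three
# entries are base models (layer 0) and the final LogisticRegression is the
# meta-model (layer 1) that stacks their predictions.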
layer_mask = [0, 0, 0, 1]
cv = KFold()
layer_cv = KFold()
blender = Blender(models=models, scoring_metric='neg_mean_squared_error')
blender.build_stacking_ensemble(X, y, cv, layer_cv, layer_mask, retrain=True)
print(blender.ensemble_)
Example No. 4
from sklearn.decomposition import PCA
from ionyx.datasets import DataSetLoader
from ionyx import Visualizer

print('Beginning visualizer test...')

data, _, _ = DataSetLoader.load_property_inspection()
viz = Visualizer(data)
viz.feature_distributions()
viz.feature_correlations()
viz.variable_relationship(['T1_V1', 'T1_V2'], category_vars=None)
viz.variable_relationship(['T1_V1', 'T1_V2', 'T2_V1'], category_vars=None)
viz.variable_relationship(['T2_V1'], category_vars=['T1_V4'])
viz.variable_relationship(['T2_V1'], category_vars=['T1_V4', 'T1_V7'])
viz.variable_relationship(['T1_V1', 'T2_V1'], category_vars=['T1_V7'])

data, _, _, _ = DataSetLoader.load_bike_sharing()
viz = Visualizer(data)
viz.sequential_relationships()

data, _, _ = DataSetLoader.load_otto_group()
X_cols = data.columns[1:].tolist()
y_col = data.columns[0]
viz = Visualizer(data)
pca = PCA()
viz.transform(pca, X_columns=X_cols)
viz.transform(pca, X_columns=X_cols, y_column=y_col, task='classification')
viz.transform(pca, X_columns=X_cols, y_column=y_col, task='regression')

data, _, _ = DataSetLoader.load_otto_group()
data = data.iloc[:10000, :30]
Example No. 5
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from xgboost import XGBRegressor
from keras.wrappers.scikit_learn import KerasRegressor
from ionyx import Experiment
from ionyx.contrib.keras_builder import KerasBuilder
from ionyx.datasets import DataSetLoader

print('Beginning experiment test...')

data, _, _ = DataSetLoader.load_forest_cover()
X_cols = data.columns[1:].tolist()
y_col = data.columns[0]
logistic = LogisticRegression()
cv = KFold()
experiment = Experiment(package='sklearn',
                        model=logistic,
                        scoring_metric='accuracy',
                        verbose=True,
                        data=data,
                        X_columns=X_cols,
                        y_column=y_col,
                        cv=cv)
experiment.train_model()
experiment.cross_validate()
experiment.learning_curve()
param_grid = [{'C': [0.01, 0.1, 1.0]}]  # LogisticRegression is regularized via C, not alpha
experiment.param_search(param_grid,
                        save_results_path='/home/john/temp/search.csv')
print(experiment.best_model_)
experiment.save_model('/home/john/temp/model.pkl')
Example No. 6
from ionyx.datasets import DataSetLoader

print('Beginning dataset loader test...')

data, X, y1, y2 = DataSetLoader.load_bike_sharing()
print("Bike-sharing data:")
print("Data shape = {0}, X shape = {1}, y1 shape = {2}, y2 shape = {3}".format(
    data.shape, X.shape, y1.shape, y2.shape))
print(data.head())
print("")

data, X, y = DataSetLoader.load_forest_cover()
print("Forest cover data:")
print("Data shape = {0}, X shape = {1}, y shape = {2}".format(
    data.shape, X.shape, y.shape))
print(data.head())
print("")

data, X, y = DataSetLoader.load_otto_group()
print("Otto group data:")
print("Data shape = {0}, X shape = {1}, y shape = {2}".format(
    data.shape, X.shape, y.shape))
print(data.head())
print("")

data, X, y = DataSetLoader.load_property_inspection()
print("Property inspection data:")
print("Data shape = {0}, X shape = {1}, y shape = {2}".format(
    data.shape, X.shape, y.shape))
print(data.head())
print("")
Example No. 7
import pprint as pp
import pandas as pd
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
from sklearn.svm import LinearSVR
from ionyx.contrib import AveragingRegressor
from ionyx.datasets import DataSetLoader

print('Beginning averaging regressor test...')

data, X, y = DataSetLoader.load_property_inspection()
data = data.iloc[:1000, :]
X = X[:1000, :]
y = y[:1000]

estimators = [('ridge', Ridge()), ('lasso', Lasso()), ('svm', LinearSVR())]
ensemble = AveragingRegressor(estimators, weights=[1.0, 1.5, 2.0])
ensemble.fit(X, y)
print('Estimators list:')
pp.pprint(ensemble.estimators_)
print('Named estimators dict:')
pp.pprint(ensemble.named_estimators_)
print('Model 1 score = {0}'.format(
    mean_absolute_error(y, ensemble.estimators_[0].predict(X))))
print('Model 2 score = {0}'.format(
    mean_absolute_error(y, ensemble.estimators_[1].predict(X))))
print('Model 3 score = {0}'.format(
    mean_absolute_error(y, ensemble.estimators_[2].predict(X))))
print('Ensemble score = {0}'.format(mean_absolute_error(
    y, ensemble.predict(X))))
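
# The imports above include KFold, cross_val_score, GridSearchCV, and pandas,
# which the snippet never uses. The lines below are a minimal sketch of how the
# ensemble could be cross-validated and its weights tuned, assuming
# AveragingRegressor is a scikit-learn-compatible estimator (clonable, with
# 'weights' exposed as a constructor parameter); the weight grid is illustrative.
cv = KFold(n_splits=3)
cv_scores = cross_val_score(ensemble, X, y, scoring='neg_mean_absolute_error', cv=cv)
print('Cross-validation scores = {0}'.format(cv_scores))

param_grid = [{'weights': [[1.0, 1.0, 1.0], [1.0, 1.5, 2.0]]}]
grid = GridSearchCV(ensemble, param_grid=param_grid,
                    scoring='neg_mean_absolute_error', cv=cv)
grid.fit(X, y)
print(pd.DataFrame(grid.cv_results_)[['params', 'mean_test_score']])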