"""Smoke test for KerasBuilder: build, fit, and score a dense network
both directly and through the scikit-learn KerasClassifier wrapper."""
import numpy as np
from keras.utils.np_utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier

from ionyx.contrib.keras_builder import KerasBuilder
from ionyx.datasets import DataSetLoader

print('Beginning keras builder test...')
data, X, y = DataSetLoader.load_forest_cover()
# presumably labels are 1-based, hence the +1 so to_categorical has a
# column for every label index — TODO confirm against the dataset
n_classes = len(np.unique(y)) + 1
one_hot = to_categorical(y, n_classes)

# Exercise the builder's output as a plain Keras model.
model = KerasBuilder.build_dense_model(input_size=X.shape[1],
                                       output_size=n_classes,
                                       loss='categorical_crossentropy',
                                       metrics=['accuracy'])
model.fit(X, one_hot)
score = model.evaluate(X, one_hot)
print('Model score = {0}'.format(score[1]))

# Exercise the same builder through the scikit-learn wrapper API.
estimator = KerasClassifier(build_fn=KerasBuilder.build_dense_model,
                            input_size=X.shape[1],
                            output_size=n_classes,
                            loss='categorical_crossentropy',
                            metrics=['accuracy'])
estimator.fit(X, one_hot)
score = estimator.score(X, one_hot)
print('Estimator score = {0}'.format(score))
print('Done.')
"""Smoke test for ProphetRegressor: direct fit, cross-validation, and
hyperparameter grid search with output suppressed."""
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV, cross_val_score

from ionyx.contrib import ProphetRegressor, SuppressOutput, TimeSeriesSplit
from ionyx.datasets import DataSetLoader

print('Beginning prophet regressor test...')
data, X, y = DataSetLoader.load_time_series()

# Direct fit/predict round trip; Prophet is chatty, so silence it.
prophet = ProphetRegressor(n_changepoints=0)
with SuppressOutput():
    prophet.fit(X, y)
print('Model score = {0}'.format(mean_absolute_error(y, prophet.predict(X))))

# Cross-validate with a time-series-aware splitter.
cv = TimeSeriesSplit(n_splits=3)
with SuppressOutput():
    score = cross_val_score(prophet, X, y, cv=cv)
print('Cross-validation score = {0}'.format(score))

# Grid search over the changepoint count.
param_grid = [{'n_changepoints': [0, 25]}]
grid = GridSearchCV(prophet, param_grid=param_grid, cv=cv,
                    return_train_score=True)
with SuppressOutput():
    grid.fit(X, y)
results = pd.DataFrame(grid.cv_results_).sort_values(by='mean_test_score',
                                                     ascending=False)
"""Smoke test for Blender: build a plain ensemble and a stacking
ensemble over a trimmed Otto group sample."""
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle

from ionyx import Blender
from ionyx.datasets import DataSetLoader

print('Beginning blender test...')

# --- Plain ensemble -------------------------------------------------
_, X, y = DataSetLoader.load_otto_group()
# Subsample rows and columns to keep the smoke test fast.
X, y = shuffle(X, y, n_samples=10000)
X = X[:, :30]
base_models = [
    ('logistic', LogisticRegression()),
    ('svc', SVC()),
    ('decision_tree', DecisionTreeClassifier()),
]
blender = Blender(models=base_models, scoring_metric='neg_mean_squared_error')
blender.build_ensemble(X, y, KFold(), retrain=True)
print(blender.ensemble_)

# --- Stacking ensemble ----------------------------------------------
_, X, y = DataSetLoader.load_otto_group()
X, y = shuffle(X, y, n_samples=10000)
X = X[:, :30]
# Last entry (mask value 1) acts as the stacking layer's meta-model.
stack_models = [
    ('logistic', LogisticRegression()),
    ('svc', SVC()),
    ('decision_tree', DecisionTreeClassifier()),
    ('stacker', LogisticRegression()),
]
layer_mask = [0, 0, 0, 1]
blender = Blender(models=stack_models, scoring_metric='neg_mean_squared_error')
blender.build_stacking_ensemble(X, y, KFold(), KFold(), layer_mask,
                                retrain=True)
print(blender.ensemble_)
"""Smoke test for Visualizer: distribution, correlation, relationship,
sequential, and transform plots across several bundled datasets."""
from sklearn.decomposition import PCA

from ionyx.datasets import DataSetLoader
from ionyx import Visualizer

print('Beginning visualizer test...')

# Property inspection data: distributions, correlations, and the
# variable_relationship API across its quantitative/categorical modes.
data, _, _ = DataSetLoader.load_property_inspection()
viz = Visualizer(data)
viz.feature_distributions()
viz.feature_correlations()
viz.variable_relationship(['T1_V1', 'T1_V2'], category_vars=None)
viz.variable_relationship(['T1_V1', 'T1_V2', 'T2_V1'], category_vars=None)
viz.variable_relationship(['T2_V1'], category_vars=['T1_V4'])
viz.variable_relationship(['T2_V1'], category_vars=['T1_V4', 'T1_V7'])
viz.variable_relationship(['T1_V1', 'T2_V1'], category_vars=['T1_V7'])

# Bike sharing data: time-ordered relationship plots.
data, _, _, _ = DataSetLoader.load_bike_sharing()
viz = Visualizer(data)
viz.sequential_relationships()

# Otto group data: PCA transform visualizations for each task mode.
data, _, _ = DataSetLoader.load_otto_group()
X_cols = data.columns[1:].tolist()
y_col = data.columns[0]
viz = Visualizer(data)
pca = PCA()
viz.transform(pca, X_columns=X_cols)
viz.transform(pca, X_columns=X_cols, y_column=y_col, task='classification')
viz.transform(pca, X_columns=X_cols, y_column=y_col, task='regression')

# NOTE(review): this trimmed frame is never used below — the test file
# may be truncated here; confirm whether more checks were intended.
data, _, _ = DataSetLoader.load_otto_group()
data = data.iloc[:10000, :30]
"""Smoke test for Experiment: train, cross-validate, plot a learning
curve, grid-search hyperparameters, and persist the best model."""
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from xgboost import XGBRegressor
from keras.wrappers.scikit_learn import KerasRegressor

from ionyx import Experiment
from ionyx.contrib.keras_builder import KerasBuilder
from ionyx.datasets import DataSetLoader

print('Beginning experiment test...')
data, _, _ = DataSetLoader.load_forest_cover()
# First column is the target; everything after it is a feature.
X_cols = data.columns[1:].tolist()
y_col = data.columns[0]

logistic = LogisticRegression()
cv = KFold()
experiment = Experiment(package='sklearn', model=logistic,
                        scoring_metric='accuracy', verbose=True, data=data,
                        X_columns=X_cols, y_column=y_col, cv=cv)
experiment.train_model()
experiment.cross_validate()
experiment.learning_curve()

# BUG FIX: the grid previously searched over 'alpha', but
# LogisticRegression has no 'alpha' hyperparameter — its regularization
# strength is the inverse parameter 'C' — so a sklearn grid search would
# raise ValueError ("Invalid parameter alpha for estimator ...").
param_grid = [{'C': [0.01, 0.1, 1.0]}]
# NOTE(review): hard-coded absolute paths tie this test to one machine;
# consider tempfile.mkdtemp() for portability.
experiment.param_search(param_grid, save_results_path='/home/john/temp/search.csv')
print(experiment.best_model_)
experiment.save_model('/home/john/temp/model.pkl')
"""Smoke test for DataSetLoader: load every bundled dataset and print
its headline shapes plus a row preview."""
from ionyx.datasets import DataSetLoader


def _summarize(title, data, X, y):
    # Print one single-target dataset's shapes and a preview of its rows.
    print(title)
    print("Data shape = {0}, X shape = {1}, y shape = {2}".format(
        data.shape, X.shape, y.shape))
    print(data.head())
    print("")


print('Beginning dataset loader test...')

# Bike sharing has two targets, so it gets its own report format.
data, X, y1, y2 = DataSetLoader.load_bike_sharing()
print("Bike-sharing data:")
print("Data shape = {0}, X shape = {1}, y1 shape = {2}, y2 shape = {3}".format(
    data.shape, X.shape, y1.shape, y2.shape))
print(data.head())
print("")

_summarize("Forest cover data:", *DataSetLoader.load_forest_cover())
_summarize("Otto group data:", *DataSetLoader.load_otto_group())
_summarize("Property inspection data:", *DataSetLoader.load_property_inspection())
"""Smoke test for AveragingRegressor: fit a weighted three-model
ensemble on a small slice and compare per-model vs ensemble error."""
import pprint as pp
import pandas as pd
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
from sklearn.svm import LinearSVR

from ionyx.contrib import AveragingRegressor
from ionyx.datasets import DataSetLoader

print('Beginning averaging regressor test...')
data, X, y = DataSetLoader.load_property_inspection()
# Trim to a small slice so the smoke test runs quickly.
data = data.iloc[:1000, :]
X = X[:1000, :]
y = y[:1000]

base_estimators = [('ridge', Ridge()), ('lasso', Lasso()), ('svm', LinearSVR())]
ensemble = AveragingRegressor(base_estimators, weights=[1.0, 1.5, 2.0])
ensemble.fit(X, y)

print('Estimators list:')
pp.pprint(ensemble.estimators_)
print('Named estimators dict:')
pp.pprint(ensemble.named_estimators_)

# Score each fitted base model, then the weighted ensemble itself.
fitted = ensemble.estimators_
print('Model 1 score = {0}'.format(
    mean_absolute_error(y, fitted[0].predict(X))))
print('Model 2 score = {0}'.format(
    mean_absolute_error(y, fitted[1].predict(X))))
print('Model 3 score = {0}'.format(
    mean_absolute_error(y, fitted[2].predict(X))))
print('Ensemble score = {0}'.format(mean_absolute_error(
    y, ensemble.predict(X))))