def test_ZeroCount():
    """Check the first two columns produced by ``ZeroCount.transform``.

    The module-level fixture ``X`` is transformed and columns 0 and 1 of the
    result are compared against the expected per-row counts.
    """
    transformed = ZeroCount().transform(X)

    # Expected values are tied to the module-level fixture ``X``.
    expected_zero_counts = np.array([3, 2, 1, 4])
    expected_non_zero_counts = np.array([2, 3, 4, 1])

    assert np.allclose(expected_zero_counts, transformed[:, 0])
    assert np.allclose(expected_non_zero_counts, transformed[:, 1])
def get_model_v4():
    """Build and return the unfitted v4 classification pipeline.

    Returns:
        The pipeline created by ``make_pipeline``, with ``random_state`` set
        to 37 on its steps via ``set_param_recursive``.
    """
    # Steps are instantiated in the same order they appear in the pipeline.
    steps = [
        StackingEstimator(
            estimator=XGBClassifier(
                learning_rate=0.001,
                max_depth=2,
                min_child_weight=17,
                n_estimators=100,
                nthread=1,
                subsample=0.8,
            )
        ),
        ZeroCount(),
        VarianceThreshold(threshold=0.2),
        RFE(
            estimator=ExtraTreesClassifier(
                criterion="entropy",
                max_features=0.15000000000000002,
                n_estimators=100,
            ),
            step=0.2,
        ),
        GradientBoostingClassifier(
            learning_rate=0.5,
            max_depth=7,
            max_features=0.15000000000000002,
            min_samples_leaf=2,
            min_samples_split=3,
            n_estimators=100,
            subsample=1.0,
        ),
    ]
    pipeline = make_pipeline(*steps)

    # Pin the random state of every step so results are reproducible.
    set_param_recursive(pipeline.steps, 'random_state', 37)
    return pipeline
def pipeline_suggested_by_tpot(self):
    """Fit the TPOT-suggested pipeline and print train/test accuracy.

    The pipeline definition was copied from "optimal_pipeline.py". It is
    fitted on ``self.x_train`` / ``self.y_train`` and scored on both the
    training data and ``self.x_test`` / ``self.y_test``. Nothing is returned.
    """
    steps = [
        PolynomialFeatures(degree=2, include_bias=False, interaction_only=False),
        VarianceThreshold(threshold=0.2),
        ZeroCount(),
        GradientBoostingClassifier(
            learning_rate=1.0,
            max_depth=10,
            max_features=0.9000000000000001,
            min_samples_leaf=16,
            min_samples_split=3,
            n_estimators=100,
            subsample=0.7000000000000001,
        ),
    ]
    exported_pipeline = make_pipeline(*steps)

    # Train, then report the score on both splits.
    exported_pipeline.fit(self.x_train, self.y_train)
    print(f"Train acc: {exported_pipeline.score(self.x_train, self.y_train)}")
    print(f"Test acc: {exported_pipeline.score(self.x_test, self.y_test)}")
def clf(in_put, out_put):
    """Fit the exported regression pipeline and predict on the same inputs.

    Args:
        in_put: Feature data used both for fitting and for prediction.
        out_put: Target values used for fitting.

    Returns:
        The pipeline's predictions for ``in_put``.
    """
    from sklearn.decomposition import PCA
    from sklearn.linear_model import LassoLarsCV
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.svm import LinearSVR
    from tpot.builtins import StackingEstimator, ZeroCount

    steps = [
        PolynomialFeatures(degree=2, include_bias=False, interaction_only=False),
        StackingEstimator(
            estimator=LinearSVR(
                C=15.0,
                dual=True,
                epsilon=1.0,
                loss="epsilon_insensitive",
                tol=1e-05,
            )
        ),
        PCA(iterated_power=7, svd_solver="randomized"),
        ZeroCount(),
        LassoLarsCV(normalize=True),
    ]
    model = make_pipeline(*steps)
    model.fit(in_put, out_put)

    # Predictions are made on the training inputs themselves.
    return model.predict(in_put)
def __init__(self):
    """Load the DASH rows, fit the regression pipeline and keep its results.

    Reads ``cryptodata.csv``, splits the DASH rows into train/test sets,
    fits the exported pipeline on the training part and stores the fitted
    pipeline, the test predictions, the true test targets and the test
    score on the instance.
    """
    # The CSV is read with pandas; it holds the same data as the database.
    crypto = pd.read_csv('cryptodata.csv', sep=',')
    dash_rows = crypto[crypto["symbol"] == "DASH"]

    # Features: average price, volume, Google trend and Twitter sentiment.
    X = dash_rows[["price_ave", "volume", "google_trend", "twitter_sent"]].values
    # Target: only the price column.
    y = dash_rows[["price"]].values

    (
        training_features,
        testing_features,
        training_target,
        testing_target,
    ) = train_test_split(X, y, random_state=42)

    # Standard deviation of the target over all DASH rows.
    self.__std = stdev([row[0] for row in y])

    # Score on the training set was: -6.2249531865813035
    steps = [
        VarianceThreshold(threshold=0.05),
        ZeroCount(),
        PCA(iterated_power=1, svd_solver="randomized"),
        StackingEstimator(
            estimator=ElasticNetCV(l1_ratio=0.8500000000000001, tol=0.001)
        ),
        StackingEstimator(
            estimator=LinearSVR(
                C=20.0,
                dual=False,
                epsilon=0.01,
                loss="squared_epsilon_insensitive",
                tol=0.01,
            )
        ),
        ExtraTreesRegressor(
            bootstrap=False,
            max_features=0.8,
            min_samples_leaf=1,
            min_samples_split=2,
            n_estimators=100,
        ),
    ]
    self.exported_pipeline = make_pipeline(*steps)

    # The target is flattened to 1-D for fitting.
    self.exported_pipeline.fit(training_features, training_target.ravel())
    self.y_predict = self.exported_pipeline.predict(testing_features)
    self.y_real = testing_target.ravel()
    self.score = self.exported_pipeline.score(testing_features, testing_target)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from tpot.builtins import ZeroCount
from tpot.export_utils import set_param_recursive

# NOTE: the outcome column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'], random_state=1)

# Average CV score on the training set was: 0.9347254053136407
exported_pipeline = make_pipeline(
    PolynomialFeatures(degree=2, include_bias=False, interaction_only=False),
    VarianceThreshold(threshold=0.2),
    ZeroCount(),
    GradientBoostingClassifier(
        learning_rate=1.0,
        max_depth=10,
        max_features=0.9000000000000001,
        min_samples_leaf=16,
        min_samples_split=3,
        n_estimators=100,
        subsample=0.7000000000000001,
    ),
)
# Pin the random state of every step in the exported pipeline.
set_param_recursive(exported_pipeline.steps, 'random_state', 1)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import DatasetSelector, ZeroCount

# NOTE: the class column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'].values, random_state=39)

# Average CV score on the training set was:0.6838260869565218
exported_pipeline = make_pipeline(
    DatasetSelector(sel_subset=4, subset_list="module23.csv"),
    ZeroCount(),
    ExtraTreesClassifier(
        bootstrap=True,
        criterion="entropy",
        max_features=0.6500000000000001,
        min_samples_leaf=14,
        min_samples_split=18,
        n_estimators=100,
    ),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.pipeline import make_pipeline, make_union
from tpot.builtins import StackingEstimator, ZeroCount

# NOTE: the class column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'].values, random_state=42)

# Score on the training set was:0.8686359265648864
exported_pipeline = make_pipeline(
    StackingEstimator(estimator=BernoulliNB(alpha=0.1, fit_prior=True)),
    StackingEstimator(estimator=GaussianNB()),
    ZeroCount(),
    RandomForestClassifier(
        bootstrap=True,
        criterion="gini",
        max_features=0.4,
        min_samples_leaf=12,
        min_samples_split=3,
        n_estimators=100,
    ),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
from sklearn.gaussian_process.kernels import Matern
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import RobustScaler
from tpot.builtins import StackingEstimator, ZeroCount
from tpot.export_utils import set_param_recursive
from sklearn.preprocessing import FunctionTransformer
from copy import copy

# NOTE: the outcome column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'], random_state=123)

# Average CV score on the training set was: 0.9837855826340283
exported_pipeline = make_pipeline(
    # Both branches of the union pass the input through ``copy``.
    make_union(FunctionTransformer(copy), FunctionTransformer(copy)),
    RobustScaler(),
    ZeroCount(),
    GaussianProcessRegressor(
        kernel=Matern(length_scale=4.3999999999999995, nu=2.5),
        n_restarts_optimizer=60,
        normalize_y=False,
    ),
)
# Pin the random state of every step in the exported pipeline.
set_param_recursive(exported_pipeline.steps, 'random_state', 123)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
from copy import copy

# NOTE: the outcome column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'], random_state=None)

# Average CV score on the training set was: 0.8336787977583366
exported_pipeline = make_pipeline(
    make_union(
        # First branch: a nested pipeline of engineered features.
        make_pipeline(
            make_union(
                make_union(Normalizer(norm="l2"), MaxAbsScaler()),
                StackingEstimator(
                    estimator=DecisionTreeClassifier(
                        criterion="entropy",
                        max_depth=2,
                        min_samples_leaf=11,
                        min_samples_split=17,
                    )
                ),
            ),
            StackingEstimator(
                estimator=MLPClassifier(alpha=0.01, learning_rate_init=0.001)
            ),
            SelectPercentile(score_func=f_classif, percentile=54),
        ),
        # Second branch: the input passed through ``copy``.
        FunctionTransformer(copy),
    ),
    ZeroCount(),
    StackingEstimator(estimator=BernoulliNB(alpha=1.0, fit_prior=True)),
    MultinomialNB(alpha=0.01, fit_prior=True),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
from metstab_shap.data import load_data # load data (and change to classification if needed) data_cfg = parse_data_config('configs/data/rat.cfg') repr_cfg = parse_representation_config('configs/repr/maccs.cfg') task_cfg = parse_task_config('configs/task/regression.cfg') x, y, _, test_x, test_y, smiles, test_smiles = load_data( data_cfg, **repr_cfg[utils_section]) training_features = x training_target = y testing_features = test_x # Average CV score on the training set was: -0.15289999993179348 exported_pipeline = make_pipeline( ZeroCount(), MinMaxScaler(), StackingEstimator(estimator=DecisionTreeRegressor(max_depth=5, max_features=0.25, min_samples_leaf=3, min_samples_split=14, splitter="best")), StackingEstimator( estimator=ExtraTreesRegressor(bootstrap=False, max_depth=4, max_features=0.7500000000000001, max_samples=None, min_samples_leaf=1, min_samples_split=10, n_estimators=1000)), Binarizer(threshold=0.9), ExtraTreesRegressor(bootstrap=False,
from sklearn.preprocessing import Normalizer
from tpot.builtins import StackingEstimator, ZeroCount
from xgboost import XGBClassifier

# NOTE: the class column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'].values, random_state=42)

# Score on the training set was:0.8666666666666668
exported_pipeline = make_pipeline(
    StackingEstimator(
        estimator=XGBClassifier(
            learning_rate=0.1,
            max_depth=3,
            min_child_weight=4,
            n_estimators=100,
            nthread=1,
            subsample=0.1,
        )
    ),
    Normalizer(norm="l1"),
    ZeroCount(),
    RandomForestClassifier(
        bootstrap=False,
        criterion="entropy",
        max_features=0.1,
        min_samples_leaf=11,
        min_samples_split=20,
        n_estimators=100,
    ),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
import numpy as np
import pandas as pd
from sklearn.linear_model import LassoLarsCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import ZeroCount

# NOTE: the class column must be named 'class' in the data file.
tpot_data = np.recfromcsv('../../input/train.csv', delimiter=',', dtype=np.float64)

# View the record array as a plain 2-D float array, then drop the 'class' column.
features = np.delete(
    tpot_data.view(np.float64).reshape(tpot_data.size, -1),
    tpot_data.dtype.names.index('class'),
    axis=1,
)
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['class'], random_state=42)

exported_pipeline = make_pipeline(ZeroCount(), LassoLarsCV(normalize=True))

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
max_depth=1, min_child_weight=3, n_estimators=100, n_jobs=1, objective="reg:squarederror", subsample=0.9500000000000001, verbosity=0)), MinMaxScaler(), StackingEstimator(estimator=SGDRegressor(alpha=0.01, eta0=0.01, fit_intercept=False, l1_ratio=0.0, learning_rate="constant", loss="huber", penalty="elasticnet", power_t=0.0)), StackingEstimator(estimator=LinearSVR( C=25.0, dual=True, epsilon=0.1, loss="epsilon_insensitive", tol=0.0001)), FeatureAgglomeration(affinity="manhattan", linkage="complete"), SelectPercentile(score_func=f_regression, percentile=6), StackingEstimator(estimator=ExtraTreesRegressor(bootstrap=False, max_features=0.8, min_samples_leaf=19, min_samples_split=10, n_estimators=400)), ZeroCount(), FeatureAgglomeration(affinity="l1", linkage="complete"), RidgeCV()) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)
import pandas as pd
from sklearn.decomposition import FastICA
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from tpot.builtins import StackingEstimator, ZeroCount
from sklearn.preprocessing import FunctionTransformer
from copy import copy

# NOTE: the class column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'].values, random_state=42)

# Score on the training set was:0.8453186610518303
exported_pipeline = make_pipeline(
    make_union(
        make_pipeline(ZeroCount(), FastICA(tol=0.2)),
        # Second branch: the input passed through ``copy``.
        FunctionTransformer(copy),
    ),
    ExtraTreesClassifier(
        bootstrap=False,
        criterion="entropy",
        max_features=0.2,
        min_samples_leaf=1,
        min_samples_split=4,
        n_estimators=100,
    ),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
knr_params3 = {'n_neighbors' : 15} knr_params4 = {'n_neighbors' : 25} SEED = 0 level_1_models = [XgbWrapper(seed=SEED, params=xgb_params, cv_fold=4), ] # level_1_models = level_1_models + [SklearnWrapper(clf=KNeighborsRegressor, params=knr_params1), # SklearnWrapper(clf=KNeighborsRegressor, params=knr_params2), # SklearnWrapper(clf=KNeighborsRegressor, params=knr_params3), # SklearnWrapper(clf=KNeighborsRegressor, params=knr_params4)] level_1_models = level_1_models + [SklearnWrapper(make_pipeline( ZeroCount(), LassoLarsCV(normalize=True))),#LB 0.55797 SklearnWrapper(make_pipeline(StackingEstimator(estimator=LassoLarsCV(normalize=True)), StackingEstimator(estimator=GradientBoostingRegressor(learning_rate=0.001, loss="huber", max_depth=3, max_features=0.55, min_samples_leaf=18, min_samples_split=14, subsample=0.7)), LassoLarsCV())) ] params_list = [rf_params1, rf_params2, et_params1, et_params2, gb_params1, #gb_params2, rd_params, ls_params, eln_params, lcv_params, llcv_params ] func_list = [RandomForestRegressor, RandomForestRegressor, ExtraTreesRegressor, ExtraTreesRegressor,
def test_ZeroCount_fit():
    """Check that ``ZeroCount.fit`` hands back the operator it was called on."""
    zero_count = ZeroCount()
    fitted = zero_count.fit(X)

    assert fitted == zero_count
# tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1).values training_features, testing_features, training_target, testing_target = \ train_test_split(features, tpot_data['target'].values, random_state=None) # Average CV score on the training set was:-747046.8597394783 exported_pipeline = make_pipeline( StackingEstimator(estimator=ElasticNetCV(l1_ratio=1.0, tol=0.001)), FastICA(tol=0.8), PolynomialFeatures(degree=2, include_bias=False, interaction_only=False), StackingEstimator(estimator=ExtraTreesRegressor(bootstrap=True, max_features=0.5, min_samples_leaf=14, min_samples_split=11, n_estimators=100)), ZeroCount(), MaxAbsScaler(), LassoLarsCV(normalize=False)) # exported_pipeline = TransformedTargetRegressor(regressor=exported_pipeline, transformer=QuantileTransformer(output_distribution='normal')) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) train_results = exported_pipeline.predict(training_features) from pylab import * # figure(1) # clf() # ion() # plot(training_target, train_results, 'bo') # plot(testing_target, results, 'ro') # show() competitions.plot_predict(exported_pipeline)
RobustScaler(), MinMaxScaler(), StackingEstimator(estimator=LinearSVR(C=25.0, dual=True, epsilon=0.01, loss="epsilon_insensitive", tol=0.0001)), StackingEstimator(estimator=DecisionTreeRegressor( max_depth=8, min_samples_leaf=17, min_samples_split=9)), FeatureAgglomeration(affinity="l2", linkage="average"), RBFSampler(gamma=0.75), StackingEstimator(estimator=LinearSVR(C=1.0, dual=True, epsilon=1.0, loss="squared_epsilon_insensitive", tol=0.1)), StackingEstimator( estimator=KNeighborsRegressor(n_neighbors=9, p=1, weights="uniform")), StackingEstimator(estimator=LassoLarsCV(normalize=True)), SelectPercentile(score_func=f_regression, percentile=26), StandardScaler(), PCA(iterated_power=7, svd_solver="randomized"), StackingEstimator(estimator=LinearSVR(C=10.0, dual=True, epsilon=0.01, loss="squared_epsilon_insensitive", tol=1e-05)), ZeroCount(), SelectFwe(score_func=f_regression, alpha=0.039), PCA(iterated_power=5, svd_solver="randomized"), RidgeCV()) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)
import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import DatasetSelector, ZeroCount

# NOTE: the class column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'].values, random_state=96)

# Average CV score on the training set was:0.7070745272525027
exported_pipeline = make_pipeline(
    DatasetSelector(sel_subset=0, subset_list="subsets.csv"),
    ZeroCount(),
    ExtraTreesClassifier(
        bootstrap=False,
        criterion="gini",
        max_features=0.8,
        min_samples_leaf=1,
        min_samples_split=8,
        n_estimators=100,
    ),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
def test_ZeroCount_fit():
    """Check that ``ZeroCount.fit`` hands back the operator it was called on."""
    zero_count = ZeroCount()
    fitted = zero_count.fit(X)

    assert fitted == zero_count
# NOTE: the outcome column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
# random_state=None: the train/test split is not seeded.
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'], random_state=None)

# Average CV score on the training set was: 0.6933333333333334
exported_pipeline = make_pipeline(
    VarianceThreshold(threshold=0.0005),
    StackingEstimator(
        estimator=XGBClassifier(
            learning_rate=0.1,
            max_depth=1,
            min_child_weight=17,
            n_estimators=100,
            nthread=1,
            subsample=0.6000000000000001,
        )
    ),
    StackingEstimator(
        estimator=DecisionTreeClassifier(
            criterion="gini",
            max_depth=2,
            min_samples_leaf=19,
            min_samples_split=14,
        )
    ),
    # The bare positional ``True`` after ``alpha=1.0`` was a SyntaxError
    # (positional argument follows keyword argument); it is ``fit_prior``.
    StackingEstimator(estimator=BernoulliNB(alpha=1.0, fit_prior=True)),
    ZeroCount(),
    GradientBoostingClassifier(
        learning_rate=0.01,
        max_depth=4,
        max_features=0.8500000000000001,
        min_samples_leaf=6,
        min_samples_split=15,
        n_estimators=100,
        subsample=0.6500000000000001,
    ),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from tpot.builtins import StackingEstimator, ZeroCount
from xgboost import XGBClassifier
from sklearn.preprocessing import FunctionTransformer
from copy import copy

# NOTE: the class column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'].values, random_state=42)

# Score on the training set was:0.504247990815155
exported_pipeline = make_pipeline(
    # Union of the copied input and the ZeroCount output.
    make_union(FunctionTransformer(copy), ZeroCount()),
    XGBClassifier(
        learning_rate=0.001,
        max_depth=3,
        min_child_weight=3,
        n_estimators=100,
        nthread=1,
        subsample=0.1,
    ),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFwe, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import ZeroCount
from tpot.export_utils import set_param_recursive

# NOTE: the outcome column used here is 'PSL_Won'; it must exist in data.csv.
tpot_data = pd.read_csv("data.csv")
features = tpot_data.drop('PSL_Won', axis=1)
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['PSL_Won'], random_state=42)

# Average CV score on the training set was: 0.8671594508975712
exported_pipeline = make_pipeline(
    SelectFwe(score_func=f_classif, alpha=0.011),
    ZeroCount(),
    ExtraTreesClassifier(
        bootstrap=False,
        criterion="entropy",
        max_features=1.0,
        min_samples_leaf=7,
        min_samples_split=20,
        n_estimators=100,
    ),
)
# Pin the random state of every step in the exported pipeline.
set_param_recursive(exported_pipeline.steps, 'random_state', 42)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)

## Plot
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, mean_squared_error
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, RobustScaler, StandardScaler
from tpot.builtins import ZeroCount

# NOTE: the class column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'].values, random_state=None)

# Average CV score on the training set was:0.8591858482523443
exported_pipeline = make_pipeline(
    StandardScaler(),
    ZeroCount(),
    PolynomialFeatures(degree=2, include_bias=False, interaction_only=False),
    RobustScaler(),
    GradientBoostingClassifier(
        learning_rate=1.0,
        max_depth=6,
        max_features=0.45,
        min_samples_leaf=10,
        min_samples_split=15,
        n_estimators=100,
        subsample=0.7000000000000001,
    ),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from tpot.builtins import StackingEstimator, ZeroCount
from sklearn.preprocessing import FunctionTransformer
from copy import copy

# NOTE: the class column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'].values, random_state=42)

# Average CV score on the training set was:0.8342718814237802
exported_pipeline = make_pipeline(
    # Union of the ZeroCount output and the copied input.
    make_union(ZeroCount(), FunctionTransformer(copy)),
    RFE(
        estimator=ExtraTreesClassifier(
            criterion="gini",
            max_features=0.6000000000000001,
            n_estimators=100,
        ),
        step=0.7000000000000001,
    ),
    ExtraTreesClassifier(
        bootstrap=False,
        criterion="gini",
        max_features=0.9000000000000001,
        min_samples_leaf=4,
        min_samples_split=15,
        n_estimators=100,
    ),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
from sklearn.preprocessing import FunctionTransformer
from copy import copy

# NOTE: the class column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'].values, random_state=42)

# Average CV score on the training set was:0.84550605863897
exported_pipeline = make_pipeline(
    make_union(
        # First branch: encode, select, count zeros, then rescale.
        make_pipeline(
            OneHotEncoder(minimum_fraction=0.25, sparse=False, threshold=10),
            RFE(
                estimator=ExtraTreesClassifier(
                    criterion="gini",
                    max_features=0.5,
                    n_estimators=100,
                ),
                step=0.2,
            ),
            ZeroCount(),
            MinMaxScaler(),
        ),
        # Second branch: the input passed through ``copy``.
        FunctionTransformer(copy),
    ),
    Normalizer(norm="max"),
    XGBClassifier(
        learning_rate=0.01,
        max_depth=6,
        min_child_weight=7,
        n_estimators=600,
        nthread=1,
        subsample=0.9500000000000001,
    ),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import ZeroCount

# NOTE: the class column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'].values, random_state=None)

# Average CV score on the training set was:0.7291721934005595
exported_pipeline = make_pipeline(
    ZeroCount(),
    SelectPercentile(score_func=f_classif, percentile=66),
    LogisticRegression(C=0.0001, dual=False, penalty="l2"),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import MinMaxScaler
from tpot.builtins import StackingEstimator, ZeroCount

# NOTE: the class column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'].values, random_state=42)

# Score on the training set was:0.47500297900297905
exported_pipeline = make_pipeline(
    make_union(
        MinMaxScaler(),
        make_pipeline(ZeroCount(), MinMaxScaler()),
    ),
    ZeroCount(),
    StackingEstimator(estimator=BernoulliNB(alpha=100.0, fit_prior=False)),
    # NOTE(review): dual=True may be rejected by the default solver in newer
    # scikit-learn releases — confirm against the installed version.
    LogisticRegression(C=0.01, dual=True, penalty="l2"),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import ZeroCount
from xgboost import XGBRegressor

# NOTE: the class column must be named 'target' in the data file.
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(features, tpot_data['target'].values, random_state=42)

# Score on the training set was:-0.0004955828805812525
exported_pipeline = make_pipeline(
    ZeroCount(),
    XGBRegressor(
        learning_rate=0.1,
        max_depth=8,
        min_child_weight=16,
        n_estimators=100,
        nthread=1,
        subsample=1.0,
    ),
)

# Fit on the training split, then predict the held-out split.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
from sklearn.pipeline import make_pipeline, make_union from sklearn.preprocessing import PolynomialFeatures from tpot.builtins import StackingEstimator, ZeroCount from xgboost import XGBRegressor # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1).values training_features, testing_features, training_target, testing_target = \ train_test_split(features, tpot_data['target'].values, random_state=42) # Score on the training set was:-15.336456888232188 exported_pipeline = make_pipeline( SelectPercentile(score_func=f_regression, percentile=89), ZeroCount(), StackingEstimator( estimator=GradientBoostingRegressor(alpha=0.75, learning_rate=0.01, loss="quantile", max_depth=1, max_features=0.35000000000000003, min_samples_leaf=4, min_samples_split=17, n_estimators=100, subsample=0.9000000000000001)), PolynomialFeatures(degree=2, include_bias=False, interaction_only=False), XGBRegressor(learning_rate=0.01, max_depth=6, min_child_weight=9, n_estimators=100,