def test_make_tpot_pmml_config(self):
    """Expect make_tpot_pmml_config to keep only the StandardScaler entry.

    The input config holds an RBFSampler entry (with a gamma grid) and an
    empty StandardScaler entry; the result must equal a dict containing
    just the StandardScaler entry.
    """
    gamma_grid = numpy.arange(0.0, 1.01, 0.05)
    config = {
        "sklearn.kernel_approximation.RBFSampler": {"gamma": gamma_grid},
        "sklearn.preprocessing.StandardScaler": {},
    }
    expected = {"sklearn.preprocessing.StandardScaler": {}}
    tpot_pmml_config = make_tpot_pmml_config(config)
    self.assertEqual(expected, tpot_pmml_config)
def build_classifier(data, feature_pipeline, generations, population_size, name):
    """Fit a TPOT classifier on transformed features and store model and predictions.

    Args:
        data: An ``(X, y)`` pair. ``X`` must expose ``columns.values`` and
            ``y`` must expose ``name`` (presumably a pandas DataFrame and
            Series -- confirm against callers).
        feature_pipeline: Object providing ``fit_transform`` and ``steps``;
            its steps are prepended to the fitted TPOT pipeline steps.
        generations: Forwarded to ``TPOTClassifier(generations=...)``.
        population_size: Forwarded to ``TPOTClassifier(population_size=...)``.
        name: Identifier passed to ``store_pkl`` and ``store_csv``.
    """
    X, y = data
    Xt = feature_pipeline.fit_transform(X)
    Xt = Xt.astype(float)
    config = make_tpot_pmml_config(classifier_config_dict)
    config = filter_config(config)
    del config["sklearn.naive_bayes.GaussianNB"]  # Does not support nesting - see http://mantis.dmg.org/view.php?id=208
    del config["sklearn.neighbors.KNeighborsClassifier"]
    del config["sklearn.svm.LinearSVC"]  # Does not support classifier.predict_proba(Xt)
    del config["sklearn.tree.DecisionTreeClassifier"]
    classifier = TPOTClassifier(
        generations=generations,
        population_size=population_size,
        random_state=13,
        config_dict=config,
        verbosity=2,
    )
    classifier.fit(Xt, y)
    pipeline = make_pmml_pipeline(
        Pipeline(steps=feature_pipeline.steps + classifier.fitted_pipeline_.steps),
        active_fields=X.columns.values,
        target_fields=[y.name],
    )
    print(repr(pipeline))
    store_pkl(pipeline, name)
    result = DataFrame(classifier.predict(Xt), columns=[y.name])
    # predict_proba() columns follow the fitted estimator's classes_ order,
    # whereas pandas.unique(y) yields order of first appearance; labelling
    # from classes_ keeps every probability column attached to its class.
    categories = classifier.fitted_pipeline_.classes_
    if len(categories) > 0:
        probabilities = DataFrame(
            classifier.predict_proba(Xt),
            columns=["probability(" + str(category) + ")" for category in categories],
        )
        result = pandas.concat([result, probabilities], axis=1)
    store_csv(result, name)
def build_classifier(data, name):
    """Fit a small TPOT classifier and store the PMML pipeline and predictions.

    Args:
        data: An ``(X, y)`` pair. ``X`` must expose ``columns.values`` and
            ``y`` must expose ``name`` (presumably a pandas DataFrame and
            Series -- confirm against callers).
        name: Base name; the pipeline is passed to ``store_pkl`` as
            ``name + ".pkl"`` and the results to ``store_csv`` as
            ``name + ".csv"``.
    """
    X, y = data
    config = make_tpot_pmml_config(classifier_config_dict)
    del config["sklearn.neighbors.KNeighborsClassifier"]
    classifier = TPOTClassifier(
        generations=1,
        population_size=3,
        random_state=13,
        config_dict=config,
        verbosity=2,
    )
    classifier.fit(X, y)
    pipeline = make_pmml_pipeline(
        classifier.fitted_pipeline_,
        active_fields=X.columns.values,
        target_fields=[y.name],
    )
    print(repr(pipeline))
    store_pkl(pipeline, name + ".pkl")
    result = DataFrame(classifier.predict(X), columns=[y.name])
    # predict_proba() columns follow the fitted estimator's classes_ order,
    # whereas pandas.unique(y) yields order of first appearance; labelling
    # from classes_ keeps every probability column attached to its class.
    categories = classifier.fitted_pipeline_.classes_
    if len(categories) > 0:
        probabilities = DataFrame(
            classifier.predict_proba(X),
            columns=["probability(" + str(category) + ")" for category in categories],
        )
        result = pandas.concat([result, probabilities], axis=1)
    store_csv(result, name + ".csv")
def build_regressor(data, name):
    """Fit a small TPOT regressor and store the PMML pipeline and predictions.

    Args:
        data: An ``(X, y)`` pair. ``X`` must expose ``columns.values`` and
            ``y`` must expose ``name``.
        name: Identifier passed to both ``store_pkl`` and ``store_csv``.
    """
    X, y = data
    tpot_config = make_tpot_pmml_config(regressor_config_dict)
    del tpot_config["sklearn.neighbors.KNeighborsRegressor"]
    regressor = TPOTRegressor(
        generations=3,
        population_size=3,
        random_state=13,
        config_dict=tpot_config,
        verbosity=2,
    )
    regressor.fit(X, y)
    pmml_pipeline = make_pmml_pipeline(
        regressor.fitted_pipeline_,
        active_fields=X.columns.values,
        target_fields=[y.name],
    )
    print(repr(pmml_pipeline))
    store_pkl(pmml_pipeline, name)
    predictions = DataFrame(regressor.predict(X), columns=[y.name])
    store_csv(predictions, name)
def build_regressor(data, feature_pipeline, generations, population_size, name):
    """Fit a TPOT regressor on transformed features and store model and predictions.

    Args:
        data: An ``(X, y)`` pair. ``X`` must expose ``columns.values`` and
            ``y`` must expose ``name``.
        feature_pipeline: Object providing ``fit_transform`` and ``steps``;
            its steps are prepended to the fitted TPOT pipeline steps.
        generations: Forwarded to ``TPOTRegressor(generations=...)``.
        population_size: Forwarded to ``TPOTRegressor(population_size=...)``.
        name: Identifier passed to both ``store_pkl`` and ``store_csv``.
    """
    X, y = data
    # Transform first, then cast the transformed features to float.
    Xt = feature_pipeline.fit_transform(X).astype(float)
    tpot_config = filter_config(make_tpot_pmml_config(regressor_config_dict))
    del tpot_config["sklearn.neighbors.KNeighborsRegressor"]
    regressor = TPOTRegressor(
        generations=generations,
        population_size=population_size,
        random_state=13,
        config_dict=tpot_config,
        verbosity=2,
    )
    regressor.fit(Xt, y)
    combined_steps = feature_pipeline.steps + regressor.fitted_pipeline_.steps
    pmml_pipeline = make_pmml_pipeline(
        Pipeline(steps=combined_steps),
        active_fields=X.columns.values,
        target_fields=[y.name],
    )
    print(repr(pmml_pipeline))
    store_pkl(pmml_pipeline, name)
    predictions = DataFrame(regressor.predict(Xt), columns=[y.name])
    store_csv(predictions, name)