def test_set_param_recursive_2():
    """Check that set_param_recursive reaches the estimator nested in SelectFromModel.

    A two-step pipeline (SelectFromModel wrapping an ExtraTreesRegressor,
    followed by a DecisionTreeRegressor) is compiled from its string form;
    after the call, both the nested estimator and the final step must have
    ``random_state`` equal to 42.
    """
    # Adjacent literals concatenate into the exact expression string that the
    # primitive set parses; note there is no space after "max_depth=8,".
    pipeline_string = (
        "DecisionTreeRegressor("
        "SelectFromModel(input_matrix, "
        "SelectFromModel__ExtraTreesRegressor__max_features=0.05, "
        "SelectFromModel__ExtraTreesRegressor__n_estimators=100, "
        "SelectFromModel__threshold=0.05), "
        "DecisionTreeRegressor__max_depth=8,"
        "DecisionTreeRegressor__min_samples_leaf=5, "
        "DecisionTreeRegressor__min_samples_split=5)"
    )

    regressor = TPOTRegressor()
    regressor._fit_init()

    individual = creator.Individual.from_string(pipeline_string, regressor._pset)
    compiled = regressor._toolbox.compile(expr=individual)

    set_param_recursive(compiled.steps, 'random_state', 42)

    # Each entry of ``steps`` is a (name, estimator) pair.
    assert compiled.steps[0][1].estimator.random_state == 42
    assert compiled.steps[1][1].random_state == 42
) TPOTSelectFromModel, TPOTSelectFromModel_args = TPOTOperatorClassFactory( test_operator_key_2, classifier_config_dict[test_operator_key_2] ) mnist_data = load_digits() training_features, testing_features, training_target, testing_target = \ train_test_split(mnist_data.data.astype(np.float64), mnist_data.target.astype(np.float64), random_state=42) tpot_obj = TPOTClassifier() tpot_obj._fit_init() tpot_obj_reg = TPOTRegressor() tpot_obj_reg._fit_init() def test_export_random_ind(): """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39.""" tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light") tpot_obj._fit_init() tpot_obj._pbar = tqdm(total=1, disable=True) pipeline = tpot_obj._toolbox.individual() expected_code = """import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.naive_bayes import BernoulliNB # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1).values
) TPOTSelectFromModel, TPOTSelectFromModel_args = TPOTOperatorClassFactory( test_operator_key_2, classifier_config_dict[test_operator_key_2] ) digits_data = load_digits() training_features, testing_features, training_target, testing_target = \ train_test_split(digits_data.data.astype(np.float64), digits_data.target.astype(np.float64), random_state=42) tpot_obj = TPOTClassifier() tpot_obj._fit_init() tpot_obj_reg = TPOTRegressor() tpot_obj_reg._fit_init() def test_export_random_ind(): """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39.""" tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light") tpot_obj._fit_init() tpot_obj._pbar = tqdm(total=1, disable=True) pipeline = tpot_obj._toolbox.individual() expected_code = """import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.naive_bayes import BernoulliNB # NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1)
class PipelineEvaluator:
    """Tool to evaluate pipeline scores on a given dataset.

    Wraps a TPOT estimator configured with ``generations=0`` and
    ``warm_start=True`` so that a hand-built population of pipelines can be
    injected and fitted instead of being evolved.
    """

    def __init__(self, dataset_path, json_path, n_jobs=1, config_dict=None, task="Classification"):
        """Create the underlying TPOT estimator and initialise its primitive set.

        :param dataset_path: Path handed to ``load_split_dataset`` when
            ``evaluate`` loads the train/test split.
        :param json_path: Directory containing the ``<name>.npy`` files from
            which the pipeline strings are read.
        :param n_jobs: Forwarded to the TPOT estimator.
        :param config_dict: Forwarded to the TPOT estimator.
        :param task: ``"Classification"`` (TPOTClassifier) or ``"Regression"``
            (TPOTRegressor).
        :raises ValueError: If ``task`` is neither of the two supported values.
        """
        # Accumulates (score, individual) tuples across every evaluate() call.
        self.scores = []
        self.datasets_path = dataset_path
        self.JSON = json_path

        if task == "Classification":
            self.tpot = TPOTClassifier(population_size=1, generations=0, verbosity=0,
                                       n_jobs=n_jobs, config_dict=config_dict, warm_start=True)
        elif task == "Regression":
            self.tpot = TPOTRegressor(population_size=1, generations=0, verbosity=0,
                                      n_jobs=n_jobs, config_dict=config_dict, warm_start=True)
        else:
            raise ValueError(
                f"Unsupported task {task!r}; expected 'Classification' or 'Regression'"
            )

        self.tpot._fit_init()  # Create _pset (PrimitiveSet)

    def evaluate(self, dataset_name, pipeline_list):
        """Fit and score each pipeline of the given list on one dataset.

        :param dataset_name: Name of the dataset; passed together with the
            stored dataset path to ``load_split_dataset``.
        :param pipeline_list: List of tuples. Index 0 is the name of the
            ``.npy`` file (without extension) under the JSON directory, for
            example 'MNIST'; index 1 is the evaluated pipeline number, for
            example '1984'.
        :return: ``(evaluated_individuals_, scores)``. The first item is the
            TPOT estimator's ``evaluated_individuals_`` attribute. The second
            is the instance-wide list of ``(score, individual)`` tuples, where
            the score is NaN for any pipeline whose scoring raised.
        :raises TypeError: If ``pipeline_list`` is not a list.
        """
        if not isinstance(pipeline_list, list):
            raise TypeError(
                f"pipeline_list must be a list, got {type(pipeline_list).__name__}"
            )

        # -------------- DATASET --------------
        X_train, y_train, X_test, y_test = load_split_dataset(self.datasets_path, dataset_name)

        # Several pipelines usually come from the same file; load each once.
        loaded = {}
        pop = []
        for pipeline in pipeline_list:
            # Search the origin full pipeline
            npy_path = os.path.join(self.JSON, f'{pipeline[0]}.npy')
            if npy_path not in loaded:
                # NOTE(security): allow_pickle=True unpickles arbitrary objects;
                # only point json_path at trusted files.
                loaded[npy_path] = np.load(npy_path, allow_pickle=True).item()
            entry_key = 'Evaluate Pipeline ' + str(int(pipeline[1])) + ':'
            pipeline_string = loaded[npy_path][entry_key]['TEST PARSING PIPELINE']

            # Create Individual object for Population List
            pop.append(creator.Individual.from_string(pipeline_string, self.tpot._pset))

        # Update tpot Object fields so that fit() works on the injected population
        self.tpot.population_size = len(pop)
        self.tpot._pop = pop
        self.tpot.fit(X_train, y_train)

        for ind in pop:
            try:
                self.tpot._optimized_pipeline = ind
                self.tpot._summary_of_best_pipeline(X_train, y_train)
                ind_score = self.tpot.score(X_test, y_test)
            except Exception:
                # Best effort: one failing pipeline must not abort the batch,
                # so it is recorded with a NaN score instead.
                self.scores.append((np.nan, ind))
            else:
                self.scores.append((ind_score, ind))

        return self.tpot.evaluated_individuals_, self.scores