Example #1
0
def test_set_param_recursive_2():
    """Check that set_param_recursive reaches the estimator nested in SelectFromModel.

    A two-step pipeline (SelectFromModel wrapping an ExtraTreesRegressor,
    followed by a DecisionTreeRegressor) is compiled from its string form,
    then 'random_state' is set to 42 across its steps. Both the nested
    estimator of the first step and the second step itself must carry it.
    """
    regressor = TPOTRegressor()
    regressor._fit_init()

    individual = creator.Individual.from_string(
        (
            'DecisionTreeRegressor(SelectFromModel(input_matrix, '
            'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
            'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
            'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
        ),
        regressor._pset,
    )
    compiled = regressor._toolbox.compile(expr=individual)
    steps = compiled.steps
    set_param_recursive(steps, 'random_state', 42)

    # Each step is a (name, estimator) pair; index 1 picks the estimator.
    assert steps[0][1].estimator.random_state == 42
    assert steps[1][1].random_state == 42
Example #2
0
)

# Build the operator class and its companion `_args` value from the
# classifier config entry stored under test_operator_key_2.
TPOTSelectFromModel, TPOTSelectFromModel_args = TPOTOperatorClassFactory(
    test_operator_key_2, classifier_config_dict[test_operator_key_2]
)

# Shared train/test fixture: features and targets are both cast to float64
# and split with a fixed seed so every test sees the same partition.
# NOTE(review): the data comes from load_digits(), so the `mnist_data` name is
# presumably a misnomer -- kept because other code may reference it.
mnist_data = load_digits()
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(
    mnist_data.data.astype(np.float64),
    mnist_data.target.astype(np.float64),
    random_state=42,
)

# Module-level classifier instance, initialised once via _fit_init().
tpot_obj = TPOTClassifier()
tpot_obj._fit_init()

# Module-level regressor instance, initialised the same way.
tpot_obj_reg = TPOTRegressor()
tpot_obj_reg._fit_init()

def test_export_random_ind():
    """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
    tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline = tpot_obj._toolbox.individual()
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
Example #3
0
)

# Operator class plus its companion `_args` value, generated from the
# classifier config entry keyed by test_operator_key_2.
TPOTSelectFromModel, TPOTSelectFromModel_args = TPOTOperatorClassFactory(
    test_operator_key_2, classifier_config_dict[test_operator_key_2]
)

# Digits data used as the shared fixture: features and targets are cast to
# float64, then split with random_state=42 for a reproducible partition.
digits_data = load_digits()
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(
    digits_data.data.astype(np.float64),
    digits_data.target.astype(np.float64),
    random_state=42,
)

# Classifier instance shared at module level, initialised via _fit_init().
tpot_obj = TPOTClassifier()
tpot_obj._fit_init()

# Regressor counterpart, initialised the same way.
tpot_obj_reg = TPOTRegressor()
tpot_obj_reg._fit_init()

def test_export_random_ind():
    """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
    tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline = tpot_obj._toolbox.individual()
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
Example #4
0
class PipelineEvaluator:
    """Tool to evaluate stored pipelines against a given, pre-split dataset.

    A TPOT estimator is created with ``generations=0`` and ``warm_start=True``;
    ``evaluate`` then installs the pipelines to be scored as its population.

    Instance attributes (all set in ``__init__``):
        scores: list of ``(score, individual)`` tuples, appended to by every
            ``evaluate`` call.
        datasets_path: location handed to ``load_split_dataset``.
        JSON: directory containing the ``<name>.npy`` pipeline files.
        tpot: the ``TPOTClassifier`` or ``TPOTRegressor`` doing the work.
    """

    def __init__(self, dataset_path, json_path, n_jobs=1, config_dict=None, task="Classification"):
        """Create the evaluator and its underlying TPOT estimator.

        :param dataset_path: location later passed to ``load_split_dataset``.
        :param json_path: directory holding the ``.npy`` pipeline files.
        :param n_jobs: forwarded to the TPOT estimator.
        :param config_dict: forwarded to the TPOT estimator.
        :param task: ``"Classification"`` or ``"Regression"``.
        :raises ValueError: if ``task`` is neither of the two accepted values.
        """
        self.scores = []
        self.datasets_path = dataset_path

        self.JSON = json_path

        if task == "Classification":
            estimator_cls = TPOTClassifier
        elif task == "Regression":
            estimator_cls = TPOTRegressor
        else:
            raise ValueError(
                f"task must be 'Classification' or 'Regression', got {task!r}"
            )

        # Both task types take the same configuration; only the class differs.
        self.tpot = estimator_cls(population_size=1, generations=0, verbosity=0,
                                  n_jobs=n_jobs, config_dict=config_dict, warm_start=True)

        self.tpot._fit_init()  # Create _pset(PrimitiveSet)

    def evaluate(self, dataset_name, pipeline_list):
        """Fit the listed pipelines on a dataset and score each on its test split.

        :param dataset_name: name passed to ``load_split_dataset`` together
            with ``self.datasets_path``.
        :param pipeline_list: list of indexable entries. Item 0 is the name of
            the ``.npy`` file (without extension) under ``self.JSON``, for
            example 'MNIST'; item 1 is the evaluated pipeline number, for
            example '1984'.
        :return: ``(self.tpot.evaluated_individuals_, self.scores)``.
            ``self.scores`` is a list of ``(score, individual)`` tuples in
            which the score is NaN for any pipeline whose scoring raised.
            The list is never reset, so it accumulates across calls.
        :raises TypeError: if ``pipeline_list`` is not a list.
        """
        if not isinstance(pipeline_list, list):
            raise TypeError(
                f"pipeline_list must be a list, got {type(pipeline_list).__name__}"
            )

        #  -------------- DATASET --------------
        X_train, y_train, X_test, y_test = load_split_dataset(self.datasets_path, dataset_name)
        pop = []

        for pipeline in pipeline_list:
            source_name, pipeline_number = pipeline[0], pipeline[1]

            # Look up the original full pipeline string.
            # SECURITY: allow_pickle=True unpickles the file's contents, which
            # can execute arbitrary code -- only point self.JSON at trusted files.
            stored = np.load(os.path.join(self.JSON, f'{source_name}.npy'), allow_pickle=True).item()
            entry_key = 'Evaluate Pipeline ' + str(int(pipeline_number)) + ':'
            pipeline_string = stored[entry_key]['TEST PARSING PIPELINE']

            # Create Individual object for Population List
            pop.append(creator.Individual.from_string(pipeline_string, self.tpot._pset))

        # Install the parsed individuals as the estimator's population before
        # fitting. NOTE(review): presumably warm_start=True is what makes fit()
        # reuse this population -- confirm against the TPOT version in use.
        self.tpot.population_size = len(pop)
        self.tpot._pop = pop
        self.tpot.fit(X_train, y_train)

        for ind in pop:
            try:
                self.tpot._optimized_pipeline = ind
                self.tpot._summary_of_best_pipeline(X_train, y_train)
                ind_score = self.tpot.score(X_test, y_test)
                self.scores.append((ind_score, ind))
            except Exception:
                # Deliberate best effort: one failing pipeline must not abort
                # the batch, so it is recorded with a NaN score instead.
                # np.nan is used because the np.NaN alias is gone in NumPy 2.0.
                self.scores.append((np.nan, ind))

        return self.tpot.evaluated_individuals_, self.scores