Example #1
import numpy as np  # for np.arange in the Binarizer entry below

from tpot.operator_utils import TPOTOperatorClassFactory


def testTPOTOperatorClassFactory():
    """Assert that the TPOT operator class factory builds the expected
    operator and argument classes from a configuration dictionary."""
    test_config_dict = {
        'sklearn.svm.LinearSVC': {
            'penalty': ["l1", "l2"],
            'loss': ["hinge", "squared_hinge"],
            'dual': [True, False],
            'tol': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
            'C': [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.]
        },
        'sklearn.linear_model.LogisticRegression': {
            'penalty': ["l1", "l2"],
            'C': [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.],
            'dual': [True, False]
        },
        'sklearn.preprocessing.Binarizer': {
            'threshold': np.arange(0.0, 1.01, 0.05)
        }
    }
    tpot_operator_list = []
    tpot_argument_list = []
    for key in sorted(test_config_dict.keys()):
        op, args = TPOTOperatorClassFactory(key, test_config_dict[key])
        tpot_operator_list.append(op)
        tpot_argument_list += args
    assert len(tpot_operator_list) == 3
    assert len(tpot_argument_list) == 9
    assert tpot_operator_list[0].root is True
    assert tpot_operator_list[1].root is False
    assert tpot_operator_list[2].type() == "Classifier or Regressor"
    assert tpot_argument_list[1].values == [True, False]
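
For reference, the factory returns one generated operator class plus one
argument class per hyperparameter value list in the config entry (hence the
9 argument classes above: 5 for LinearSVC, 3 for LogisticRegression, 1 for
Binarizer). A minimal sketch of calling it standalone; the GaussianNB entry
comes from TPOT's stock classifier config and has no tunable hyperparameters:

from tpot.operator_utils import TPOTOperatorClassFactory
from tpot.config.classifier import classifier_config_dict

key = 'sklearn.naive_bayes.GaussianNB'
op_class, arg_types = TPOTOperatorClassFactory(key, classifier_config_dict[key])
print(op_class.root)   # True: classifiers may sit at the root of a pipeline
print(len(arg_types))  # 0: the GaussianNB entry has an empty parameter dict
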
Example #2
import numpy as np  # for the np.float64 casts below

from tpot import TPOTClassifier, TPOTRegressor
from tpot.export_utils import export_pipeline, generate_import_code, _indent, \
    generate_pipeline_code, get_by_name, set_param_recursive
from tpot.operator_utils import TPOTOperatorClassFactory
from tpot.config.classifier import classifier_config_dict

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from deap import creator

from nose.tools import assert_raises, assert_equal, nottest
train_test_split = nottest(train_test_split)  # keep nose from collecting it as a test
test_operator_key_1 = 'sklearn.feature_selection.SelectPercentile'
test_operator_key_2 = 'sklearn.feature_selection.SelectFromModel'
TPOTSelectPercentile, TPOTSelectPercentile_args = TPOTOperatorClassFactory(
    test_operator_key_1,
    classifier_config_dict[test_operator_key_1]
)

TPOTSelectFromModel, TPOTSelectFromModel_args = TPOTOperatorClassFactory(
    test_operator_key_2,
    classifier_config_dict[test_operator_key_2]
)

digits_data = load_digits()
training_features, testing_features, training_target, testing_target = \
    train_test_split(digits_data.data.astype(np.float64), digits_data.target.astype(np.float64), random_state=42)

tpot_obj = TPOTClassifier()
tpot_obj._fit_init()

tpot_obj_reg = TPOTRegressor()
tpot_obj_reg._fit_init()
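
Once _fit_init() has run, the generated operator classes hang off the TPOT
object. A hedged sketch of inspecting them (attribute names as used in the
_fit_init listing in Example #3 below):

print(len(tpot_obj.operators))         # one operator class per config entry
print(tpot_obj.operators[0].__name__)  # a TPOT-wrapped sklearn estimator class
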
Example #3
    def _fit_init(self):
        # initialization for fit function
        if not self.warm_start or not hasattr(self, '_pareto_front'):
            self._pop = []
            self._pareto_front = None
            self._last_optimized_pareto_front = None
            self._last_optimized_pareto_front_n_gens = 0

        self._optimized_pipeline = None
        self._optimized_pipeline_score = None
        self._exported_pipeline_text = ""
        self.fitted_pipeline_ = None
        self._fitted_imputer = None
        self._imputed = False
        self._memory = None # initial Memory setting for sklearn pipeline

        # Don't save periodic pipelines more often than this
        self._output_best_pipeline_period_seconds = 30

        # Try crossover and mutation at most this many times for
        # any one given individual (or pair of individuals)
        self._max_mut_loops = 50

        self._setup_config(self.config_dict)

        self.operators = []
        self.arguments = []
        for key in sorted(self._config_dict.keys()):
            op_class, arg_types = TPOTOperatorClassFactory(
                key,
                self._config_dict[key],
                BaseClass=Operator,
                ArgBaseClass=ARGType
            )
            if op_class:
                self.operators.append(op_class)
                self.arguments += arg_types

        # Schedule TPOT to run for many generations if the user specifies a
        # run-time limit; TPOT will automatically interrupt itself when the
        # timer runs out.
        if self.max_time_mins is not None:
            self.generations = 1000000

        # Prompt the user if their version is out of date
        if not self.disable_update_check:
            update_check('tpot', __version__)

        if self.mutation_rate + self.crossover_rate > 1:
            raise ValueError(
                'The sum of the crossover and mutation probabilities must be <= 1.0.'
            )

        self.operators_context = {
            'make_pipeline': make_pipeline,
            'make_union': make_union,
            'StackingEstimator': StackingEstimator,
            'FunctionTransformer': FunctionTransformer,
            'copy': copy
        }

        self._pbar = None
        # Specifies where to output the progress messages (default: sys.stdout).
        # This API may be opened up in a future version of TPOT
        # (io.TextIOWrapper or io.StringIO).
        self._file = sys.stdout

        # Dictionary of individuals that have already been evaluated in previous
        # generations
        self.evaluated_individuals_ = {}

        self._setup_scoring_function(self.scoring)

        if self.subsample <= 0.0 or self.subsample > 1.0:
            raise ValueError(
                'The subsample ratio of the training instance must be in the range (0.0, 1.0].'
            )

        if self.n_jobs == -1:
            self._n_jobs = cpu_count()
        else:
            self._n_jobs = self.n_jobs

        self._setup_pset()
        self._setup_toolbox()

        ## Additions to _fit_init
        # Initialise list to save the predictions and pipelines analysed by TPOT
        self.predictions = []
        self.pipelines = []
        self._exported_pipeline_text = []
        # Save training sample on the TPOT Object
        self.features = None
        self.target = None
        self.evaluated_individuals = {}
        self.curr_generations = 0
        self.log = {}

        # Add the Gaussian process kernels so that evolved pipelines can
        # reference them. The kernel classes are assumed to be imported at
        # module level from sklearn.gaussian_process.kernels; a direct
        # reference is equivalent to (and clearer than) eval('RBF') etc.
        self.operators_context['RBF'] = RBF
        self.operators_context['Matern'] = Matern
        self.operators_context['RationalQuadratic'] = RationalQuadratic
        self.operators_context['ExpSineSquared'] = ExpSineSquared
        self.operators_context['DotProduct'] = DotProduct
        self.operators_context['ConstantKernel'] = ConstantKernel
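
Why register these names? When TPOT compiles an evolved pipeline string back
into a scikit-learn object, it evaluates that string with operators_context as
the namespace, so every symbol a pipeline references must be present in the
dict. A hedged sketch of the same mechanism in isolation (the kernel
expression below is illustrative, not output from a real TPOT run):

from sklearn.gaussian_process.kernels import RBF, ConstantKernel

operators_context = {'RBF': RBF, 'ConstantKernel': ConstantKernel}
kernel_expr = "ConstantKernel(1.0) * RBF(length_scale=0.5)"
kernel = eval(kernel_expr, operators_context)
print(kernel)  # 1**2 * RBF(length_scale=0.5)
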
Example #4
import random
import subprocess

import numpy as np
from sklearn.datasets import load_boston, load_digits  # load_boston was removed in scikit-learn 1.2
from sklearn.model_selection import train_test_split

from tpot.operator_utils import TPOTOperatorClassFactory
from tpot.config.classifier import classifier_config_dict

# Set up the digits data set for testing (the original test calls it MNIST,
# but load_digits is sklearn's 8x8 digits set)
mnist_data = load_digits()
training_features, testing_features, training_classes, testing_classes = \
    train_test_split(mnist_data.data.astype(np.float64), mnist_data.target.astype(np.float64), random_state=42)

# Set up the Boston data set for testing
boston_data = load_boston()
training_features_r, testing_features_r, training_classes_r, testing_classes_r = \
    train_test_split(boston_data.data, boston_data.target, random_state=42)

np.random.seed(42)
random.seed(42)

test_operator_key = 'sklearn.feature_selection.SelectKBest'
TPOTSelectKBest, TPOTSelectKBest_args = TPOTOperatorClassFactory(
    test_operator_key, classifier_config_dict[test_operator_key])


def test_driver():
    """Assert that the TPOT driver output normal result"""
    batcmd = "python -m tpot.driver tests.csv -is , -target class -g 2 -p 2 -os 4 -cv 5 -s 45 -v 1"
    ret_stdout = subprocess.check_output(batcmd, shell=True)
    try:
        ret_val = float(
            ret_stdout.decode('UTF-8').split('\n')[-2].split(': ')[-1])
    except Exception:
        # If no score can be parsed from the output, force the assert to fail.
        ret_val = -float('inf')
    assert ret_val > 0.0
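
For comparison, the driver flags above map onto TPOTClassifier constructor
arguments (-g generations, -p population_size, -os offspring_size, -cv cv,
-s random_state, -v verbosity). A rough sketch of the equivalent Python-API
call; loading tests.csv is omitted here, so treat the fit call as illustrative:

from tpot import TPOTClassifier

tpot = TPOTClassifier(generations=2, population_size=2, offspring_size=4,
                      cv=5, random_state=45, verbosity=1)
# tpot.fit(features, target) would then reproduce the driver run.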


def test_init_custom_parameters():
Example #5
from os import remove, path

import numpy as np  # for the np.float64 casts below
from tqdm import tqdm  # used below to stub TPOT's progress bar

from tpot import TPOTClassifier, TPOTRegressor
from tpot.export_utils import export_pipeline, generate_import_code, _indent, generate_pipeline_code, get_by_name
from tpot.operator_utils import TPOTOperatorClassFactory
from tpot.config.classifier import classifier_config_dict

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from deap import creator

from nose.tools import assert_raises, assert_equal

test_operator_key = 'sklearn.feature_selection.SelectPercentile'

TPOTSelectPercentile, TPOTSelectPercentile_args = TPOTOperatorClassFactory(
    test_operator_key, classifier_config_dict[test_operator_key])

mnist_data = load_digits()
training_features, testing_features, training_target, testing_target = \
    train_test_split(mnist_data.data.astype(np.float64), mnist_data.target.astype(np.float64), random_state=42)


def test_export_random_ind():
    """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
    tpot_obj = TPOTClassifier(random_state=39)
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline = tpot_obj._toolbox.individual()
    expected_code = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
Example #6
def rebuild_me(self):
    # Rebuild the non-picklable parts of a TPOT object after deserialization.
    # Assumed module-level imports: numpy as np, copy, functools.partial,
    # deap (creator, tools, gp.PrimitiveTree), sklearn.pipeline
    # (make_pipeline, make_union), sklearn.preprocessing.FunctionTransformer,
    # TPOT internals (Operator, ARGType, StackingEstimator,
    # TPOTOperatorClassFactory), plus the project-local NON_SERIALIZABLE
    # and add_attr.
    for a in NON_SERIALIZABLE:
        if not hasattr(self, a):
            setattr(self, a, [])
    self._setup_config(self.config_dict)
    self._setup_template(self.template)
    self.verbosity = 0
    self.warm_start = True
    for key in sorted(self._config_dict.keys()):
        op_class, arg_types = TPOTOperatorClassFactory(
            key,
            self._config_dict[key],
            BaseClass=Operator,
            ArgBaseClass=ARGType,
            verbose=self.verbosity,
        )
        if op_class:
            self.operators.append(op_class)
            self.arguments += arg_types
    self.operators_context = {
        "make_pipeline": make_pipeline,
        "make_union": make_union,
        "StackingEstimator": StackingEstimator,
        "FunctionTransformer": FunctionTransformer,
        "copy": copy,
    }
    setattr(
        self, '_pareto_front',
        tools.ParetoFront(similar=lambda ind1, ind2: np.allclose(
            ind1.fitness.values, ind2.fitness.values)))
    self._pbar = None
    self._setup_pset()
    self._setup_toolbox()
    self._pop = self._toolbox.population(n=self.population_size)
    if hasattr(self, 'pareto_front_fitted_pipelines_') and isinstance(
            self.pareto_front_fitted_pipelines_, dict):
        items = [
            creator.Individual(PrimitiveTree([]).from_string(i, self._pset))
            for i in self.pareto_front_fitted_pipelines_.keys()
        ]
        keys = [(lambda d: creator.FitnessMulti(
            (d.get('operator_count', 0), d.get('internal_cv_score', 0))))(
                self.evaluated_individuals_.get(k, {}))
                for k in self.pareto_front_fitted_pipelines_.keys()]
    elif hasattr(self, 'evaluated_individuals_') and isinstance(
            self.evaluated_individuals_, dict):
        items = [
            creator.Individual(PrimitiveTree([]).from_string(i, self._pset))
            for i in self.evaluated_individuals_.keys()
        ]
        keys = [
            creator.FitnessMulti((d.get('operator_count',
                                        0), d.get('internal_cv_score', 0)))
            for d in self.evaluated_individuals_.values()
        ]
    else:
        self.warm_start = False
        self.verbosity = 3
        return
    items = add_attr(keys, items)
    setattr(self._pareto_front, 'items', items)
    setattr(self._pareto_front, 'keys', sorted(keys))
    if hasattr(self, '_optimized_pipeline') and isinstance(
            self._optimized_pipeline, str):
        optimized_pipeline = creator.Individual(
            PrimitiveTree([]).from_string(self._optimized_pipeline,
                                          self._pset))
        optimized_pipeline.__str__ = partial(PrimitiveTree.__str__,
                                             optimized_pipeline)
        keys = [
            creator.FitnessMulti((d.get('operator_count',
                                        0), d.get('internal_cv_score', 0)))
            for k, d in self.evaluated_individuals_.items()
            if k == optimized_pipeline.__str__()
        ]
        if len(keys) > 0:
            optimized_pipeline.fitness = keys[0]
        else:
            optimized_pipeline.fitness.values = (5000.0, -float("inf"))
        self._optimized_pipeline = optimized_pipeline

    setattr(self, '_last_optimized_pareto_front',
            [v for i in self._pareto_front.keys for v in i.values[-1:]])

    if not hasattr(self, '_last_optimized_pareto_front_n_gens'):
        if hasattr(self, 'evaluated_individuals_'):
            last_gen = max([
                d.get('generation')
                for d in list(self.evaluated_individuals_.values())
            ])
        else:
            last_gen = 0
        setattr(self, '_last_optimized_pareto_front_n_gens', last_gen)
    else:
        last_gen = self._last_optimized_pareto_front_n_gens

    if not hasattr(self, 'evaluated_individuals_'):
        setattr(
            self, 'evaluated_individuals_', {
                p.__str__(): (lambda v: {
                    'generation': last_gen,
                    'mutation_count': 0,
                    'crossover_count': 0,
                    'predecessor': ('ROOT', ),
                    'operator_count': v[0],
                    'internal_cv_score': v[-1]
                })(self._pareto_front.keys[i].values)
                for i, p in enumerate(self._pareto_front.items)
            })

    self.verbosity = 3
    return self
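
A hedged sketch of how rebuild_me might be used in practice: after unpickling
a TPOT object whose NON_SERIALIZABLE attributes were stripped before saving,
the method re-creates the DEAP primitive set, toolbox, population, and Pareto
front so that the search can resume. The checkpoint file name and the fit data
here are illustrative assumptions:

import pickle

with open('tpot_checkpoint.pkl', 'rb') as f:  # hypothetical checkpoint file
    tpot_obj = pickle.load(f)

tpot_obj = tpot_obj.rebuild_me()  # restore pset, toolbox, and Pareto front
# rebuild_me sets warm_start=True, so a subsequent
# tpot_obj.fit(features, target) resumes from the rebuilt population.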