コード例 #1
0
def test_mut_operator_stats_update():
    """Check that each random mutation adds one to the mutation count and records its parent."""
    classifier = TPOTClassifier()
    classifier._fit_init()

    pipeline_string = (
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')'
    )
    parent = creator.Individual.from_string(pipeline_string, classifier._pset)
    initialize_stats_dict(parent)

    # Start from arbitrary counters: crossover is drawn first, then mutation.
    for counter in ("crossover_count", "mutation_count"):
        parent.statistics[counter] = random.randint(0, 10)

    # Register the starting pipeline in classifier.evaluated_individuals_.
    classifier.evaluated_individuals_[str(parent)] = classifier._combine_individual_stats(
        2, 0.99, parent.statistics
    )

    for _ in range(10):
        (child,) = classifier._random_mutation_operator(parent)
        child_stats = child.statistics
        parent_stats = parent.statistics

        assert child_stats['crossover_count'] == parent_stats['crossover_count']
        assert child_stats['mutation_count'] == parent_stats['mutation_count'] + 1
        assert child_stats['predecessor'] == (str(parent),)

        # The offspring is the individual mutated in the next round.
        parent = child
コード例 #2
0
ファイル: stats_test.py プロジェクト: EpistasisLab/tpot
def test_mut_operator_stats_update():
    """Verify the statistics bookkeeping done by self._random_mutation_operator.

    Over ten successive mutations the offspring must keep the parent's
    crossover count, carry the parent's mutation count plus one, and name the
    parent's string form as its only predecessor.
    """
    tpot = TPOTClassifier()
    tpot._fit_init()
    current = creator.Individual.from_string(
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')',
        tpot._pset,
    )
    initialize_stats_dict(current)

    # Randomise the starting counters (crossover first, mutation second).
    current.statistics["crossover_count"] = random.randint(0, 10)
    current.statistics["mutation_count"] = random.randint(0, 10)

    # Record the starting pipeline as evaluated in tpot.evaluated_individuals_.
    combined_stats = tpot._combine_individual_stats(2, 0.99, current.statistics)
    tpot.evaluated_individuals_[str(current)] = combined_stats

    remaining = 10
    while remaining > 0:
        mutated = tpot._random_mutation_operator(current)[0]

        assert mutated.statistics['crossover_count'] == current.statistics['crossover_count']
        assert mutated.statistics['mutation_count'] == current.statistics['mutation_count'] + 1
        assert mutated.statistics['predecessor'] == (str(current),)

        # Chain the mutations: the next round starts from this offspring.
        current = mutated
        remaining -= 1
コード例 #3
0
def test_dict_initialization():
    """Check the default statistics that gp_deap.initialize_stats_dict gives a fresh individual."""
    classifier = TPOTClassifier()
    individual = classifier._toolbox.individual()
    initialize_stats_dict(individual)

    # Expected starting value of every tracked statistic, checked in this order.
    expected_defaults = (
        ('generation', 0),
        ('crossover_count', 0),
        ('mutation_count', 0),
        ('predecessor', ('ROOT',)),
    )
    for key, expected in expected_defaults:
        assert individual.statistics[key] == expected
コード例 #4
0
ファイル: stats_test.py プロジェクト: stenpiren/tpot
def test_dict_initialization():
    """Verify that gp_deap.initialize_stats_dict sets the expected starting statistics."""
    toolbox = TPOTClassifier()._toolbox
    fresh = toolbox.individual()

    initialize_stats_dict(fresh)

    # A new individual starts at generation 0 with no variation history.
    assert fresh.statistics['generation'] == 0
    assert fresh.statistics['crossover_count'] == 0
    assert fresh.statistics['mutation_count'] == 0
    # Its only predecessor is the 'ROOT' marker.
    assert fresh.statistics['predecessor'] == ('ROOT',)
コード例 #5
0
def test_mate_operator_stats_update():
    """Check that self._mate_operator merges the parents' statistics into the first offspring."""
    classifier = TPOTClassifier()
    classifier._fit_init()

    first_pipeline = (
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')'
    )
    second_pipeline = (
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=True),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=2, '
        'KNeighborsClassifier__weights=uniform'
        ')'
    )
    parent_a = creator.Individual.from_string(first_pipeline, classifier._pset)
    parent_b = creator.Individual.from_string(second_pipeline, classifier._pset)

    initialize_stats_dict(parent_a)
    initialize_stats_dict(parent_b)

    # Give both parents arbitrary counters; per parent, crossover is drawn before mutation.
    for parent in (parent_a, parent_b):
        for counter in ("crossover_count", "mutation_count"):
            parent.statistics[counter] = random.randint(0, 10)

    # Register both pipelines in classifier.evaluated_individuals_.
    for parent in (parent_a, parent_b):
        classifier.evaluated_individuals_[str(parent)] = classifier._combine_individual_stats(
            2, 0.99, parent.statistics
        )

    # Ten rounds of mating.
    for _ in range(10):
        child, _second_child = classifier._mate_operator(parent_a, parent_b)
        stats_a, stats_b = parent_a.statistics, parent_b.statistics

        assert child.statistics['crossover_count'] == stats_a['crossover_count'] + stats_b['crossover_count'] + 1
        assert child.statistics['mutation_count'] == stats_a['mutation_count'] + stats_b['mutation_count']
        assert child.statistics['predecessor'] == (str(parent_a), str(parent_b))

        # The offspring replaces one of the two predecessors, chosen at random.
        # No clone is made before reusing it.
        if random.random() < 0.5:
            parent_a = child
        else:
            parent_b = child
コード例 #6
0
ファイル: stats_test.py プロジェクト: EpistasisLab/tpot
def test_mate_operator_stats_update():
    """Verify the statistics bookkeeping done by self._mate_operator.

    For ten successive crossovers the first offspring must carry the sum of
    both parents' crossover counts plus one, the sum of both parents' mutation
    counts, and the string forms of both parents as its predecessors.
    """
    tpot = TPOTClassifier()
    tpot._fit_init()
    mother = creator.Individual.from_string(
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')',
        tpot._pset,
    )
    father = creator.Individual.from_string(
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=True),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=2, '
        'KNeighborsClassifier__weights=uniform'
        ')',
        tpot._pset,
    )

    initialize_stats_dict(mother)
    initialize_stats_dict(father)

    # Replace the default counters with random ones.
    mother.statistics["crossover_count"] = random.randint(0, 10)
    mother.statistics["mutation_count"] = random.randint(0, 10)
    father.statistics["crossover_count"] = random.randint(0, 10)
    father.statistics["mutation_count"] = random.randint(0, 10)

    # Record both pipelines as evaluated in tpot.evaluated_individuals_.
    mother_entry = tpot._combine_individual_stats(2, 0.99, mother.statistics)
    tpot.evaluated_individuals_[str(mother)] = mother_entry
    father_entry = tpot._combine_individual_stats(2, 0.99, father.statistics)
    tpot.evaluated_individuals_[str(father)] = father_entry

    # Repeat the check ten times.
    for _ in range(10):
        first_child, _other_child = tpot._mate_operator(mother, father)

        expected_crossovers = mother.statistics['crossover_count'] + father.statistics['crossover_count'] + 1
        assert first_child.statistics['crossover_count'] == expected_crossovers
        expected_mutations = mother.statistics['mutation_count'] + father.statistics['mutation_count']
        assert first_child.statistics['mutation_count'] == expected_mutations
        assert first_child.statistics['predecessor'] == (str(mother), str(father))

        # The offspring takes the place of one of its two predecessors.
        # It is reused directly, without cloning.
        replace_mother = random.random() < 0.5
        if replace_mother:
            mother = first_child
        else:
            father = first_child
コード例 #7
0
ファイル: MetaTPOT_utils.py プロジェクト: DL1992/MetaTPOT
def MetaeaMuPlusLambda(population,
                       toolbox,
                       mu,
                       lambda_,
                       cxpb,
                       mutpb,
                       ngen,
                       pbar,
                       max_pipeline_size,
                       stats=None,
                       halloffame=None,
                       verbose=0,
                       meta_model=None,
                       per_generation_function=None,
                       primitives_to_hash_dic=None,
                       gptree=None,
                       pset=None,
                       df=None,
                       le=None,
                       use_meta_model_flag=True,
                       use_meta_selection_flag=False,
                       meta_selection_size=10,
                       meta_selection_type='offspring'):
    r"""Run the :math:`(\mu + \lambda)` evolutionary algorithm with optional meta-model filtering.

    :param population: A list of individuals.
    :param toolbox: A :class:`~deap.base.Toolbox` that contains the evolution
                    operators.
    :param mu: The number of individuals to select for the next generation.
    :param lambda_: The number of children to produce at each generation.
    :param cxpb: The probability that an offspring is produced by crossover.
    :param mutpb: The probability that an offspring is produced by mutation.
    :param ngen: The number of generation.
    :param pbar: processing bar
    :param max_pipeline_size: forwarded to ``create_ranking_df_from_pop`` and
                              ``_selMetaTournament``.
    :param stats: A :class:`~deap.tools.Statistics` object that is updated
                  inplace, optional.
    :param halloffame: A :class:`~deap.tools.HallOfFame` object that will
                       contain the best individuals, optional. It is only
                       read here (for progress output when *verbose* is 2
                       or 3); this function never updates it itself.
    :param verbose: Whether or not to log the statistics.
    :param meta_model: forwarded to ``rank_pop`` and ``_selMetaTournament``.
    :param per_generation_function: if supplied, called as
                            ``per_generation_function(gen=..., pop=...)`` before
                            each generation and once more after the last one;
                            used by tpot to save best pipeline before each new generation
    :param primitives_to_hash_dic: forwarded to ``create_ranking_df_from_pop``
                                   and ``_selMetaTournament``.
    :param gptree: forwarded to ``create_ranking_df_from_pop`` and
                   ``_selMetaTournament``.
    :param pset: forwarded to ``create_ranking_df_from_pop`` and
                 ``_selMetaTournament``.
    :param df: forwarded to ``create_ranking_df_from_pop`` and
               ``_selMetaTournament``.
    :param le: forwarded to ``rank_pop`` and ``_selMetaTournament``.
    :param use_meta_model_flag: when true, the offspring are replaced by the
                                ones at the indices returned by
                                ``rank_pop(meta_model, test_df, mu, le)``.
    :param use_meta_selection_flag: when true, ``_selMetaTournament`` is
                                    applied as selected by
                                    *meta_selection_type*.
    :param meta_selection_size: third positional argument given to
                                ``_selMetaTournament``.
    :param meta_selection_type: ``'offspring'`` filters only the offspring,
                                ``'pop'`` filters only the population, any
                                other value filters the population and then
                                the offspring.
    :returns: The final population
    :returns: A class:`~deap.tools.Logbook` with the statistics of the
              evolution.

    The algorithm takes in a population and evolves it in place using the
    :func:`varOr` function. It returns the optimized population and a
    :class:`~deap.tools.Logbook` with the statistics of the evolution. The
    logbook will contain the generation number, the number of evalutions for
    each generation and the statistics if a :class:`~deap.tools.Statistics` is
    given as argument. The *cxpb* and *mutpb* arguments are passed to the
    :func:`varOr` function. The pseudocode goes as follow ::

        evaluate(population)
        for g in range(ngen):
            offspring = varOr(population, toolbox, lambda_, cxpb, mutpb)
            evaluate(offspring)
            population = select(population + offspring, mu)

    First, the whole population is passed to :meth:`toolbox.evaluate`, whose
    return value replaces the population contents. Second,
    the evolutionary loop begins by producing *lambda_* offspring from the
    population, the offspring are generated by the :func:`varOr` function. The
    offspring are then evaluated and the next generation population is
    selected from both the offspring **and** the population. Finally, when
    *ngen* generations are done, the algorithm returns a tuple with the final
    population and a :class:`~deap.tools.Logbook` of the evolution.

    This function expects :meth:`toolbox.mate`, :meth:`toolbox.mutate`,
    :meth:`toolbox.select` and :meth:`toolbox.evaluate` aliases to be
    registered in the toolbox. This algorithm uses the :func:`varOr`
    variation.
    """

    def _meta_tournament(individuals):
        # Every meta-tournament call shares the same configuration; keeping it
        # in one place stops the branches below from drifting apart.
        return _selMetaTournament(
            individuals,
            mu,
            meta_selection_size,
            meta_model=meta_model,
            primitives_to_hash_dic=primitives_to_hash_dic,
            gptree=gptree,
            pset=pset,
            df=df,
            le=le,
            max_pipeline_size=max_pipeline_size)

    logbook = tools.Logbook()
    logbook.header = ['gen', 'nevals'] + (stats.fields if stats else [])

    # Initialize statistics dict for the individuals in the population, to keep
    # track of mutation/crossover operations and predecessor relations
    for ind in population:
        initialize_stats_dict(ind)

    population[:] = toolbox.evaluate(population)

    record = stats.compile(population) if stats is not None else {}
    logbook.record(gen=0, nevals=len(population), **record)

    # Bind `gen` up front so the final per_generation_function call below still
    # has a generation number when ngen is 0 and the loop body never runs.
    gen = 0

    # Begin the generational process
    for gen in range(1, ngen + 1):
        # after each population save a periodic pipeline
        if per_generation_function is not None:
            per_generation_function(gen=gen - 1, pop=population)

        # Vary the population
        offspring = varOr(population, toolbox, lambda_, cxpb, mutpb)

        # Using meta-learning to reduce offspring
        if use_meta_model_flag:
            test_df = create_ranking_df_from_pop(offspring,
                                                 primitives_to_hash_dic,
                                                 gptree, pset, df,
                                                 max_pipeline_size)
            top_offspring_index = rank_pop(meta_model, test_df, mu, le)
            offspring = [offspring[i] for i in top_offspring_index]

        # Using meta-learning as pre-tournament
        if use_meta_selection_flag:
            if meta_selection_type == 'offspring':
                offspring = _meta_tournament(offspring)
            elif meta_selection_type == 'pop':
                # NOTE(review): this rebinds the local name; if
                # _selMetaTournament returns a new list, the caller's list is
                # no longer the one updated in place below - confirm intent.
                population = _meta_tournament(population)
            else:
                # Any other value filters both groups, population first.
                population = _meta_tournament(population)
                offspring = _meta_tournament(offspring)

        # Update generation statistic for all individuals which have invalid 'generation' stats
        # This hold for individuals that have been altered in the varOr function
        for ind in population:
            if ind.statistics['generation'] == 'INVALID':
                ind.statistics['generation'] = gen

        # Individuals with an invalid fitness; counted before evaluation
        # because only the count is needed, for the logbook's `nevals`.
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]

        offspring = toolbox.evaluate(offspring)

        # Select the next generation population
        population[:] = toolbox.select(population + offspring, mu)

        # pbar process
        if not pbar.disable:
            # Print only the best individual fitness
            if verbose == 2:
                high_score = max(key.wvalues[1] for key in halloffame.keys)
                pbar.write(
                    'Generation {0} - Current best internal CV score: {1}'.
                    format(gen, high_score))

            # Print the entire Pareto front
            elif verbose == 3:
                pbar.write(
                    'Generation {} - Current Pareto front scores:'.format(gen))
                for pipeline, pipeline_scores in zip(halloffame.items,
                                                     reversed(
                                                         halloffame.keys)):
                    pbar.write('{}\t{}\t{}'.format(
                        int(pipeline_scores.wvalues[0]),
                        pipeline_scores.wvalues[1], pipeline))
                pbar.write('')

        # Update the statistics with the new population
        record = stats.compile(population) if stats is not None else {}
        logbook.record(gen=gen, nevals=len(invalid_ind), **record)

    # Final callback for the last generation (generation 0 when ngen is 0).
    if per_generation_function is not None:
        per_generation_function(gen=gen, pop=population)

    return population, logbook
コード例 #8
0
def _summarize_fitness_scores(fitnesses):
    """Build the avg/std/min/max/raw logbook fields from the finite scores in *fitnesses*.

    The score is element 1 of every fitness entry. Infinite scores are counted,
    reported on stdout and dropped. When no finite score is left, the summary
    values are NaN instead of letting ``np.min``/``np.max`` raise on an empty
    array.
    """
    scores = np.array([fit[1] for fit in fitnesses])
    n_inf = np.sum(np.isinf(scores))
    print('Number of invalid pipelines: %d' %n_inf)
    scores = scores[~np.isinf(scores)]

    if scores.size == 0:
        nan = float('nan')
        return dict(avg=nan, std=nan, min=nan, max=nan, raw=scores)
    return dict(avg=np.mean(scores), std=np.std(scores),
                min=np.min(scores), max=np.max(scores),
                raw=scores)


def extendedeaMuPlusLambda(population, toolbox, mu, lambda_, cxpb, mutpb, ngen, pbar,
                           stats=None, halloffame=None, verbose=0,
                           per_generation_function=None, debug=False,
                           random_seed=None, analysis=None, mutation_rate=None,
                           crossover_rate=None):
    r"""Run the :math:`(\mu + \lambda)` evolutionary algorithm and pickle its logbook.

    :param population: A list of individuals.
    :param toolbox: A :class:`~deap.base.Toolbox` that contains the evolution
                    operators.
    :param mu: The number of individuals to select for the next generation.
    :param lambda_: The number of children to produce at each generation.
    :param cxpb: The probability that an offspring is produced by crossover.
    :param mutpb: The probability that an offspring is produced by mutation.
    :param ngen: The number of generation.
    :param pbar: processing bar
    :param stats: A :class:`~deap.tools.Statistics` object that is updated
                  inplace, optional.
    :param halloffame: A :class:`~deap.tools.HallOfFame` object that will
                       contain the best individuals, optional.
    :param verbose: Whether or not to log the statistics.
    :param per_generation_function: if supplied, called with the generation
                            number once before the loop (with 0) and at the
                            start of every generation;
                            used by tpot to save best pipeline before each new generation
    :param debug: forwarded to ``get_all_random_seed_paths``.
    :param random_seed: required; seeds both :mod:`random` and
                        ``numpy.random`` and is part of the pickle file name.
    :param analysis: forwarded to ``get_all_random_seed_paths``.
    :param mutation_rate: forwarded to ``get_all_random_seed_paths``.
    :param crossover_rate: forwarded to ``get_all_random_seed_paths``.
    :returns: The final population
    :returns: A class:`~deap.tools.Logbook` with the statistics of the
              evolution.
    :raises ValueError: if *random_seed* is ``None``.

    The algorithm takes in a population and evolves it in place using the
    :func:`varOr` function. It returns the optimized population and a
    :class:`~deap.tools.Logbook` with the statistics of the evolution. The
    logbook will contain the generation number, the number of evalutions for
    each generation and the statistics if a :class:`~deap.tools.Statistics` is
    given as argument. In addition every record holds the mean, standard
    deviation, minimum, maximum and raw array of the finite scores of the
    individuals evaluated in that generation. The *cxpb* and *mutpb*
    arguments are passed to the :func:`varOr` function. The pseudocode goes
    as follow ::

        evaluate(population)
        for g in range(ngen):
            offspring = varOr(population, toolbox, lambda_, cxpb, mutpb)
            evaluate(offspring)
            population = select(population + offspring, mu)

    First, the individuals having an invalid fitness are evaluated. Second,
    the evolutionary loop begins by producing *lambda_* offspring from the
    population, the offspring are generated by the :func:`varOr` function. The
    offspring are then evaluated and the next generation population is
    selected from both the offspring **and** the population. Finally, when
    *ngen* generations are done, the logbook is converted to a pandas
    DataFrame and pickled under the directory returned by
    ``get_all_random_seed_paths``, and the algorithm returns a tuple with the
    final population and a :class:`~deap.tools.Logbook` of the evolution.

    This function expects :meth:`toolbox.mate`, :meth:`toolbox.mutate`,
    :meth:`toolbox.select` and :meth:`toolbox.evaluate` aliases to be
    registered in the toolbox. This algorithm uses the :func:`varOr`
    variation.
    """

    if random_seed is None:
        raise ValueError("No fixed random seed was used!")

    random.seed(random_seed)
    np.random.seed(random_seed)

    logbook = tools.Logbook()
    logbook.header = ['gen', 'nevals', 'avg', 'std', 'min', 'max', 'raw'] + (stats.fields if stats else [])

    # Initialize statistics dict for the individuals in the population, to keep
    # track of mutation/crossover operations and predecessor relations
    for ind in population:
        initialize_stats_dict(ind)

    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in population if not ind.fitness.valid]

    fitnesses = toolbox.evaluate(invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit

    if halloffame is not None:
        halloffame.update(population)

    # `fitnesses` lines up with `invalid_ind`, not with `population`, so it is
    # iterated directly; indexing it by population size fails as soon as one
    # individual already carries a valid fitness.
    complexity = np.array([fit[0] for fit in fitnesses])
    summary = _summarize_fitness_scores(fitnesses)

    record = stats.compile(population) if stats is not None else {}
    logbook.record(gen=0, nevals=len(invalid_ind),
                   **summary, complexity=complexity,
                   **record)

    # save the optimal model for initial pipeline
    gen = 0
    if per_generation_function is not None:
        per_generation_function(gen)
    # Begin the generational process
    for gen in range(1, ngen + 1):
        # after each population save a periodic pipeline
        if per_generation_function is not None:
            per_generation_function(gen)

        # Vary the population
        offspring = varOr(population, toolbox, lambda_, cxpb, mutpb)

        # Update generation statistic for all individuals which have invalid 'generation' stats
        # This hold for individuals that have been altered in the varOr function
        for ind in population:
            if ind.statistics['generation'] == 'INVALID':
                ind.statistics['generation'] = gen

        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]

        # update pbar for valid individuals (with fitness values)
        if not pbar.disable:
            pbar.update(len(offspring)-len(invalid_ind))

        fitnesses = toolbox.evaluate(invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        # Update the hall of fame with the generated individuals
        if halloffame is not None:
            halloffame.update(offspring)

        # Select the next generation population
        population[:] = toolbox.select(population + offspring, mu)

        # pbar process
        if not pbar.disable:
            # Print only the best individual fitness
            if verbose == 2:
                high_score = max(key.wvalues[1] for key in halloffame.keys)
                pbar.write('Generation {0} - Current best internal CV score: {1}'.format(gen, high_score))

            # Print the entire Pareto front
            elif verbose == 3:
                pbar.write('Generation {} - Current Pareto front scores:'.format(gen))
                for pipeline, pipeline_scores in zip(halloffame.items, reversed(halloffame.keys)):
                    pbar.write('{}\t{}\t{}'.format(
                        int(pipeline_scores.wvalues[0]),
                        pipeline_scores.wvalues[1],
                        pipeline
                    )
                    )
                pbar.write('')

        # Summarise the scores of the offspring evaluated in this generation.
        # Offspring that already had a valid fitness have no entry in
        # `fitnesses`, so the list is iterated rather than indexed by
        # len(offspring).
        summary = _summarize_fitness_scores(fitnesses)

        # Update the statistics with the new population
        record = stats.compile(population) if stats is not None else {}
        logbook.record(gen=gen, nevals=len(invalid_ind),
                       **summary,
                       **record)
    # Dump logbook
    import pickle
    import pandas as pd
    deap_df = pd.DataFrame(logbook)
    save_path = get_all_random_seed_paths(analysis, ngen, len(population),
                                          debug, mutation_rate, crossover_rate)
    save_path_df = os.path.join(save_path, 'logbook_rnd_seed%03d.pkl'
                                %random_seed)
    with open(save_path_df, 'wb') as handle:
        pickle.dump(deap_df, handle)
    print('Saved logbook at %s' %save_path_df)

    return population, logbook