Example #1
def test_search_is_replayable_from_fitness_no_multiprocessing():
    search = RandomSearch(fitness_fn=fn2, evaluation_timeout=0, memory_limit=0)
    best, best_fn = search.run(10)

    sampler = best.sampler_.replay()

    assert best is sampler
    assert sampler._history != []
    assert best_fn == fn2(sampler)
Example #2
def test_search_is_replayable_from_grammar():
    grammar = generate_cfg(A)
    search = RandomSearch(generator_fn=grammar, fitness_fn=fn)
    best, _ = search.run(1)

    sampler = best.sampler_.replay()
    best_clone = grammar(sampler)

    assert best == best_clone
Example #3
# Print a few sample formulas from the grammar; the loop head is inferred
# from the surrounding snippet, and samples that raise ValueError are skipped.
for _ in range(100):
    try:
        solution = grammar.sample()
        print(solution)
    except ValueError:
        continue

# To evaluate how good a formula is, we simply feed the expression instance
# a sequence of numbers from 1 to 9. If the expression requires more than
# 9 digits, it raises an error. The actual evaluation of the corresponding
# operations happens in the `__call__` method of the expression classes.

def evaluate(expr):
    def stream():
        for i in range(1, 10):
            yield i

        raise ValueError("Too many values asked")

    return expr(stream())
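
# The expression classes themselves are defined earlier in the full script.
# As a hypothetical sketch of the pattern described above: a digit consumes
# one value from the stream, and an operator combines its sub-expressions.
class Digit:
    def __call__(self, stream):
        return next(stream)  # propagates ValueError once the stream runs out

class Add:
    def __init__(self, left, right):
        self.left, self.right = left, right

    def __call__(self, stream):
        return self.left(stream) + self.right(stream)

# e.g. evaluate(Add(Digit(), Digit())) == 3, consuming 1 and 2 from the stream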

# We will run 1000 iterations of each search strategy to compare their long-term performance.

search_rand = RandomSearch(grammar, evaluate, errors='ignore')
best_rand, best_fn_rand = search_rand.run(1000, logger=[ConsoleLogger(), ProgressLogger()])

search_pe = PESearch(grammar, evaluate, pop_size=10, errors='ignore')
best_pe, best_fn_pe = search_pe.run(1000, logger=[ConsoleLogger(), ProgressLogger()])

# And here are the results.

print(best_rand, best_fn_rand)
print(best_pe, best_fn_pe)
Example #4
# ```bash
# LR(C=4.015231900472649, penalty='l2')
# LR(C=9.556786605505499, penalty='l2')
# LR(C=4.05716261883461, penalty='l1')
# LR(C=3.2786487445120858, penalty='l1')
# LR(C=4.655510386502897, penalty='l2')
# ```

# Now we can search for the best combination of constructor parameters by
# trying a bunch of different instances and seeing which one obtains the best score.
# AutoGOAL also has tools for automating this process.

from autogoal.search import RandomSearch

search = RandomSearch(grammar, evaluate, random_state=0)  # Fixed seed
best, score = search.run(100)

print("Best:", best, "\nScore:", score)

# The `RandomSearch` will try 100 different random instances, and for each one
# run the `evaluate` method we defined earlier. It returns the best one and the corresponding score.

# ```
# Best: LR(C=0.7043201482743121, penalty='l1')
# Score: 0.8853333333333337
# ```

# So we can do a little bit better by carefully selecting the right parameters.
# However, maybe we can do even better.

# ## Trying different algorithms
Example #5
import pickle


# Loads a pickled corpus from `path`; the name and signature are inferred
# from the `load_corpus` call in the __main__ block below.
def load_corpus(path):
    try:
        with open(fR"{path}/binary_X", 'rb') as xfd, \
             open(fR"{path}/binary_Y", 'rb') as yfd:
            X = pickle.load(xfd)
            y = pickle.load(yfd)
            return X, y
    except Exception as e:
        # TODO: implement corpus reading from directories
        print(e)
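
# A possible implementation of the TODO above: a sketch assuming a layout
# with one subfolder per class label and one plain-text document per file.
def load_corpus_from_dirs(path):
    import os

    X, y = [], []
    for label in sorted(os.listdir(path)):
        folder = os.path.join(path, label)
        if not os.path.isdir(folder):
            continue
        for fname in sorted(os.listdir(folder)):
            with open(os.path.join(folder, fname), encoding="utf-8") as fd:
                X.append(fd.read())
            y.append(label)
    return X, y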


if __name__ == "__main__":
    g = generate_cfg(SklearnNLPClassifier)
    X, y = load_movie_reviews(100)

    # X, y = load_corpus("examples/Revolico")


    def fitness(pipeline):
        pipeline.fit(X, y)
        score = pipeline.score(X, y)
        return score

    search = RandomSearch(g,
                          fitness,
                          random_state=0,
                          errors='warn',
                          evaluation_timeout=100)
    result = search.run(50, logger=ProgressLogger())
Example #6
# Print a few sample formulas from the grammar; the loop head is inferred
# from the parallel snippet in Example #3 above.
for _ in range(100):
    try:
        solution = grammar.sample()
        print(solution)
    except ValueError:
        continue

# To evaluate how good a formula is, we simply feed the expression instance
# a sequence of numbers from 1 to 9. If the expression requires more than
# 9 digits, it raises an error. The actual evaluation of the corresponding
# operations happens in the `__call__` method of the expression classes.


def evaluate(expr):
    def stream():
        for i in range(1, 10):
            yield i

        raise ValueError("Too many values asked")

    return expr(stream())


# We will run 1000 iterations of each search strategy to compare their long-term performance.

search_rand = RandomSearch(grammar, evaluate, errors="ignore")
best_rand, best_fn_rand = search_rand.run(1000, logger=RichLogger())

search_pe = PESearch(grammar, evaluate, pop_size=10, errors="ignore")
best_pe, best_fn_pe = search_pe.run(1000, logger=RichLogger())

# And here are the results.

print(best_rand, best_fn_rand)
print(best_pe, best_fn_pe)
Example #7
fitness_fn = movie_reviews.make_fn(examples=100)

# ### Random search
#
# The `RandomSearch` strategy simply calls `grammar.sample()` a bunch of times
# and stores the best-performing pipeline. It has no intelligence whatsoever,
# but it serves as a good baseline implementation.
#
# We will run it for a total of `1000` fitness evaluations, or equivalently, a total
# of `1000` different random pipelines. To see what's actually going on, we will use
# the wonderful `enlighten` library through our `ProgressLogger` implementation.
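#
# Conceptually, `RandomSearch` boils down to a loop like the following. This
# is a simplified sketch for illustration only, not the library's actual
# implementation (it omits error handling, timeouts and logging).

def random_search_sketch(grammar, fitness_fn, evaluations=1000):
    best, best_fn = None, None
    for _ in range(evaluations):
        pipeline = grammar.sample()   # draw a random pipeline
        fn = fitness_fn(pipeline)     # score it
        if best_fn is None or fn > best_fn:
            best, best_fn = pipeline, fn  # keep the best seen so far
    return best, best_fn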

logger = ProgressLogger()

random_search = RandomSearch(grammar, fitness_fn, random_state=0)
best_rand, fn_rand = random_search.run(1000, logger=logger)

# !!! note
#     For reproducibility purposes we can pass a fixed random seed in `random_state`.
#
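# For example, two searches built with the same seed should find the same
# result (a quick sanity check; this assumes `fitness_fn` itself is
# deterministic):

_, fn_a = RandomSearch(grammar, fitness_fn, random_state=0).run(10)
_, fn_b = RandomSearch(grammar, fitness_fn, random_state=0).run(10)
assert fn_a == fn_b
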
# ### Evolutionary Search
#
# Random search is fun, but to search with purpose, we need a more intelligent sampling
# strategy. The `PESearch` (short for Probabilistic Evolutionary Search, phew) does just that.
# It starts with a random sampling strategy, but as it evaluates more pipelines, it updates
# a probabilistic sampling model so that pipelines similar to the best ones found are more
# commonly sampled.
#
# There are three main parameters for `PESearch`.
#
# * The `pop_size` parameter indicates how many pipelines
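#
# In any case, a minimal `PESearch` run mirrors the `RandomSearch` call
# above. This is a usage sketch following the `PESearch` calls in the
# earlier examples, with the `pop_size=10` value they use, not a tuned
# configuration.

from autogoal.search import PESearch

pe_search = PESearch(grammar, fitness_fn, pop_size=10)
best_pe, fn_pe = pe_search.run(1000, logger=logger)
print(best_pe, fn_pe)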
Example #8
from autogoal.contrib.sklearn import SklearnClassifier
from autogoal.grammar import generate_cfg
from autogoal.search import RandomSearch, ProgressLogger

from sklearn.datasets import make_classification

g = generate_cfg(SklearnClassifier)
X, y = make_classification()

print(g)


def fitness(pipeline):
    pipeline.fit(X, y)
    return pipeline.score(X, y)


search = RandomSearch(g, fitness, random_state=0, errors='warn')
search.run(1000, logger=ProgressLogger())