def test_search_is_replayable_from_fitness_no_multiprocessing():
    search = RandomSearch(fitness_fn=fn2, evaluation_timeout=0, memory_limit=0)
    best, best_fn = search.run(10)

    sampler = best.sampler_.replay()

    assert best is sampler
    assert sampler._history != []
    assert best_fn == fn2(sampler)
def test_search_is_replayable_from_grammar():
    grammar = generate_cfg(A)
    search = RandomSearch(generator_fn=grammar, fitness_fn=fn)
    best, _ = search.run(1)

    sampler = best.sampler_.replay()
    best_clone = grammar(sampler)

    assert best == best_clone
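# NOTE: The fixtures `fn`, `fn2`, and `A` used by the tests above are defined
# elsewhere in the test module. The definitions below are only an illustrative
# sketch of what such fixtures could look like; the `Discrete` annotation and
# the `sampler.discrete` call are assumptions about the AutoGOAL API in use,
# not the actual fixtures.

from autogoal.grammar import Discrete


class A:
    # A trivial class whose annotated constructor lets `generate_cfg(A)` build a grammar.
    def __init__(self, x: Discrete(1, 10)):
        self.x = x


def fn(instance):
    # Fitness over a grammar-generated instance of `A`.
    return instance.x


def fn2(sampler):
    # Fitness that draws values directly from the sampler, which is what makes
    # the search replayable from the fitness function alone.
    return sampler.discrete(1, 10)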
        print(solution)
    except ValueError:
        continue


# To evaluate how good a formula is, we simply feed the expression instance
# with a sequence of numbers from 1 to 9. If the expression requires more
# than 9 digits, it results in an error. The actual evaluation of the
# corresponding operations is done in the `__call__` method of the expression classes.


def evaluate(expr):
    def stream():
        for i in range(1, 10):
            yield i

        raise ValueError("Too many values asked")

    return expr(stream())


# We will run 1000 iterations of each search strategy to compare their
# long-term performance.

search_rand = RandomSearch(grammar, evaluate, errors='ignore')
best_rand, best_fn_rand = search_rand.run(1000, logger=[ConsoleLogger(), ProgressLogger()])

search_pe = PESearch(grammar, evaluate, pop_size=10, errors='ignore')
best_pe, best_fn_pe = search_pe.run(1000, logger=[ConsoleLogger(), ProgressLogger()])

# And here are the results.

print(best_rand, best_fn_rand)
print(best_pe, best_fn_pe)
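# For illustration only (not part of the original example): a minimal sketch of
# what an expression class looks like from `evaluate`'s point of view -- its
# `__call__` pulls digits off the stream and combines them. The actual
# expression classes are defined earlier in this example.


class AddSketch:
    """Toy expression that adds the next two digits produced by the stream."""

    def __call__(self, stream):
        return next(stream) + next(stream)


# e.g. evaluate(AddSketch()) == 3, since the stream yields 1, 2, ...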
# ```bash
# LR(C=4.015231900472649, penalty='l2')
# LR(C=9.556786605505499, penalty='l2')
# LR(C=4.05716261883461, penalty='l1')
# LR(C=3.2786487445120858, penalty='l1')
# LR(C=4.655510386502897, penalty='l2')
# ```

# Now we can search for the best combination of constructor parameters by
# trying a bunch of different instances and seeing which one obtains the best score.
# AutoGOAL also has tools for automating this process.

from autogoal.search import RandomSearch

search = RandomSearch(grammar, evaluate, random_state=0)  # Fixed seed
best, score = search.run(100)

print("Best:", best, "\nScore:", score)

# The `RandomSearch` will try 100 different random instances, and for each one
# run the `evaluate` method we defined earlier. It returns the best one and the
# corresponding score.

# ```
# Best: LR(C=0.7043201482743121, penalty='l1')
# Score: 0.8853333333333337
# ```

# So we can do a little bit better by carefully selecting the right parameters.
# However, maybe we can do even better.

# ## Trying different algorithms
    try:
        with open(fR"{path}/binary_X", 'rb') as xfd, open(fR"{path}/binary_Y", 'rb') as yfd:
            X = pickle.load(xfd)
            y = pickle.load(yfd)

        return X, y
    except Exception as e:
        # TODO: implement corpus reading from directories
        print(e)
        pass


if __name__ == "__main__":
    g = generate_cfg(SklearnNLPClassifier)

    X, y = load_movie_reviews(100)
    # X, y = load_corpus("examples/Revolico")

    def fitness(pipeline):
        pipeline.fit(X, y)
        score = pipeline.score(X, y)
        return score

    search = RandomSearch(g, fitness, random_state=0, errors='warn', evaluation_timeout=100)
    result = search.run(50, logger=ProgressLogger())
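    # (Illustrative follow-up, not in the original script.) `run` returns the best
    # pipeline together with its score, so we can report the result explicitly,
    # as the other examples in this repository do:
    best_pipeline, best_score = result
    print("Best pipeline:", best_pipeline)
    print("Best score:", best_score)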
        continue


# To evaluate how good a formula is, we simply feed the expression instance
# with a sequence of numbers from 1 to 9. If the expression requires more
# than 9 digits, it results in an error. The actual evaluation of the
# corresponding operations is done in the `__call__` method of the expression classes.


def evaluate(expr):
    def stream():
        for i in range(1, 10):
            yield i

        raise ValueError("Too many values asked")

    return expr(stream())


# We will run 1000 iterations of each search strategy to compare their
# long-term performance.

search_rand = RandomSearch(grammar, evaluate, errors="ignore")
best_rand, best_fn_rand = search_rand.run(1000, logger=RichLogger())

search_pe = PESearch(grammar, evaluate, pop_size=10, errors="ignore")
best_pe, best_fn_pe = search_pe.run(1000, logger=RichLogger())

# And here are the results.

print(best_rand, best_fn_rand)
print(best_pe, best_fn_pe)
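# (Illustrative follow-up, not in the original example.) Both searches report
# their best fitness, so we can state the comparison explicitly:

winner = "PESearch" if best_fn_pe >= best_fn_rand else "RandomSearch"
print(f"{winner} found the better formula")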
fitness_fn = movie_reviews.make_fn(examples=100)

# ### Random search
#
# The `RandomSearch` strategy simply calls `grammar.sample()` a bunch of times
# and stores the best performing pipeline. It has no intelligence whatsoever,
# but it serves as a good baseline implementation.
#
# We will run it for a total of `1000` fitness evaluations, or equivalently, a total
# of `1000` different random pipelines. To see what's actually going on we will use
# the wonderful `enlighten` library through our implementation `ProgressLogger`.

logger = ProgressLogger()

random_search = RandomSearch(grammar, fitness_fn, random_state=0)
best_rand, fn_rand = random_search.run(1000, logger=logger)

# !!! note
#     For reproducibility purposes we can pass a fixed random seed in `random_state`.
#
# ### Evolutionary Search
#
# Random search is fun, but to search with purpose, we need a more intelligent sampling
# strategy. `PESearch` (short for Probabilistic Evolutionary Search, phew) does just that.
# It starts with a random sampling strategy, but as it evaluates more pipelines, it updates
# a probabilistic sampling model so that pipelines similar to the best ones found are more
# commonly sampled.
#
# There are three main parameters for `PESearch`.
#
# * The `pop_size` parameter indicates how many pipelines
from autogoal.contrib.sklearn import SklearnClassifier
from autogoal.grammar import generate_cfg
from autogoal.search import RandomSearch, ProgressLogger

from sklearn.datasets import make_classification

g = generate_cfg(SklearnClassifier)
X, y = make_classification()

print(g)


def fitness(pipeline):
    pipeline.fit(X, y)
    return pipeline.score(X, y)


search = RandomSearch(g, fitness, random_state=0, errors='warn')
search.run(1000, logger=ProgressLogger())
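# (Illustrative variation, not part of the original example.) The `fitness`
# above trains and scores on the same data; evaluating on a held-out split
# gives a less optimistic estimate. A sketch of that variant:

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)


def fitness_holdout(pipeline):
    # Fit on the training split, score on the held-out test split.
    pipeline.fit(X_train, y_train)
    return pipeline.score(X_test, y_test)


# Usage: RandomSearch(g, fitness_holdout, random_state=0, errors='warn').run(1000)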