def automl_fit(
    input: Path,
    output: Path = Path("automl.bin"),
    target: str = None,
    ignore_cols: List[int] = typer.Option([]),
    evaluation_timeout: int = 5 * Min,
    memory_limit: int = 4 * Gb,
    search_timeout: int = 60 * 60,
    pop_size: int = 20,
    iterations: int = 100,
    random_state: int = None,
    format: str = None,
):
    """Train an AutoML instance on a tabular dataset and persist it to disk.

    The prediction target defaults to the right-most column of the dataset
    when `target` is not supplied. The fitted model is serialized to `output`.
    """
    # Load the dataset up front; on a bad format/path, log and bail out early.
    try:
        dataset = _load_dataset(format, input, ignore_cols)
    except ValueError as e:
        logger.error(f"鈿狅笍 Error: {str(e)}")
        return

    # Guard clause: fall back to the last column as the prediction target.
    if target is None:
        target = dataset.columns[-1]

    # Split features (every column except the target) from the label vector.
    feature_cols = [col for col in dataset.columns if col != target]
    X = dataset[feature_cols].values
    y = dataset[target].values

    # Configure the search with the budget limits provided on the CLI.
    search_options = {
        "evaluation_timeout": evaluation_timeout,
        "memory_limit": memory_limit,
        "search_timeout": search_timeout,
        "pop_size": pop_size,
    }
    automl = AutoML(
        output=VectorCategorical(),
        search_kwargs=search_options,
        random_state=random_state,
        search_iterations=iterations,
    )

    console.print(f"馃弮 Training on {len(dataset)} items.")
    automl.fit(X, y, logger=RichLogger())

    # Serialize the fitted model to the requested path.
    with output.open("wb") as fp:
        automl.save(fp)

    console.print(f"馃捑 Saving model to [green]{output.absolute()}[/].")
    # NOTE(review): this chunk begins mid-call — these are the trailing keyword
    # arguments of an `AutoML(...)` construction whose opening line (presumably
    # `classifier = AutoML(`) is outside this view; confirm upstream.
    search_algorithm=PESearch,
    # Token-sequence input with per-token supervised labels (sequence tagging).
    input=(Seq[Seq[Word]], Supervised[Seq[Seq[Label]]]),
    output=Seq[Seq[Label]],
    # Exclude the heavyweight neural backends from the candidate registry.
    registry=find_classes(exclude="Keras|Bert"),
    search_iterations=args.iterations,
    # Optimize the task-specific F-beta metric rather than plain accuracy.
    score_metric=meddocan.F1_beta,
    cross_validation_steps=1,
    pop_size=args.popsize,
    search_timeout=args.global_timeout,
    evaluation_timeout=args.timeout,
    # CLI supplies memory in GiB; convert to bytes here.
    memory_limit=args.memory * 1024 ** 3,
)

# Basic logging configuration.
loggers = [RichLogger()]

# Optionally mirror search progress to a Telegram channel when a bot token
# is provided on the command line.
if args.token:
    from autogoal.contrib.telegram import TelegramLogger

    telegram = TelegramLogger(token=args.token, name=f"MEDDOCAN", channel=args.channel,)
    loggers.append(telegram)

# Finally, loading the MEDDOCAN dataset, running the `AutoML` instance,
# and printing the results.
X_train, y_train, X_test, y_test = meddocan.load(max_examples=args.examples)

classifier.fit(X_train, y_train, logger=loggers)
score = classifier.score(X_test, y_test)
continue  # NOTE(review): belongs to a loop that starts before this chunk — context not visible here.

# To evaluate how good a formula is, we simply feed the expression instance
# with a sequence of numbers from 1 to 9. If the expression requires more
# than 9 digits, it results in an error. The actual value of performing
# corresponding operations is done in the `__call__` method of the expression classes.
def evaluate(expr):
    """Score `expr` by invoking it on a lazy stream of the digits 1..9.

    The generator raises ValueError if more than nine values are consumed;
    with `errors="ignore"` below, the search treats that as a failed candidate.
    """
    def stream():
        # Lazily yield 1 through 9; a tenth request aborts the evaluation.
        for i in range(1, 10):
            yield i
        raise ValueError("Too many values asked")

    return expr(stream())

# We will run 1000 iterations of each search strategy to compare their long-term performance.
search_rand = RandomSearch(grammar, evaluate, errors="ignore")
best_rand, best_fn_rand = search_rand.run(1000, logger=RichLogger())

search_pe = PESearch(grammar, evaluate, pop_size=10, errors="ignore")
best_pe, best_fn_pe = search_pe.run(1000, logger=RichLogger())

# And here are the results.
print(best_rand, best_fn_rand)
print(best_pe, best_fn_pe)
# 驴C贸mo utilizamos esto en la clase AutoML? automl = AutoML( input=(Seq[Sentence], Supervised[VectorCategorical]), # **tipos de entrada** output=VectorCategorical, # **tipo de salida** # tenemos el par谩metro score_metric para definir la funci贸n objetivo, # que si no le fijamos un valor utiliza por defecto la funci贸n `autogoal.ml.metrics.accuracy`. ) # Ya hasta aqu铆 hemos definido el problema que queremos resolver # ahora solo nos resta ejecutar nuestro algoritmo, llamando al m茅todo `fit`. # Para monitorear el estado del proceso de AutoML, podemos pasar un logger al m茅todo `fit`. from autogoal.search import RichLogger # Entrenando... automl.fit(X_train, y_train, logger=RichLogger()) # Conociemdo que tan bueno es nuestro algoritmo score = automl.score(X_test, y_test) print(f"Score: {score:0.3f}") # Esto significa que nuestro algoritmo el mejor pipeline que encontr贸 report贸 un accuracy "result" # Tambi茅n puede llamarse al m茅todo predict que nos hace la predicci贸n para un conjunto de ejemplos # Prediciendo... predictions = automl.predict(X_test) for sentence, real, predicted in zip(X_test[:10], y_test, predictions): print(sentence, "-->", real, "vs", predicted)