Ejemplo n.º 1
0
def automl_fit(
    input: Path,
    output: Path = Path("automl.bin"),
    target: str = None,
    ignore_cols: List[int] = typer.Option([]),
    evaluation_timeout: int = 5 * Min,
    memory_limit: int = 4 * Gb,
    search_timeout: int = 60 * 60,
    pop_size: int = 20,
    iterations: int = 100,
    random_state: int = None,
    format: str = None,
):
    """
    🏃 Train an AutoML instance on a dataset.
    """

    try:
        dataset = _load_dataset(format, input, ignore_cols)
    except ValueError as e:
        logger.error(f"⚠️  Error: {str(e)}")
        return

    if target is None:
        target = dataset.columns[-1]

    columns = [c for c in dataset.columns if c != target]

    X = dataset[columns].values
    y = dataset[target].values

    automl = AutoML(
        output=VectorCategorical(),
        search_kwargs=dict(
            evaluation_timeout=evaluation_timeout,
            memory_limit=memory_limit,
            search_timeout=search_timeout,
            pop_size=pop_size,
        ),
        random_state=random_state,
        search_iterations=iterations,
    )

    console.print(f"🏃 Training on {len(dataset)} items.")
    automl.fit(X, y, logger=RichLogger())

    with output.open("wb") as fp:
        automl.save(fp)

    console.print(f"💾 Saving model to [green]{output.absolute()}[/].")
Ejemplo n.º 2
0
    search_algorithm=PESearch,
    input=(Seq[Seq[Word]], Supervised[Seq[Seq[Label]]]),
    output=Seq[Seq[Label]],
    registry=find_classes(exclude="Keras|Bert"),
    search_iterations=args.iterations,
    score_metric=meddocan.F1_beta,
    cross_validation_steps=1,
    pop_size=args.popsize,
    search_timeout=args.global_timeout,
    evaluation_timeout=args.timeout,
    memory_limit=args.memory * 1024 ** 3,
)

# Basic logging configuration.

loggers = [RichLogger()]

if args.token:
    from autogoal.contrib.telegram import TelegramLogger

    telegram = TelegramLogger(token=args.token, name=f"MEDDOCAN", channel=args.channel,)
    loggers.append(telegram)

# Finally, loading the MEDDOCAN dataset, running the `AutoML` instance,
# and printing the results.

X_train, y_train, X_test, y_test = meddocan.load(max_examples=args.examples)

classifier.fit(X_train, y_train, logger=loggers)
score = classifier.score(X_test, y_test)
Ejemplo n.º 3
0
        continue

# To evaluate how good a formula is, we simply feed the expression instance
# with a sequence of numbers from 1 to 9. If the expression requires more
# than 9 digits, it results in an error. The actual value of performing
# corresponding operations is done in the `__call__` method of the expression classes.


def evaluate(expr):
    def stream():
        for i in range(1, 10):
            yield i

        raise ValueError("Too many values asked")

    return expr(stream())


# We will run 1000 iterations of each search strategy to compare their long-term performance.

search_rand = RandomSearch(grammar, evaluate, errors="ignore")
best_rand, best_fn_rand = search_rand.run(1000, logger=RichLogger())

search_pe = PESearch(grammar, evaluate, pop_size=10, errors="ignore")
best_pe, best_fn_pe = search_pe.run(1000, logger=RichLogger())

# And here are the results.

print(best_rand, best_fn_rand)
print(best_pe, best_fn_pe)
Ejemplo n.º 4
0
# ¿Cómo utilizamos esto en la clase AutoML?
automl = AutoML(
    input=(Seq[Sentence], Supervised[VectorCategorical]),  # **tipos de entrada**
    output=VectorCategorical,  # **tipo de salida**
    # tenemos el parámetro score_metric  para definir la función objetivo,
    # que si no le fijamos un valor utiliza por defecto la función `autogoal.ml.metrics.accuracy`.
)

# Ya hasta aquí hemos definido el problema que queremos resolver
# ahora solo nos resta ejecutar nuestro algoritmo, llamando al método `fit`.

# Para monitorear el estado del proceso de AutoML, podemos pasar un logger al método `fit`.
from autogoal.search import RichLogger

# Entrenando...
automl.fit(X_train, y_train, logger=RichLogger())

# Conociemdo que tan bueno es nuestro algoritmo
score = automl.score(X_test, y_test)
print(f"Score: {score:0.3f}")

# Esto significa que nuestro algoritmo el mejor pipeline que encontró reportó un accuracy "result"

# También puede llamarse al método predict que nos hace la predicción para un conjunto de ejemplos

# Prediciendo...
predictions = automl.predict(X_test)

for sentence, real, predicted in zip(X_test[:10], y_test, predictions):
    print(sentence, "-->", real, "vs", predicted)