def _build_pipeline():
    """Sample one pipeline from the CRF-only pipeline space.

    The space is built from the classes returned by
    ``find_classes(include="CRF")`` and sampled with a ``Sampler`` seeded
    with ``random_state=0``.
    """
    source_type = Tuple(List(List(Flags())), List(List(Category())))
    target_type = List(List(Category()))
    crf_only = find_classes(include="CRF")

    space = build_pipelines(
        input=source_type,
        output=target_type,
        registry=crf_only,
    )

    seeded_sampler = Sampler(random_state=0)
    return space.sample(sampler=seeded_sampler)
def contrib_list(
    verbose: bool = False,
    include: str = None,
    exclude: str = None,
    input: str = None,
    output: str = None,
):
    """
    ⚙️ List all currently available contrib algorithms.
    """
    # NOTE: the docstring above is kept short on purpose -- Typer shows it as
    # the command's help text, so parameter documentation lives in comments.
    #
    # verbose: also print one line per algorithm with the annotations of
    #          its `run` method.
    # include / exclude / input / output: forwarded unchanged to
    #          `find_classes` to filter the algorithms that are listed.
    classes = find_classes(include=include, exclude=exclude, input=input, output=output)

    # Group the classes by the third dot-separated component of their repr.
    # NOTE(review): presumably this is the contrib package name (e.g. the
    # `xyz` in `autogoal.contrib.xyz`) -- confirm against the module layout.
    classes_by_contrib = collections.defaultdict(list)
    for cls in classes:
        classes_by_contrib[str(cls).split(".")[2]].append(cls)

    # Widest class name, used to align the verbose listing (0 when empty).
    max_cls_name_length = max((len(cls.__name__) for cls in classes), default=0)

    # `echo(..., color="blue")` does not colour anything: `color` is a boolean
    # switch for ANSI support. `secho` with `fg` applies the intended colour.
    typer.secho(
        f"⚙️ Found a total of {len(classes)} matching algorithms.",
        fg=typer.colors.BLUE,
    )

    for contrib, clss in classes_by_contrib.items():
        typer.echo(f"🛠️ {contrib}: {len(clss)} algorithms.")

        if not verbose:
            continue

        for cls in clss:
            sig = inspect.signature(cls.run)
            typer.echo(
                f" 🔹 {cls.__name__.ljust(max_cls_name_length)} : {sig.parameters['input'].annotation} -> {sig.return_annotation}"
            )
def make_algorithms_table():
    """Write a markdown table of all contrib algorithms to ``guide/algorithms.md``.

    The file is created next to this module (``Path(__file__).parent``). Each
    row holds the class name, the third component of its module path (blank
    when that component starts with an underscore), and the input and return
    annotations of the class's ``run`` method. Every class is also printed
    to stdout while the table is written.
    """
    from autogoal.contrib import find_classes

    all_classes = find_classes()

    table_header = textwrap.dedent(
        """
        |Algorithm|Dependencies|Input|Output|
        |--|--|--|--|
        """
    )
    target = Path(__file__).parent / "guide" / "algorithms.md"

    with open(target, "w") as fp:
        fp.write(table_header)

        for clss in all_classes:
            print(clss)

            signature = inspect.signature(clss.run)
            module_part = clss.__module__.split(".")[2]
            # Components starting with "_" are shown as an empty dependency.
            dependency = "" if module_part.startswith("_") else module_part

            row = f"| {clss.__name__} | {dependency} | {signature.parameters['input'].annotation} | {signature.return_annotation} | \n"
            fp.write(row)
def make_pipeline_builder(self):
    """Build the pipeline graph for this instance's input and output types.

    Uses ``self.registry`` when it is truthy; otherwise the registry is
    discovered with ``find_classes`` using the instance's include/exclude
    filters.
    """
    registry = self.registry
    if not registry:
        registry = find_classes(
            include=self.include_filter,
            exclude=self.exclude_filter,
        )

    return build_pipeline_graph(
        input_types=self.input,
        output_type=self.output,
        registry=registry,
    )
def _build_pipeline():
    """Sample one pipeline from the CRF-only pipeline graph.

    The graph is built from the classes returned by
    ``find_classes(include="CRF")`` and sampled with a ``Sampler`` seeded
    with ``random_state=0``.
    """
    source_types = (Seq[Seq[FeatureSet]], Supervised[Seq[Seq[Label]]])
    target_type = Seq[Seq[Label]]
    crf_only = find_classes(include="CRF")

    graph = build_pipeline_graph(
        input_types=source_types,
        output_type=target_type,
        registry=crf_only,
    )

    seeded_sampler = Sampler(random_state=0)
    return graph.sample(sampler=seeded_sampler)
def _make_pipeline_builder(self):
    """Build the pipeline space for this instance's input and output types.

    Uses ``self.registry`` when it is truthy; otherwise the registry is
    discovered with ``find_classes`` using the instance's include/exclude
    filters. The builder input is the tuple ``(self.input, self.output)``.
    """
    registry = self.registry
    if not registry:
        registry = find_classes(
            include=self.include_filter,
            exclude=self.exclude_filter,
        )

    paired_input = Tuple(self.input, self.output)
    return build_pipelines(
        input=paired_input,
        output=self.output,
        registry=registry,
    )
def test_matrix_classification_pipeline_uses_keras_classifier():
    """Only ``KerasClassifier`` should appear for dense-matrix classification.

    The graph is built from the classes returned by ``find_classes("Keras")``;
    the image, sequence-classifier and sequence-tagger variants must not be
    among its nodes.
    """
    graph = build_pipeline_graph(
        input_types=(MatrixContinuousDense, Supervised[VectorCategorical]),
        output_type=VectorCategorical,
        registry=find_classes("Keras"),
    )
    nodes = graph.nodes()

    assert KerasClassifier in nodes

    for unexpected in (
        KerasImageClassifier,
        KerasSequenceClassifier,
        KerasSequenceTagger,
    ):
        assert unexpected not in nodes
def load_algorithms(self, path: Path):
    """
    Load the pipeline algorithms list from the given path.

    Reads ``path / "algorithms.yml"`` and, for the i-th entry under its
    ``"algorithms"`` key, calls ``cls.load(path / "algorithms" / str(i))``
    on every class returned by ``find_classes()`` whose default class repr
    contains that entry.

    Returns the list of loaded objects, in the order they were matched.
    """
    with open(path / "algorithms.yml", "r") as fd:
        algorithms = yaml.safe_load(fd)

    # `object.__str__` is used instead of `str(cls)` so the plain
    # "<class 'module.Name'>" form is matched; compute it once per class
    # rather than once per (entry, class) pair.
    class_reprs = [(cls, object.__str__(cls)) for cls in find_classes()]
    algorithms_dir = path / "algorithms"

    answer = []
    for i, algorithm in enumerate(algorithms.get("algorithms")):
        # NOTE(review): this is a substring match, so one entry can match
        # (and load) more than one class -- confirm this is intended.
        for cls, cls_repr in class_reprs:
            if algorithm in cls_repr:
                answer.append(cls.load(algorithms_dir / str(i)))

    return answer
parser.add_argument("--popsize", type=int, default=50) parser.add_argument("--selection", type=int, default=10) parser.add_argument("--global-timeout", type=int, default=None) parser.add_argument("--examples", type=int, default=None) parser.add_argument("--token", default=None) parser.add_argument("--channel", default=None) args = parser.parse_args() print(args) # The next line will print all the algorithms that AutoGOAL found # in the `contrib` library, i.e., anything that could be potentially used # to solve an AutoML problem. for cls in find_classes(): print("Using: %s" % cls.__name__) # ## Experimentation # Instantiate the classifier. # Note that the input and output types here are defined to match the problem statement, # i.e., text classification. classifier = AutoML( search_algorithm=PESearch, input=List(Sentence()), output=CategoricalVector(), search_iterations=args.iterations, score_metric=f1_score, search_kwargs=dict(
print(args)

# ## Experimentation

# Instantiate the classifier.
# The input and output types are chosen to match the problem statement,
# i.e., entity recognition: sequences of sequences of words in, one label
# per word out.

from autogoal.contrib import find_classes

classifier = AutoML(
    # Search strategy and size, taken from the command line.
    search_algorithm=PESearch,
    search_iterations=args.iterations,
    pop_size=args.popsize,
    # Problem definition.
    input=(Seq[Seq[Word]], Supervised[Seq[Seq[Label]]]),
    output=Seq[Seq[Label]],
    # Leave out every algorithm matching `Keras` or `Bert`.
    registry=find_classes(exclude="Keras|Bert"),
    # Evaluation.
    score_metric=meddocan.F1_beta,
    cross_validation_steps=1,
    # Resource limits; `args.memory` is multiplied by 1024 ** 3.
    search_timeout=args.global_timeout,
    evaluation_timeout=args.timeout,
    memory_limit=args.memory * 1024 ** 3,
)

# Basic logging configuration.

loggers = [RichLogger()]

if args.token:
    from autogoal.contrib.telegram import TelegramLogger
def build_pipelines(self):
    """Render the "Pipelines" demo page.

    The page has four parts:

    1. an overview of the semantic data types (``DATA_TYPES``),
    2. an algorithm browser showing the ``run`` annotations and the
       constructor parameters of the selected class,
    3. a pipeline builder that generates, displays and evaluates a
       ``build_pipelines`` snippet for the chosen input/output types,
    4. a chart of the resulting graph plus one randomly sampled pipeline.

    Returns ``None``. It returns early, after showing an error and a hint,
    when the snippet fails with a "No pipelines can be constructed" error;
    any other exception from the snippet is re-raised.
    """
    st.write("# Pipelines")
    st.write(
        "This example illustrates how AutoGOAL automatically builds "
        "a graph of pipelines for different problems settings."
    )

    from autogoal.kb._data import DATA_TYPES

    types_str = [cls.__name__ for cls in DATA_TYPES]

    st.write("""
    AutoGOAL pipeline discovery is based on a hierarchy of semantic datatypes.
    Each type represents a semantic datum that can be used in a machine learning algorithm,
    from matrices and vectors to sentences, entities and and images.
    The following picture shows all available semantic data types.
    You can click the top right corner to enlarge.
    """)

    st.image("/code/docs/guide/datatypes.png", use_column_width=True)

    from autogoal.contrib import find_classes

    # Index the discovered algorithms by class name for the select box.
    all_classes = {k.__name__: k for k in find_classes()}

    st.write(f"""
    ## Algorithm Library
    AutoGOAL automatically builds pipelines by selecting from a wide range of
    algorithms implemented in `contrib` modules.
    The list of all available algorithms is shown here.
    There are a total of **{len(all_classes)}** algorithms implemented.
    Select one to display some information.
    """)

    class_name = st.selectbox("Select an algorithm", list(all_classes))
    class_type = all_classes[class_name]

    st.write(f"### {class_type.__module__}.{class_name}")

    run_signature = inspect.signature(class_type.run)
    st.write(
        f"**Input type**: {run_signature.parameters['input'].annotation}")
    st.write(f"**Output type**: {run_signature.return_annotation}")

    st.write("#### Parameters")
    init_parameters = inspect.signature(class_type.__init__).parameters
    params = [
        f"* **{name}**: {param.annotation}"
        for name, param in init_parameters.items()
        if name != 'self'
    ]
    st.write("\n".join(params))

    st.write("## Pipeline Builder")
    st.write("""
    AutoGOAL can automatically build pipelines given a desired input and output value.
    It uses the annotations of the `run` method of each algorithm to detect
    which algorithms can be connected.

    In the following section, you can select a desired input and output types and
    explore the pipelines that AutoGOAL discovers.
    In the left sidebar you can fine-tune the input value, e.g., make it a list
    of elements instead of a single element.
    """)

    st.sidebar.markdown("### Configure input and output types")
    list_input = st.sidebar.number_input("Input list (level)", 0, 3, 1)
    list_output = st.sidebar.number_input("Output list (level)", 0, 3, 0)
    tuples = st.sidebar.checkbox("Is supervised (use Tuple in input)", True)

    input_type = st.selectbox("Select an input type", types_str,
                              types_str.index('Sentence'))
    output_type = st.selectbox("Select and output type", types_str,
                               types_str.index('CategoricalVector'))

    def nest_in_lists(type_name, depth):
        # Turn e.g. ("Sentence", 2) into "List(List(Sentence()))".
        expr = type_name + "()"
        for _ in range(depth):
            expr = f"List({expr})"
        return expr

    # Both types go through the same helper. Previously the output loop
    # assigned to `input_type`, so a non-zero output level overwrote the
    # input type and left the output un-nested.
    input_type = nest_in_lists(input_type, list_input)
    output_type = nest_in_lists(output_type, list_output)

    if tuples:
        input_type = f"Tuple({input_type}, {output_type})"

    st.write(f"#### Defined input type: `{input_type}`")
    st.write(f"#### Defined output type: `{output_type}`")

    st.write("""
    The following code uses explicitely AutoGOAL's pipeline discovery engine
    to find all the pipelines that can be constructed from the desired input
    to the desired output.
    """)

    code = textwrap.dedent(f"""
        from autogoal.kb import *
        from autogoal.kb import build_pipelines
        from autogoal.contrib import find_classes
        # explicitly build the graph of pipelines
        space = build_pipelines(
            input={input_type},
            output={output_type},
            registry=find_classes(),
        )
        """)
    st.code(code)

    # The snippet is assembled only from the select-box choices and the
    # nesting levels above, then evaluated to obtain `space`.
    try:
        space = eval_code(code, "space")
    except Exception as e:
        if "No pipelines can be constructed" in str(e):
            st.error(str(e))
            st.info(
                "Try changing the input and output type or select **Is supervised** in the left sidebar."
            )
            return

        raise

    st.write("""
    ### The Pipelines Graph
    This is the graph that represents all the posible pipelines find by AutoGOAL.

    Each node in this graph is an algorithm from the _Algorithm Library_
    that is compatible with the input and output types of its neighbors.
    Any path from the top to the bottom of the graph represents a valid pipeline.
    """)

    graph = nx.DiGraph()

    def get_node_repr(node):
        # Unwrap `.inner` recursively; the first object that cannot be
        # unwrapped is described by its own label and module.
        try:
            return get_node_repr(node.inner)
        except Exception:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt
            # and SystemExit.
            return dict(label=str(node).split(".")[-1],
                        module=node.__module__.split("_")[0])

    for node in space.graph.nodes:
        attrs = get_node_repr(node)
        graph.add_node(attrs["label"], **attrs)

    for u, v in space.graph.edges:
        graph.add_edge(
            get_node_repr(u)["label"],
            get_node_repr(v)["label"])

    pos = nx.nx_pydot.pydot_layout(graph, prog="dot", root=space.Start)

    chart = (nxa.draw_networkx(
        graph, pos=pos, node_color="module",
        node_tooltip="label").properties(height=500).interactive())

    st.altair_chart(chart, use_container_width=True)

    st.write("""
    ### Example Pipeline
    Here is an example pipeline that has been randomly sampled from the previous graph.
    You can try different samples. Notice how not only the nodes (algorithms) that participate
    in the pipeline are different each time, but also their internal hyperparameters change.
    When sampling a pipeline from the graph AutoGOAL samples all the internal hyperparameters
    as defined by the constructor. When these hyperparameters have complex values
    (e.g., an algorithm per-se), AutoGOAL recursively samples instances of the internal algorithms, and so on.
    """)

    st.code(space.sample())
    st.button("Sample another pipeline")
from autogoal.contrib import find_classes
from autogoal.kb import *
from autogoal.kb import build_pipelines, build_pipeline_graph
from autogoal.contrib.spacy import SpacyNLP
from autogoal.contrib._wrappers import FlagsMerger

import logging

# Show INFO-level log records while the graph is built and sampled.
logging.basicConfig(level=logging.INFO)

# Build the space of pipelines that turn a list of sentences into a dense
# continuous matrix, using every algorithm `find_classes()` returns.
# To narrow the search, the registry can instead be given explicitly, e.g.
# `registry=[SpacyNLP, FlagsMerger]`, optionally with `max_list_depth=1`.
# NOTE(review): confirm that `build_pipeline_graph` accepts the `input=` and
# `output=` keywords used here.
pipeline_space = build_pipeline_graph(
    input=List(Sentence()),
    output=MatrixContinuousDense(),
    registry=find_classes(),
)

# Print ten sampled pipelines.
for i in range(10):
    pipeline = pipeline_space.sample()
    print(pipeline)
import pytest

from autogoal.contrib import find_classes
from autogoal.grammar import generate_cfg, Sampler
from autogoal.exceptions import InterfaceIncompatibleError

# Discovered once at import time; used both as the parametrisation source
# and as the registry passed to `generate_cfg`.
classes = find_classes()


@pytest.mark.contrib
@pytest.mark.parametrize("clss", classes)
def test_create_grammar_for_generated_class(clss):
    """A grammar can be generated for each class.

    ``InterfaceIncompatibleError`` is tolerated; any other exception fails
    the test.
    """
    try:
        generate_cfg(clss, registry=classes)
    except InterfaceIncompatibleError:
        return


@pytest.mark.slow
@pytest.mark.contrib
@pytest.mark.parametrize("clss", classes)
def test_sample_generated_class(clss):
    """Sampling each class's grammar 1000 times with a fixed seed must not raise."""
    grammar = generate_cfg(clss, registry=classes)
    seeded_sampler = Sampler(random_state=0)

    remaining = 1000
    while remaining > 0:
        grammar.sample(sampler=seeded_sampler)
        remaining -= 1
# ## Experimentation

# Instantiate the classifier.
# The input and output types are chosen to match the problem statement,
# i.e., entity recognition: sequences of sequences of words in, one label
# per word out.

classifier = AutoML(
    search_algorithm=PESearch,
    input=(Seq[Seq[Word]], Supervised[Seq[Seq[Label]]]),
    output=Seq[Seq[Label]],
    score_metric=meddocan.F1_beta,
    cross_validation_steps=1,
    # Only neural networks are wanted here, so the contrib registry is
    # restricted to algorithms matching `Keras` or `Bert`.
    registry=find_classes("Keras|Bert"),
    # Neural networks are slow, so allow extra time per evaluation and for
    # the search as a whole.
    evaluation_timeout=300,
    search_timeout=1800,
)

# Basic logging configuration.

loggers = [RichLogger()]

# Finally, load the MEDDOCAN dataset and fit the `AutoML` instance on the
# training split.

X_train, y_train, X_test, y_test = meddocan.load()

classifier.fit(X_train, y_train, logger=loggers)
from autogoal.contrib import find_classes

# ## Experimentation

# Instantiate the classifier.
# The input and output types are chosen to match the problem statement,
# i.e., image classification: a 4-dimensional tensor in, a categorical
# vector out.

classifier = AutoML(
    search_algorithm=PESearch,
    input=(Tensor4, Supervised[VectorCategorical]),
    output=VectorCategorical,
    cross_validation_steps=1,
    # Only neural networks are wanted here, so the contrib registry is
    # restricted to algorithms matching `Keras`.
    registry=find_classes("Keras"),
    errors="raise",
    # Image classifiers are heavy to train, so give them a longer timeout.
    evaluation_timeout=5 * Min,
    search_timeout=1 * Hour,
)

# Basic logging configuration.

loggers = [RichLogger()]

# Finally, the CIFAR dataset is loaded, the `AutoML` instance is run,
# and the results are printed.

from autogoal.datasets import cifar10
# Once our algorithm is ready, all that is left is to tell the AutoML class
# to use it during the search.

# These are some imports we will need further on.
from autogoal.ml import AutoML
from autogoal.contrib import find_classes

# Let's try it with HAHA.
from autogoal.datasets import haha

# Load the data.
X_train, y_train, X_test, y_test = haha.load()

# Create the AutoML instance with our class.
automl = AutoML(
    input=(Seq[Sentence], Supervised[VectorCategorical]),  # **input types**
    output=VectorCategorical,  # **output type**
    # Add our class together with all the other AutoGOAL algorithms.
    registry=[NewAlgorithm] + find_classes(),
)

# Now we simply run AutoML and our algorithm will show up in some pipelines.
# Keep in mind this does not guarantee that it appears in the best pipeline
# found; it only means it will be connected with the other algorithms as if
# it were native to AutoGOAL.
automl.fit(X_train, y_train)

score = automl.score(X_test, y_test)
print(score)