Esempio n. 1
0
def _build_pipeline():
    """Sample one pipeline from the CRF-restricted pipeline space.

    The space goes from a tuple of (nested list of flags, nested list of
    categories) to a nested list of categories, and only considers the
    classes returned by ``find_classes(include="CRF")``. The sample is drawn
    with a ``Sampler`` created with ``random_state=0``.
    """
    supervised_input = Tuple(List(List(Flags())), List(List(Category())))
    expected_output = List(List(Category()))
    crf_registry = find_classes(include="CRF")

    pipeline_space = build_pipelines(
        input=supervised_input,
        output=expected_output,
        registry=crf_registry,
    )

    seeded_sampler = Sampler(random_state=0)
    return pipeline_space.sample(sampler=seeded_sampler)
Esempio n. 2
0
def contrib_list(
    verbose: bool = False,
    include: str = None,
    exclude: str = None,
    input: str = None,
    output: str = None,
):
    """
    ⚙️ List all currently available contrib algorithms.
    """
    # NOTE: the docstring above is left untouched on purpose; if this function
    # is registered as a Typer command it is shown verbatim as the help text.
    #
    # Parameters:
    #   verbose  - also print one line per algorithm with its `run` signature.
    #   include, exclude, input, output - forwarded unchanged to find_classes
    #              as filters (their exact semantics are defined there).
    classes = find_classes(include=include,
                           exclude=exclude,
                           input=input,
                           output=output)
    classes_by_contrib = collections.defaultdict(list)
    max_cls_name_length = 0

    for cls in classes:
        # Track the longest class name so the verbose listing can be aligned.
        max_cls_name_length = max(max_cls_name_length, len(cls.__name__))
        # Group by the third dot-separated piece of str(cls); presumably the
        # contrib package name (autogoal.contrib.<name>...) - verify.
        classes_by_contrib[str(cls).split(".")[2]].append(cls)

    # `typer.echo(color=...)` is only an on/off switch for ANSI handling, not a
    # colour name, so the summary is styled through `secho(fg=...)` instead.
    typer.secho(f"⚙️  Found a total of {len(classes)} matching algorithms.",
                fg="blue")

    for contrib, clss in classes_by_contrib.items():
        typer.echo(f"🛠️  {contrib}: {len(clss)} algorithms.")

        if verbose:
            for cls in clss:
                # The annotations of `run` give the algorithm's input/output types.
                sig = inspect.signature(cls.run)
                typer.echo(
                    f" 🔹 {cls.__name__.ljust(max_cls_name_length)} : {sig.parameters['input'].annotation} -> {sig.return_annotation}"
                )
Esempio n. 3
0
def make_algorithms_table():
    """Write a Markdown table of all discovered algorithms.

    The table is written to ``guide/algorithms.md`` next to this file, one
    row per class returned by ``find_classes()``: class name, the third
    component of its module path (blank when it starts with ``_``), and the
    input / return annotations of its ``run`` method. Each class is also
    printed to stdout as it is processed.
    """
    from autogoal.contrib import find_classes

    algorithms = find_classes()
    target = Path(__file__).parent / "guide" / "algorithms.md"

    with open(target, "w") as table:
        header = textwrap.dedent(
            """
            |Algorithm|Dependencies|Input|Output|
            |--|--|--|--|
            """
        )
        table.write(header)

        for algorithm in algorithms:
            print(algorithm)
            run_signature = inspect.signature(algorithm.run)

            package = algorithm.__module__.split(".")[2]
            # Modules whose third component starts with "_" get an empty cell.
            package = "" if package.startswith("_") else package

            row = f"| {algorithm.__name__} | {package} | {run_signature.parameters['input'].annotation} | {run_signature.return_annotation} | \n"
            table.write(row)
Esempio n. 4
0
    def make_pipeline_builder(self):
        """Build the pipeline graph for this instance's input and output types.

        Uses ``self.registry`` when it is truthy; otherwise the registry is
        looked up with ``find_classes`` using the instance's include/exclude
        filters.
        """
        available = self.registry
        if not available:
            available = find_classes(
                include=self.include_filter, exclude=self.exclude_filter
            )

        graph = build_pipeline_graph(
            input_types=self.input,
            output_type=self.output,
            registry=available,
        )
        return graph
Esempio n. 5
0
def _build_pipeline():
    """Sample one pipeline from the CRF-restricted pipeline graph.

    The graph takes ``Seq[Seq[FeatureSet]]`` plus supervised
    ``Seq[Seq[Label]]`` and produces ``Seq[Seq[Label]]``, considering only
    the classes returned by ``find_classes(include="CRF")``. The sample is
    drawn with a ``Sampler`` created with ``random_state=0``.
    """
    inputs = (Seq[Seq[FeatureSet]], Supervised[Seq[Seq[Label]]])
    target = Seq[Seq[Label]]
    crf_registry = find_classes(include="CRF")

    pipeline_space = build_pipeline_graph(
        input_types=inputs,
        output_type=target,
        registry=crf_registry,
    )

    seeded_sampler = Sampler(random_state=0)
    return pipeline_space.sample(sampler=seeded_sampler)
Esempio n. 6
0
    def _make_pipeline_builder(self):
        """Build the pipeline space for this instance's input and output types.

        Uses ``self.registry`` when it is truthy; otherwise the registry is
        looked up with ``find_classes`` using the instance's include/exclude
        filters. The input handed to ``build_pipelines`` is the tuple
        ``(self.input, self.output)``.
        """
        available = self.registry
        if not available:
            available = find_classes(include=self.include_filter,
                                     exclude=self.exclude_filter)

        paired_input = Tuple(self.input, self.output)

        return build_pipelines(input=paired_input,
                               output=self.output,
                               registry=available)
Esempio n. 7
0
def test_matrix_classification_pipeline_uses_keras_classifier():
    """Among the "Keras" classes, only KerasClassifier joins the matrix classification graph."""
    graph = build_pipeline_graph(
        input_types=(MatrixContinuousDense, Supervised[VectorCategorical]),
        output_type=VectorCategorical,
        registry=find_classes("Keras"),
    )
    nodes = graph.nodes()

    assert KerasClassifier in nodes

    # The image and sequence variants must not be part of this graph.
    for unexpected in (
        KerasImageClassifier,
        KerasSequenceClassifier,
        KerasSequenceTagger,
    ):
        assert unexpected not in nodes
Esempio n. 8
0
    def load_algorithms(self, path: Path):
        """
        Load the pipeline's algorithm list from the given path.

        Reads ``path / "algorithms.yml"`` and, for the i-th entry under its
        ``"algorithms"`` key, calls ``cls.load(path / "algorithms" / str(i))``
        on every class returned by ``find_classes()`` whose default string
        representation contains that entry.

        Returns the list of loaded objects, in the order they were matched.
        """
        with open(path / "algorithms.yml", "r") as fd:
            algorithms = yaml.safe_load(fd)

        autogoal_algorithms = find_classes()

        answer = []

        for i, algorithm in enumerate(algorithms.get("algorithms")):
            for cls in autogoal_algorithms:
                # object.__str__ is used instead of str(cls); presumably so a
                # customised __str__ on the class does not affect matching.
                if algorithm in object.__str__(cls):
                    answer.append(cls.load(path / "algorithms" / str(i)))

        return answer
Esempio n. 9
0
# Search-related options: population size and selection size default to 50
# and 10; the global timeout and the number of examples are unset by default.
parser.add_argument("--popsize", type=int, default=50)
parser.add_argument("--selection", type=int, default=10)
parser.add_argument("--global-timeout", type=int, default=None)
parser.add_argument("--examples", type=int, default=None)
# Optional token and channel (no type conversion, both default to None).
parser.add_argument("--token", default=None)
parser.add_argument("--channel", default=None)

args = parser.parse_args()

# Echo the parsed arguments so the run configuration is visible in the output.
print(args)

# The next line will print all the algorithms that AutoGOAL found
# in the `contrib` library, i.e., anything that could be potentially used
# to solve an AutoML problem.

for cls in find_classes():
    print("Using: %s" % cls.__name__)

# ## Experimentation

# Instantiate the classifier.
# Note that the input and output types here are defined to match the problem statement,
# i.e., text classification.

classifier = AutoML(
    search_algorithm=PESearch,
    input=List(Sentence()),
    output=CategoricalVector(),
    search_iterations=args.iterations,
    score_metric=f1_score,
    search_kwargs=dict(
Esempio n. 10
0
print(args)

# ## Experimentation

# Instantiate the classifier.
# Note that the input and output types here are defined to match the problem statement,
# i.e., entity recognition.

from autogoal.contrib import find_classes

# The registry is built with exclude="Keras|Bert" (presumably leaving out the
# Keras- and Bert-based algorithms); most other settings come straight from
# the parsed command-line arguments.
classifier = AutoML(
    search_algorithm=PESearch,
    input=(Seq[Seq[Word]], Supervised[Seq[Seq[Label]]]),
    output=Seq[Seq[Label]],
    registry=find_classes(exclude="Keras|Bert"),
    search_iterations=args.iterations,
    score_metric=meddocan.F1_beta,
    cross_validation_steps=1,
    pop_size=args.popsize,
    search_timeout=args.global_timeout,
    evaluation_timeout=args.timeout,
    # args.memory is scaled by 1024 ** 3 (presumably GiB -> bytes; confirm).
    memory_limit=args.memory * 1024 ** 3,
)

# Basic logging configuration.

loggers = [RichLogger()]

if args.token:
    from autogoal.contrib.telegram import TelegramLogger
Esempio n. 11
0
    def build_pipelines(self):
        """Render the "Pipelines" demo page with Streamlit.

        The page, in order:

        1. introduces the semantic data types and shows their diagram;
        2. lists every class returned by ``find_classes()`` and shows the
           ``run`` annotations and constructor parameters of the selected one;
        3. lets the user pick input/output types (optionally nested in
           ``List`` and wrapped in a ``Tuple``), builds the matching code
           snippet and evaluates it to obtain the pipeline space;
        4. draws the resulting graph and prints one sampled pipeline.

        Returns ``None``. When the snippet fails with a "No pipelines can be
        constructed" error, the error is shown on the page and the method
        returns early; any other exception is re-raised.
        """
        st.write("# Pipelines")

        st.write("This example illustrates how AutoGOAL automatically builds "
                 "a graph of pipelines for different problems settings.")

        from autogoal.kb._data import DATA_TYPES

        types_str = [cls.__name__ for cls in DATA_TYPES]

        st.write("""
            AutoGOAL pipeline discovery is based on a hierarchy of semantic datatypes.
            Each type represents a semantic datum that can be used in a machine learning algorithm,
            from matrices and vectors to sentences, entities and and images.

            The following picture shows all available semantic data types.
            You can click the top right corner to enlarge.
            """)

        st.image("/code/docs/guide/datatypes.png", use_column_width=True)

        from autogoal.contrib import find_classes

        # Index the algorithms by class name for the select box below.
        all_classes = {k.__name__: k for k in find_classes()}

        st.write(f"""
            ## Algorithm Library

            AutoGOAL automatically builds pipelines by selecting from a wide range of algorithms
            implemented in `contrib` modules.
            The list of all available algorithms is shown here.

            There are a total of **{len(all_classes)}** algorithms implemented.
            Select one to display some information.
            """)

        class_name = st.selectbox("Select an algorithm", list(all_classes))
        class_type = all_classes[class_name]

        st.write(f"### {class_type.__module__}.{class_name}")

        # The annotations of `run` describe the algorithm's input/output types.
        run_signature = inspect.signature(class_type.run)
        st.write(
            f"**Input type**: {run_signature.parameters['input'].annotation}")
        st.write(f"**Output type**: {run_signature.return_annotation}")

        st.write("#### Parameters")
        params = []
        for name, param in inspect.signature(
                class_type.__init__).parameters.items():
            if name == 'self':
                continue

            params.append(f"* **{name}**: {param.annotation}")
        st.write("\n".join(params))

        st.write("## Pipeline Builder")

        st.write("""
            AutoGOAL can automatically build pipelines given a desired input and output
            value. It uses the annotations of the `run` method of each algorithm to detect
            which algorithms can be connected.

            In the following section, you can select a desired input and output types and 
            explore the pipelines that AutoGOAL discovers.
            In the left sidebar you can fine-tune the input value, e.g., make it a list
            of elements instead of a single element.
            """)

        st.sidebar.markdown("### Configure input and output types")
        list_input = st.sidebar.number_input("Input list (level)", 0, 3, 1)
        list_output = st.sidebar.number_input("Output list (level)", 0, 3, 0)
        tuples = st.sidebar.checkbox("Is supervised (use Tuple in input)",
                                     True)

        input_type = st.selectbox("Select an input type", types_str,
                                  types_str.index('Sentence'))

        output_type = st.selectbox("Select and output type", types_str,
                                   types_str.index('CategoricalVector'))

        # Turn the selected type names into source-code expressions, nesting
        # each one inside as many List(...) wrappers as requested in the sidebar.
        input_type = input_type + "()"
        for _ in range(list_input):
            input_type = f"List({input_type})"

        output_type = output_type + "()"
        for _ in range(list_output):
            # Fix: this loop used to assign to `input_type`, which discarded
            # the chosen input type and never nested the output type.
            output_type = f"List({output_type})"

        if tuples:
            input_type = f"Tuple({input_type}, {output_type})"

        st.write(f"#### Defined input type:  `{input_type}`")
        st.write(f"#### Defined output type: `{output_type}`")

        st.write("""
            The following code uses explicitely AutoGOAL's pipeline discovery engine
            to find all the pipelines that can be constructed from the desired
            input to the desired output.
            """)

        code = textwrap.dedent(f"""
            from autogoal.kb import *
            from autogoal.kb import build_pipelines
            from autogoal.contrib import find_classes

            # explicitly build the graph of pipelines
            space = build_pipelines(
                input={input_type},
                output={output_type},
                registry=find_classes(),
            )
            """)

        st.code(code)

        # NOTE(review): the snippet is evaluated via `eval_code` (defined
        # outside this method). The interpolated pieces come from select-box
        # choices and numeric inputs, not free text - keep it that way.
        try:
            space = eval_code(code, "space")
        except Exception as e:
            if "No pipelines can be constructed" in str(e):
                st.error(str(e))
                st.info(
                    "Try changing the input and output type or select **Is supervised** in the left sidebar."
                )
                return

            raise

        st.write("""
            ### The Pipelines Graph
            
            This is the graph that represents all the posible pipelines find by AutoGOAL.
            Each node in this graph is an algorithm from the _Algorithm Library_ that is
            compatible with the input and output types of its neighbors.
            Any path from the top to the bottom of the graph represents a valid pipeline.
            """)

        graph = nx.DiGraph()

        def get_node_repr(node):
            # Describe `node.inner` when that works; on any failure describe
            # the node itself with a short label and a module-derived group.
            try:
                return get_node_repr(node.inner)
            except Exception:
                return dict(label=str(node).split(".")[-1],
                            module=node.__module__.split("_")[0])

        for node in space.graph.nodes:
            attrs = get_node_repr(node)
            graph.add_node(attrs["label"], **attrs)

        for u, v in space.graph.edges:
            graph.add_edge(
                get_node_repr(u)["label"],
                get_node_repr(v)["label"])

        pos = nx.nx_pydot.pydot_layout(graph, prog="dot", root=space.Start)
        chart = (nxa.draw_networkx(
            graph, pos=pos, node_color="module",
            node_tooltip="label").properties(height=500).interactive())

        st.altair_chart(chart, use_container_width=True)

        st.write("""
            ### Example Pipeline
            
            Here is an example pipeline that has been randomly sampled from the previous graph.
            You can try different samples. Notice how not only the nodes (algorithms) that participate
            in the pipeline are different each time, but also their internal hyperparameters change.
            
            When sampling a pipeline from the graph AutoGOAL samples all the internal
            hyperparameters as defined by the constructor.
            When these hyperparameters have complex values (e.g., an algorithm per-se), AutoGOAL
            recursively samples instances of the internal algorithms, and so on.
            """)

        st.code(space.sample())

        # The click's return value is ignored; presumably pressing the button
        # just reruns the page, which draws a new sample above - confirm.
        st.button("Sample another pipeline")
Esempio n. 12
0
from autogoal.contrib import find_classes
from autogoal.kb import *
from autogoal.kb import build_pipelines, build_pipeline_graph

from autogoal.contrib.spacy import SpacyNLP
from autogoal.contrib._wrappers import FlagsMerger

import logging

# Show log messages at INFO level and above.
logging.basicConfig(level=logging.INFO)

# Build the space of pipelines from a list of sentences to a dense continuous
# matrix, drawing from every class returned by find_classes().
pipeline_space = build_pipeline_graph(
    input=List(Sentence()),
    output=MatrixContinuousDense(),
    registry=find_classes(),
    # registry=[SpacyNLP, FlagsMerger],
    # max_list_depth=1,
)

# Draw and print ten pipelines from the space.
for i in range(10):
    pipeline = pipeline_space.sample()
    print(pipeline)
Esempio n. 13
0
import pytest

from autogoal.contrib import find_classes
from autogoal.grammar import generate_cfg, Sampler
from autogoal.exceptions import InterfaceIncompatibleError

# Collected once at import time: used both to parametrize the tests below and
# as the registry passed to generate_cfg.
classes = find_classes()


@pytest.mark.contrib
@pytest.mark.parametrize("clss", classes)
def test_create_grammar_for_generated_class(clss):
    """A grammar can be generated for ``clss``; interface incompatibility is tolerated."""
    try:
        generate_cfg(clss, registry=classes)
    except InterfaceIncompatibleError:
        # This error is deliberately not treated as a failure.
        return


@pytest.mark.slow
@pytest.mark.contrib
@pytest.mark.parametrize("clss", classes)
def test_sample_generated_class(clss):
    """The grammar generated for ``clss`` can be sampled 1000 times without error."""
    cfg = generate_cfg(clss, registry=classes)
    seeded_sampler = Sampler(random_state=0)

    remaining = 1000
    while remaining > 0:
        cfg.sample(sampler=seeded_sampler)
        remaining -= 1
Esempio n. 14
0
# ## Experimentation

# Instantiate the classifier.
# Note that the input and output types here are defined to match the problem statement,
# i.e., entity recognition.

classifier = AutoML(
    search_algorithm=PESearch,
    input=(Seq[Seq[Word]], Supervised[Seq[Seq[Label]]]),
    output=Seq[Seq[Label]],
    score_metric=meddocan.F1_beta,
    cross_validation_steps=1,
    # Since we only want to try neural networks, we restrict
    # the contrib registry to algorithms matching the pattern `Keras|Bert`.
    registry=find_classes("Keras|Bert"),
    # We need to give some extra time because neural networks are slow
    evaluation_timeout=300,
    search_timeout=1800,
)

# Basic logging configuration.

loggers = [RichLogger()]

# Finally, loading the MEDDOCAN dataset, running the `AutoML` instance,
# and printing the results.

X_train, y_train, X_test, y_test = meddocan.load()

classifier.fit(X_train, y_train, logger=loggers)
Esempio n. 15
0
from autogoal.contrib import find_classes

# ## Experimentation

# Instantiate the classifier.
# Note that the input and output types here are defined to match the problem statement,
# i.e., image classification.

classifier = AutoML(
    search_algorithm=PESearch,
    input=(Tensor4, Supervised[VectorCategorical]),
    output=VectorCategorical,
    cross_validation_steps=1,
    # Since we only want to try neural networks, we restrict
    # the contrib registry to algorithms matching with `Keras`.
    registry=find_classes("Keras"),
    errors="raise",
    # Since image classifiers are heavy to train, let's give them a longer timeout...
    evaluation_timeout=5 * Min,
    search_timeout=1 * Hour,
)

# Basic logging configuration.

loggers = [RichLogger()]

# Finally, loading the CIFAR dataset, running the `AutoML` instance,
# and printing the results.

from autogoal.datasets import cifar10
Esempio n. 16
0
# Once our algorithm is ready, all that remains is to tell the AutoML class to use it during the search.

# These are some imports we will need later on.
from autogoal.ml import AutoML
from autogoal.contrib import find_classes

# Let's try it with HAHA.
from autogoal.datasets import haha

# Load the data.
X_train, y_train, X_test, y_test = haha.load()

# Create the AutoML instance with our class.
automl = AutoML(
    input=(Seq[Sentence],
           Supervised[VectorCategorical]),  # **input types**
    output=VectorCategorical,  # **output type**
    # Add our class plus all the other AutoGOAL algorithms.
    registry=[NewAlgorithm] + find_classes(),
)

# Now we simply run AutoML and our algorithm will show up in some pipelines.
# Keep in mind that this does not guarantee it will appear in the best pipeline found; rather, it will be connected
# with the other algorithms as if it were native to AutoGOAL.

automl.fit(X_train, y_train)

score = automl.score(X_test, y_test)
print(score)