Exemple #1
0
def test_into_definition(variations_of_same_pipeline):
    """
    Every variation of the pipeline must serialize to the same definition.

    Each pipeline is passed through ``into_definition`` -> ``from_definition``
    -> ``into_definition`` and the result is compared, as JSON text, against
    the reference definition below.
    """
    reference_yaml = """
        sklearn.pipeline.Pipeline:
            memory: null
            steps:
                - sklearn.decomposition._pca.PCA:
                    copy: true
                    iterated_power: auto
                    n_components: 2
                    random_state: null
                    svd_solver: auto
                    tol: 0.0
                    whiten: false
                - sklearn.pipeline.FeatureUnion:
                    n_jobs: null
                    transformer_list:
                    - sklearn.decomposition._pca.PCA:
                        copy: true
                        iterated_power: auto
                        n_components: 3
                        random_state: null
                        svd_solver: auto
                        tol: 0.0
                        whiten: false
                    - sklearn.pipeline.Pipeline:
                        memory: null
                        steps:
                        - sklearn.preprocessing._data.MinMaxScaler:
                            copy: true
                            feature_range:
                              - 0
                              - 1
                        - sklearn.decomposition._truncated_svd.TruncatedSVD:
                            algorithm: randomized
                            n_components: 2
                            n_iter: 5
                            random_state: null
                            tol: 0.0
                        verbose: false
                    transformer_weights: null
                    verbose: false
                - gordo.machine.model.models.KerasAutoEncoder:
                    kind: feedforward_hourglass
            verbose: false
        """
    expected_definition = yaml.safe_load(reference_yaml)
    # The reference never changes, so its JSON form is computed once.
    expected_json = json.dumps(expected_definition)

    for pipe in variations_of_same_pipeline:
        rebuilt_pipe = from_definition(into_definition(pipe))
        definition = into_definition(rebuilt_pipe)
        actual_json = json.dumps(definition)

        assert (
            actual_json == expected_json
        ), f"Failed output:\n{definition}\nExpected:----------------\n{expected_definition}"
Exemple #2
0
def test_diff_detector_serializability(config):
    """
    Should play well with the gordo serializer

    The YAML ``config`` is turned into a model with
    ``serializer.from_definition``; the model must then pass through
    ``into_definition`` and a ``dumps``/``loads`` round trip without raising.
    """
    # yaml.load() without an explicit Loader is deprecated and raises a
    # TypeError on PyYAML >= 6; safe_load restricts parsing to standard
    # YAML tags.
    config = yaml.safe_load(config)

    model = serializer.from_definition(config)
    serializer.into_definition(model)
    serialized_bytes = serializer.dumps(model)
    serializer.loads(serialized_bytes)
Exemple #3
0
def test_from_definition_test_model():
    """
    ``into_definition`` of a ``DefinitionTestModel(300)`` must yield a single
    entry keyed by the model's import path, carrying its ``depth`` parameter.
    """
    model_path = "tests.gordo.serializer.definition_test_model.DefinitionTestModel"
    produced = into_definition(DefinitionTestModel(300))
    assert {model_path: {"depth": 300}} == produced
Exemple #4
0
    def to_dict(self) -> dict:
        """
        Build the dict representation of this object via the serializer.

        The resulting dict can be used to initialize a new object after
        popping 'type' from it.

        Returns
        -------
        dict
        """
        definition = serializer.into_definition(self)
        return definition
Exemple #5
0
def test_imputer_from_definition(config_str: str):
    """
    The imputer must be constructible from a YAML definition, either on its
    own or as the final step of a Pipeline, and survive a serializer round trip.
    """
    model = serializer.from_definition(yaml.safe_load(config_str))

    # For a Pipeline the imputer is expected to be the estimator of the last step.
    candidate = model.steps[-1][1] if isinstance(model, Pipeline) else model
    assert isinstance(candidate, InfImputer)

    redefined = serializer.into_definition(model)
    serializer.from_definition(redefined)
Exemple #6
0
def test_captures_kwarg_to_init():
    """
    Extra kwargs given to a model must show up in its definition.

    Our models accept kwargs that are forwarded to, or used to construct, the
    underlying keras model. ``into_definition`` has to capture such kwargs even
    though they are not part of the ``__init__`` signature.
    """
    definition_key = f"{KerasAutoEncoder.__module__}.{KerasAutoEncoder.__name__}"
    autoencoder = KerasAutoEncoder(
        kind="feedforward_hourglass", some_fancy_param="Howdy!"
    )

    parameters = into_definition(autoencoder)[definition_key]
    assert "some_fancy_param" in parameters
    assert parameters["some_fancy_param"] == "Howdy!"

    # The captured parameters must be enough to construct the model again.
    KerasAutoEncoder(**parameters)
Exemple #7
0
def test_into_from():
    """
    Serialize a nested Pipeline into a definition and rebuild it from that
    definition, once for every registered model / kind combination.
    """
    from gordo.machine.model.transformer_funcs.general import multiply_by

    for model, kinds in register_model_builder.factories.items():
        for model_kind in kinds:
            # Innermost pipeline, nested inside the FeatureUnion below.
            inner_pipeline = Pipeline(
                steps=[
                    ("step_0", MinMaxScaler((0, 1))),
                    ("step_1", TruncatedSVD(n_components=2)),
                ]
            )
            union = FeatureUnion(
                [
                    ("step_0", PCA(n_components=3)),
                    ("step_1", inner_pipeline),
                ]
            )
            multiplier = FunctionTransformer(func=multiply_by, kw_args={"factor": 1})
            model_class = pydoc.locate(f"gordo.machine.model.models.{model}")

            pipe = Pipeline(
                [
                    ("step_0", PCA(n_components=2)),
                    ("step_1", union),
                    ("step_2", multiplier),
                    ("step_3", model_class(kind=model_kind)),
                ]
            )

            from_definition(into_definition(pipe))
Exemple #8
0
def build(
    machine_config: dict,
    output_dir: str,
    model_register_dir: click.Path,
    print_cv_scores: bool,
    model_parameter: List[Tuple[str, Any]],
    exceptions_reporter_file: str,
    exceptions_report_level: str,
):
    """
    Build a model and deposit it into 'output_dir' given the appropriate config
    settings.

    \b
    Parameters
    ----------
    machine_config: dict
        A dict loadable by :class:`gordo.machine.Machine.from_config`
    output_dir: str
        Directory to save model & metadata to.
    model_register_dir: path
        Path to a directory which will index existing models and their locations, used
        for re-using old models instead of rebuilding them. If omitted then always
        rebuild
    print_cv_scores: bool
        Print cross validation scores to stdout
    model_parameter: List[Tuple[str, Any]
        List of model key-values, wheres the values will be injected into the model
        config wherever there is a jinja variable with the key.
    exceptions_reporter_file: str
        JSON output file for exception information
    exceptions_report_level: str
        Details level for exception reporting
    """

    # Everything, including config expansion and Machine construction, runs
    # inside the try so that any failure is turned into an exit code below.
    try:
        # Only a model config that is still a string is expanded with the
        # supplied key-values; other model configs are used as given.
        if model_parameter and isinstance(machine_config["model"], str):
            parameters = dict(model_parameter)  # convert list of tuples to dict
            machine_config["model"] = expand_model(machine_config["model"],
                                                   parameters)

        machine: Machine = Machine.from_config(
            machine_config, project_name=machine_config["project_name"])

        logger.info(f"Building, output will be at: {output_dir}")
        logger.info(f"Register dir: {model_register_dir}")

        # Convert the config into a pipeline, and back into definition to ensure
        # all default parameters are part of the config.
        logger.debug(f"Ensuring the passed model config is fully expanded.")
        machine.model = serializer.into_definition(
            serializer.from_definition(machine.model))
        logger.info(f"Fully expanded model config: {machine.model}")

        builder = ModelBuilder(machine=machine)

        # The first element of the build result is not used here.
        _, machine_out = builder.build(output_dir,
                                       model_register_dir)  # type: ignore

        logger.debug("Reporting built machine.")
        machine_out.report()
        logger.debug("Finished reporting.")

        # NOTE(review): this raises for every machine whose name contains
        # "err", after the build and report have already completed. It looks
        # like a hook for exercising the exception reporter - confirm it is
        # intended outside of tests.
        if "err" in machine.name:
            raise FileNotFoundError("undefined_file.parquet")

        if print_cv_scores:
            for score in get_all_score_strings(machine_out):
                print(score)

    except Exception:
        # Top-level boundary: print the traceback, map the exception type to
        # an exit code, optionally write a report file, then exit.
        traceback.print_exc()
        exc_type, exc_value, exc_traceback = sys.exc_info()

        exit_code = _exceptions_reporter.exception_exit_code(exc_type)
        # A report is only written when a report file was given.
        if exceptions_reporter_file:
            _exceptions_reporter.safe_report(
                # ReportLevel.EXIT_CODE is passed as the second argument of
                # get_by_name - presumably the fallback for an unknown level
                # name; TODO confirm.
                cast(
                    ReportLevel,
                    ReportLevel.get_by_name(exceptions_report_level,
                                            ReportLevel.EXIT_CODE),
                ),
                exc_type,
                exc_value,
                exc_traceback,
                exceptions_reporter_file,
                # NOTE(review): magic numbers; presumably a size budget minus
                # some headroom - confirm where 2024 and 500 come from.
                max_message_len=2024 - 500,
            )
        sys.exit(exit_code)
    else:
        # Reached only when no exception was raised in the try body.
        return 0
Exemple #9
0
def test_from_into():
    """
    Create pipeline from definition, and create from that definition

    Runs once for every registered model / kind combination: the YAML
    definition is loaded, turned into a pipeline with ``from_definition`` and
    serialized again with ``into_definition``. The test passes when neither
    step raises.
    """
    factories = register_model_builder.factories
    for model, kinds in factories.items():
        for model_kind in kinds:
            # TruncatedSVD is referenced through its public import path, like
            # PCA and MinMaxScaler below: the old
            # ``sklearn.decomposition.truncated_svd`` module is a deprecated
            # shim in scikit-learn 0.22 and no longer exists from 0.24.
            definition = f"""
                sklearn.pipeline.Pipeline:
                    steps:
                        - sklearn.decomposition.PCA:
                            n_components: 2
                            copy: true
                            whiten: false
                            svd_solver: auto
                            tol: 0.0
                            iterated_power: auto
                            random_state:
                        - sklearn.preprocessing._function_transformer.FunctionTransformer:
                            func: gordo.machine.model.transformer_funcs.general.multiply_by
                            kw_args:
                                factor: 1
                            inverse_func: gordo.machine.model.transformer_funcs.general.multiply_by
                            inv_kw_args:
                                factor: 1
                        - sklearn.pipeline.FeatureUnion:
                            transformer_list:
                            - sklearn.decomposition.PCA:
                                n_components: 3
                                copy: true
                                whiten: false
                                svd_solver: auto
                                tol: 0.0
                                iterated_power: auto
                                random_state:
                            - sklearn.pipeline.Pipeline:
                                steps:
                                - sklearn.preprocessing.MinMaxScaler:
                                    feature_range:
                                    - 0
                                    - 1
                                    copy: true
                                - sklearn.decomposition.TruncatedSVD:
                                    n_components: 2
                                    algorithm: randomized
                                    n_iter: 5
                                    random_state:
                                    tol: 0.0
                                memory:
                                verbose: false
                            n_jobs: 1
                            transformer_weights:
                            verbose: false
                        - gordo.machine.model.models.{model}:
                            kind: {model_kind}
                    memory:
                    verbose: false
                """
            definition = yaml.safe_load(definition)
            pipe = from_definition(definition)
            into_definition(pipe)
Exemple #10
0
def build(
    machine_config: dict,
    output_dir: str,
    model_register_dir: click.Path,
    print_cv_scores: bool,
    model_parameter: List[Tuple[str, Any]],
):
    """
    Build a model and deposit it into 'output_dir' given the appropriate config
    settings.

    \b
    Parameters
    ----------
    machine_config: dict
        A dict loadable by :class:`gordo.machine.Machine.from_config`
    output_dir: str
        Directory to save model & metadata to.
    model_register_dir: path
        Path to a directory which will index existing models and their locations, used
        for re-using old models instead of rebuilding them. If omitted then always
        rebuild
    print_cv_scores: bool
        Print cross validation scores to stdout
    model_parameter: List[Tuple[str, Any]
        List of model key-values, wheres the values will be injected into the model
        config wherever there is a jinja variable with the key.
    """
    # Only a model config that is still a string is expanded with the supplied
    # key-values; other model configs are used as given.
    if model_parameter and isinstance(machine_config["model"], str):
        parameters = dict(model_parameter)  # convert list of tuples to dict
        machine_config["model"] = expand_model(machine_config["model"],
                                               parameters)

    machine: Machine = Machine.from_config(
        machine_config, project_name=machine_config["project_name"])

    logger.info(f"Building, output will be at: {output_dir}")
    logger.info(f"Register dir: {model_register_dir}")

    # Convert the config into a pipeline, and back into definition to ensure
    # all default parameters are part of the config.
    logger.debug(f"Ensuring the passed model config is fully expanded.")
    machine.model = serializer.into_definition(
        serializer.from_definition(machine.model))
    logger.info(f"Fully expanded model config: {machine.model}")

    builder = ModelBuilder(machine=machine)

    # Only failures from here on are mapped to an exit code; errors raised by
    # the config expansion and serializer round trip above propagate as-is.
    try:
        # The first element of the build result is not used here.
        _, machine_out = builder.build(output_dir,
                                       model_register_dir)  # type: ignore

        logger.debug("Reporting built machine.")
        machine_out.report()
        logger.debug("Finished reporting.")

        if print_cv_scores:
            for score in get_all_score_strings(machine_out):
                print(score)

    except Exception as e:
        # Lookup is keyed on the exact exception class, with 1 as the default.
        # NOTE(review): assuming EXCEPTION_TO_EXITCODE is a plain dict,
        # subclasses of a registered exception also fall back to 1 - confirm
        # that is intended.
        exit_code = EXCEPTION_TO_EXITCODE.get(e.__class__, 1)
        traceback.print_exc()
        sys.exit(exit_code)
    else:
        # Reached only when no exception was raised in the try body.
        return 0