Example #1
def test_checkpoint_function_sample_transfomer():

    X = np.arange(20, dtype=int).reshape(10, 2)
    samples = [mario.Sample(data, key=str(i)) for i, data in enumerate(X)]
    offset = 3
    oracle = X + offset

    with tempfile.TemporaryDirectory() as d:
        model_path = os.path.join(d, "model.pkl")
        features_dir = os.path.join(d, "features")

        transformer = mario.wrap(
            [FunctionTransformer, "sample", "checkpoint"],
            func=_offset_add_func,
            kw_args=dict(offset=offset),
            validate=True,
            model_path=model_path,
            features_dir=features_dir,
        )

        features = transformer.transform(samples)
        _assert_checkpoints(features, oracle, model_path, features_dir, True)

        features = transformer.fit_transform(samples)
        _assert_checkpoints(features, oracle, model_path, features_dir, True)
        _assert_delayed_samples(features)

        # remove all files and call fit_transform again
        shutil.rmtree(d)
        features = transformer.fit_transform(samples)
        _assert_checkpoints(features, oracle, model_path, features_dir, True)

    # test when both model_path and features_dir are None
    transformer = mario.wrap(
        [FunctionTransformer, "sample", "checkpoint"],
        func=_offset_add_func,
        kw_args=dict(offset=offset),
        validate=True,
    )
    features = transformer.transform(samples)
    _assert_all_close_numpy_array(oracle, [s.data for s in features])

    # test when only features_dir is set and a custom hash function is used
    with tempfile.TemporaryDirectory() as dir_name:
        transformer = mario.wrap(
            [FunctionTransformer, "sample", "checkpoint"],
            func=_offset_add_func,
            kw_args=dict(offset=offset),
            validate=True,
            features_dir=dir_name,
            hash_fn=hash_string,
        )

        features = transformer.transform(samples)
        # Checking that the hash can be cast to an integer
        assert isinstance(int(features[0]._load.args[0].split("/")[-2]), int)

        _assert_all_close_numpy_array(oracle, [s.data for s in features])
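
A note on the helpers used above: `_offset_add_func` and the `_assert_*` functions are defined elsewhere in the test module. A minimal sketch of the offset function, consistent with the `oracle = X + offset` expectation (an assumption, not the original helper), could be:

import numpy as np


def _offset_add_func(X, offset=1):
    # FunctionTransformer receives the sample data here; adding the offset
    # reproduces the oracle checked by the assertions above.
    return np.asarray(X) + offset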
Example #2
def _build_transformer(path, i):

    features_dir = os.path.join(path, f"transformer{i}")
    estimator = mario.wrap(
        [DummyTransformer, "sample", "checkpoint"], i=i, features_dir=features_dir
    )
    return estimator
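
`DummyTransformer` itself is not part of this listing. Judging from the pipeline tests (stacking `offset` of them yields `X + offset`), a plausible minimal sketch is the following hypothetical version, where every call adds 1:

from sklearn.base import BaseEstimator, TransformerMixin


class DummyTransformer(TransformerMixin, BaseEstimator):
    """Hypothetical stateless transformer that adds 1 to every sample."""

    def __init__(self, i=0):
        self.i = i  # only used to give each step its own checkpoint directory

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return [x + 1 for x in X]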
Example #3
    def _run(dask_enabled):

        X = np.ones(shape=(10, 2), dtype=int)
        samples_transform = mario.SampleSet(
            [mario.Sample(data, key=str(i)) for i, data in enumerate(X)], key="1"
        )
        offset = 2
        oracle = X + offset

        with tempfile.TemporaryDirectory() as d:
            pipeline = Pipeline(
                [(f"{i}", _build_transformer(d, i)) for i in range(offset)]
            )
            if dask_enabled:
                pipeline = mario.wrap(["dask"], pipeline)
                transformed_samples = pipeline.transform([samples_transform]).compute(
                    scheduler="single-threaded"
                )
            else:
                transformed_samples = pipeline.transform([samples_transform])

            _assert_all_close_numpy_array(
                oracle,
                [s.data for sample_set in transformed_samples for s in sample_set],
            )
            assert np.all([len(s) == 10 for s in transformed_samples])
Example #4
    def _run(dask_enabled):
        X = np.ones(shape=(10, 2), dtype=int)
        samples = [mario.Sample(data, key=str(i)) for i, data in enumerate(X)]
        samples_transform = [
            mario.Sample(data, key=str(i + 10)) for i, data in enumerate(X)
        ]
        oracle = X + 2

        with tempfile.TemporaryDirectory() as d:
            fitter = ("0", _build_estimator(d, 0))
            transformer = ("1", _build_transformer(d, 1))
            pipeline = Pipeline([fitter, transformer])
            if dask_enabled:
                pipeline = mario.wrap(["dask"], pipeline, fit_tag="GPU", npartitions=1)
                pipeline = pipeline.fit(samples)
                tags = mario.dask_tags(pipeline)

                assert len(tags) == 1, tags
                transformed_samples = pipeline.transform(samples_transform)

                transformed_samples = transformed_samples.compute(
                    scheduler="single-threaded"
                )
            else:
                pipeline = pipeline.fit(samples)
                transformed_samples = pipeline.transform(samples_transform)

            _assert_all_close_numpy_array(oracle, [s.data for s in transformed_samples])
Example #5
def test_mod_4hz():
    """Loading and running the mod-4hz annotator."""
    # Test setup and config
    annotator = bob.bio.base.load_resource("mod-4hz", "annotator")
    assert isinstance(annotator, bob.bio.spear.annotator.Mod_4Hz)

    # Read input
    rate, wav = _wav()

    # Test the VAD annotator
    annotator = bob.bio.spear.annotator.Mod_4Hz()
    _compare(
        annotator.transform_one(wav, sample_rate=rate),
        pkg_resources.resource_filename(
            "bob.bio.spear.test", "data/vad_mod_4hz.hdf5"
        ),
    )

    # Test the processing of Sample objects and tags of annotator transformer
    wrapped_annotator = wrap(["sample"], annotator)
    samples = [Sample(data=wav, rate=rate)]
    # Attribute `rate` should be passed as `sample_rate` argument of transform (tags)
    result = wrapped_annotator.transform(samples)
    # Annotations should be in attribute `annotations` of result samples (tags)
    _compare(
        result[0].annotations,
        pkg_resources.resource_filename(
            "bob.bio.spear.test", "data/vad_mod_4hz.hdf5"
        ),
    )
Example #6
def wrap_sample_preprocessor(
        preprocessor,
        transform_extra_arguments=(("annotations", "annotations"), ),
        **kwargs,
):
    """
    Wraps :any:`bob.bio.base.preprocessor.Preprocessor` with
    :any:`bob.pipelines.wrappers.SampleWrapper`

    .. warning::
       Unlike :any:`wrap_checkpoint_preprocessor`, this wrapper does not checkpoint data

    Parameters
    ----------

    preprocessor: :any:`bob.bio.base.preprocessor.Preprocessor`
       Instance of :any:`bob.bio.base.preprocessor.Preprocessor` to be wrapped

    transform_extra_arguments: [tuple]
        Same behavior as ``transform_extra_arguments`` in :any:`bob.pipelines.wrappers.SampleWrapper`

    """

    transformer = PreprocessorTransformer(preprocessor)
    return mario.wrap(
        ["sample"],
        transformer,
        transform_extra_arguments=transform_extra_arguments,
    )
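
A hedged usage sketch for this wrapper; `my_preprocessor`, `raw_image` and `annotations` below are placeholders and not part of the original listing:

wrapped = wrap_sample_preprocessor(my_preprocessor)

# Because of `transform_extra_arguments`, the `annotations` attribute of each
# sample is forwarded to the underlying preprocessor call.
samples = [mario.Sample(raw_image, key="0", annotations=annotations)]
preprocessed = wrapped.transform(samples)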
Example #7
    def _run(dask_enabled):
        X = np.ones(shape=(10, 2), dtype=int)
        samples = [mario.Sample(data, key=str(i)) for i, data in enumerate(X)]
        samples_transform = [
            mario.Sample(data, key=str(i + 10)) for i, data in enumerate(X)
        ]
        oracle = X + 2

        with tempfile.TemporaryDirectory() as d:
            fitter = ("0", _build_estimator(d, 0))
            transformer = (
                "1",
                _build_transformer(d, 1),
            )

            pipeline = Pipeline([fitter, transformer])
            if dask_enabled:
                dask_client = _get_local_client()
                pipeline = mario.wrap(["dask"], pipeline)
                pipeline = pipeline.fit(samples)
                transformed_samples = pipeline.transform(samples_transform).compute(
                    scheduler=dask_client
                )
            else:
                pipeline = pipeline.fit(samples)
                transformed_samples = pipeline.transform(samples_transform)

            _assert_all_close_numpy_array(oracle, [s.data for s in transformed_samples])
Example #8
def test_checkpoint_fittable_sample_transformer():
    X = np.ones(shape=(10, 2), dtype=int)
    samples = [mario.Sample(data, key=str(i)) for i, data in enumerate(X)]
    oracle = X + 1

    with tempfile.TemporaryDirectory() as d:
        model_path = os.path.join(d, "model.pkl")
        features_dir = os.path.join(d, "features")

        transformer = mario.wrap(
            [DummyWithFit, "sample", "checkpoint"],
            model_path=model_path,
            features_dir=features_dir,
        )
        assert not mario.utils.is_estimator_stateless(transformer)
        features = transformer.fit(samples).transform(samples)
        _assert_checkpoints(features, oracle, model_path, features_dir, False)

        features = transformer.fit_transform(samples)
        _assert_checkpoints(features, oracle, model_path, features_dir, False)
        _assert_delayed_samples(features)

        # remove all files and call fit_transform again
        shutil.rmtree(d)
        features = transformer.fit_transform(samples)
        _assert_checkpoints(features, oracle, model_path, features_dir, False)
Example #9
def video_wrap_skpipeline(sk_pipeline):
    """
    This function takes a `sklearn.pipeline.Pipeline` and wraps each estimator inside it with
    :any:`bob.bio.video.transformer.VideoWrapper`
    """

    for i, name, estimator in sk_pipeline._iter():

        # 1. Unwrap the estimator
        # If the estimator is sample-wrapped, take `estimator.estimator`.
        transformer = (estimator.estimator
                       if hasattr(estimator, "estimator") else estimator)

        # 2. do a video wrap
        transformer = VideoWrapper(transformer)

        # 3. Sample wrap again
        transformer = wrap(
            ["sample"],
            transformer,
            fit_extra_arguments=estimator.fit_extra_arguments,
            transform_extra_arguments=estimator.transform_extra_arguments,
        )

        sk_pipeline.steps[i] = (name, transformer)

    return sk_pipeline
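
A hedged usage sketch for `video_wrap_skpipeline`, assuming the input pipeline was built from sample-wrapped steps (so each step exposes `fit_extra_arguments` and `transform_extra_arguments`):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer

sk_pipeline = make_pipeline(wrap(["sample"], FunctionTransformer()))
# Each step is unwrapped, wrapped with VideoWrapper, and sample-wrapped again,
# so the pipeline now processes one video (a sequence of frames) per sample.
video_pipeline = video_wrap_skpipeline(sk_pipeline)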
Example #10
def annotate(database, groups, annotator, output_dir, dask_client, **kwargs):
    """Annotates a database.

    The annotations are written as text (JSON) files, which can be read
    back using :any:`read_annotation_file` (with ``annotation_type='json'``).
    """
    log_parameters(logger)

    # Allows passing of Sample objects as parameters
    annotator = wrap(["sample"], annotator, output_attribute="annotations")

    # Will save the annotations (stored in the `annotations` attribute) to a JSON file
    annotator = wrap(
        ["checkpoint"],
        annotator,
        features_dir=output_dir,
        extension=".json",
        save_func=save_json,
        load_func=load_json,
        sample_attribute="annotations",
    )

    # Allows reception of Dask Bags
    annotator = wrap(["dask"], annotator)

    # Transformer that splits the samples into several Dask Bags
    to_dask_bags = ToDaskBag(npartitions=50)

    logger.debug("Retrieving samples from database.")
    samples = database.all_samples(groups)

    # Sets the scheduler to local if no dask_client is specified
    if dask_client is not None:
        scheduler = dask_client
    else:
        scheduler = "single-threaded"

    # Splits the samples list into bags
    dask_bags = to_dask_bags.transform(samples)

    logger.info(f"Saving annotations in {output_dir}.")
    logger.info(f"Annotating {len(samples)} samples...")
    annotator.transform(dask_bags).compute(scheduler=scheduler)

    logger.info("All annotations written.")
Example #11
def test_dask_checkpoint_transform_pipeline():
    X = np.ones(shape=(10, 2), dtype=int)
    samples_transform = [mario.Sample(data, key=str(i)) for i, data in enumerate(X)]
    with tempfile.TemporaryDirectory() as d:
        bag_transformer = mario.ToDaskBag()
        estimator = mario.wrap(["dask"], _build_transformer(d, 0), transform_tag="CPU")
        X_tr = estimator.transform(bag_transformer.transform(samples_transform))
        assert len(mario.dask_tags(estimator)) == 1
        assert len(X_tr.compute(scheduler="single-threaded")) == 10
Example #12
def embedding_transformer(
    cropped_image_size,
    embedding,
    cropped_positions,
    fixed_positions=None,
    color_channel="rgb",
    annotator=None,
    **kwargs,
):
    """
    Creates a pipeline composed of a FaceCropper and an embedding extractor.
    This transformer is suited for Facenet-based architectures

    .. warning::
       This will resize images to the requested `cropped_image_size`

    """

    face_cropper, transform_extra_arguments = make_cropper(
        cropped_image_size=cropped_image_size,
        cropped_positions=cropped_positions,
        fixed_positions=fixed_positions,
        color_channel=color_channel,
        annotator=annotator,
        **kwargs,
    )

    # Support None and "passthrough" Estimators
    if embedding is not None and type(embedding) is not str:
        embedding = wrap(["sample"], embedding)

    transformer = Pipeline([
        (
            "cropper",
            wrap(
                ["sample"],
                face_cropper,
                transform_extra_arguments=transform_extra_arguments,
            ),
        ),
        ("embedding", embedding),
    ])

    return transformer
Example #13
def load(annotation_type, fixed_positions=None):

    transform_extra_arguments = (("annotations", "annotations"), )

    transformer = make_pipeline(
        wrap(
            ["sample"],
            ToGray(),
            transform_extra_arguments=transform_extra_arguments,
        ),
        wrap(
            ["sample"],
            FunctionTransformer(lambda X: [x.flatten() for x in X]),
        ),
    )

    algorithm = Distance()

    return PipelineSimple(transformer, algorithm)
Example #14
def test_resample():
    """Resample using the transformer."""
    audio_path = resource_filename("bob.bio.spear.test", "data/sample.wav")
    audio_n_samples = 77760
    audio_sample_rate = 16000

    sample = Sample(data=audio_path, channel=None, rate=audio_sample_rate)
    pipeline = make_pipeline(
        PathToAudio(), wrap(["sample"], Resample(audio_sample_rate // 2)))
    results = pipeline.transform([sample])[0]
    assert results.data.shape == (audio_n_samples // 2, ), results.data.shape
Example #15
def test_failing_sample_transformer():

    X = np.zeros(shape=(10, 2))
    samples = [mario.Sample(data) for i, data in enumerate(X)]
    expected = np.full_like(X, 2, dtype=object)
    expected[::2] = None
    expected[1::4] = None

    transformer = Pipeline(
        [
            ("1", mario.wrap([HalfFailingDummyTransformer, "sample"])),
            ("2", mario.wrap([HalfFailingDummyTransformer, "sample"])),
        ]
    )
    features = transformer.transform(samples)

    features = [f.data for f in features]
    assert len(expected) == len(
        features
    ), f"Expected: {len(expected)} but got: {len(features)}"
    assert all(
        (e == f).all() for e, f in zip(expected, features)
    ), f"Expected: {expected} but got: {features}"

    samples = [mario.Sample(data) for data in X]
    expected = [None] * X.shape[0]
    transformer = Pipeline(
        [
            ("1", mario.wrap([FullFailingDummyTransformer, "sample"])),
            ("2", mario.wrap([FullFailingDummyTransformer, "sample"])),
        ]
    )
    features = transformer.transform(samples)

    features = [f.data for f in features]
    assert len(expected) == len(
        features
    ), f"Expected: {len(expected)} but got: {len(features)}"
    assert all(
        e == f for e, f in zip(expected, features)
    ), f"Expected: {expected} but got: {features}"
Example #16
def test_fittable_sample_transformer():

    X = np.ones(shape=(10, 2), dtype=int)
    samples = [mario.Sample(data) for data in X]

    # Mixing up with an object
    transformer = mario.wrap([DummyWithFit, "sample"])
    features = transformer.fit(samples).transform(samples)
    _assert_all_close_numpy_array(X + 1, [s.data for s in features])

    features = transformer.fit_transform(samples)
    _assert_all_close_numpy_array(X + 1, [s.data for s in features])
Example #17
def _build_estimator(path, i):
    base_dir = os.path.join(path, f"transformer{i}")
    os.makedirs(base_dir, exist_ok=True)
    model_path = os.path.join(base_dir, "model.pkl")
    features_dir = os.path.join(base_dir, "features")

    transformer = mario.wrap(
        [DummyWithFit, "sample", "checkpoint"],
        model_path=model_path,
        features_dir=features_dir,
    )
    return transformer
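
`DummyWithFit` is another test utility that is not shown here. Based on the `oracle = X + 1` assertions above, a minimal fittable sketch (hypothetical) could look like:

from sklearn.base import BaseEstimator, TransformerMixin


class DummyWithFit(TransformerMixin, BaseEstimator):
    """Hypothetical fittable transformer: fit() stores an offset, transform() adds it."""

    def fit(self, X, y=None):
        # Store something so the checkpoint wrapper has a model to pickle.
        self.offset_ = 1
        return self

    def transform(self, X):
        return [x + self.offset_ for x in X]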
Example #18
def check_valid_pipeline(pipeline_simple):
    """
    Applying some checks in the PipelineSimple
    """

    # CHECKING THE TRANSFORMER
    # Checking if it's a Scikit Pipeline or an estimator
    if isinstance(pipeline_simple.transformer, Pipeline):

        # Checking if all steps are sample-wrapped; if not, wrap them
        for idx, (name, p) in enumerate(pipeline_simple.transformer.steps):
            if (
                not is_instance_nested(p, "estimator", SampleWrapper)
                and type(p) is not str
                and p is not None
            ):
                pipeline_simple.transformer.steps[idx] = (name, wrap(["sample"], p))

    # In this case it can be a simple estimator:
    # check whether it is already sample-wrapped and, if not, wrap it
    elif is_instance_nested(
        pipeline_simple.transformer, "estimator", BaseEstimator
    ):
        if not is_instance_nested(
            pipeline_simple.transformer, "estimator", SampleWrapper
        ):
            pipeline_simple.transformer = wrap(
                ["sample"], pipeline_simple.transformer
            )
    else:
        raise ValueError(
            "pipeline_simple.transformer should be an instance of either "
            "`sklearn.pipeline.Pipeline` or `sklearn.base.BaseEstimator`, "
            f"not {pipeline_simple.transformer}"
        )

    # Checking the Biometric algorithm
    if not isinstance(pipeline_simple.biometric_algorithm, BioAlgorithm):
        raise ValueError(
            "pipeline_simple.biometric_algorithm should be an instance of "
            f"`BioAlgorithm`, not {pipeline_simple.biometric_algorithm}"
        )

    return True
Example #19
def wrap_checkpoint_preprocessor(
    preprocessor,
    features_dir=None,
    transform_extra_arguments=(("annotations", "annotations"), ),
    load_func=None,
    save_func=None,
    extension=".hdf5",
):
    """
    Wraps :any:`bob.bio.base.preprocessor.Preprocessor` with
    :any:`bob.pipelines.wrappers.CheckpointWrapper` and :any:`bob.pipelines.wrappers.SampleWrapper`

    Parameters
    ----------

    preprocessor: :any:`bob.bio.base.preprocessor.Preprocessor`
       Instance of :any:`bob.bio.base.preprocessor.Preprocessor` to be wrapped

    features_dir: str
       Features directory to be checkpointed (see :any:`bob.pipelines.CheckpointWrapper`).

    extension : str, optional
        Extension of preprocessed files (see :any:`bob.pipelines.CheckpointWrapper`).

    load_func : None, optional
        Function that loads data to be preprocessed.
        The default is :any:`bob.bio.base.preprocessor.Preprocessor.read_data`

    save_func : None, optional
        Function that saves preprocessed data.
        The default is :any:`bob.bio.base.preprocessor.Preprocessor.write_data`

    transform_extra_arguments: [tuple]
        Same behavior as ``transform_extra_arguments`` in :any:`bob.pipelines.wrappers.SampleWrapper`

    """

    transformer = PreprocessorTransformer(preprocessor)
    return mario.wrap(
        ["sample", "checkpoint"],
        transformer,
        load_func=load_func or preprocessor.read_data,
        save_func=save_func or preprocessor.write_data,
        features_dir=features_dir,
        transform_extra_arguments=transform_extra_arguments,
        extension=extension,
    )
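
A hedged usage sketch of the checkpointing variant; `my_preprocessor` and `samples` are placeholders, not part of the original listing:

import tempfile

with tempfile.TemporaryDirectory() as d:
    wrapped = wrap_checkpoint_preprocessor(my_preprocessor, features_dir=d)
    # Samples are processed once and written under `d` (one ".hdf5" file per
    # sample key); a second call loads the checkpointed files instead of
    # recomputing them.
    preprocessed = wrapped.transform(samples)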
Example #20
def test_function_sample_transfomer():

    X = np.zeros(shape=(10, 2), dtype=int)
    samples = [mario.Sample(data) for data in X]

    transformer = mario.wrap(
        [FunctionTransformer, "sample"],
        func=_offset_add_func,
        kw_args=dict(offset=3),
        validate=True,
    )

    features = transformer.transform(samples)
    _assert_all_close_numpy_array(X + 3, [s.data for s in features])

    features = transformer.fit_transform(samples)
    _assert_all_close_numpy_array(X + 3, [s.data for s in features])
Example #21
def run_experiment(dataset):
    def linearize(X):
        X = np.asarray(X)
        return np.reshape(X, (X.shape[0], -1))

    # Testing it in a real recognition systems
    transformer = wrap(
        ["sample"], make_pipeline(FunctionTransformer(linearize))
    )

    pipeline_simple = PipelineSimple(transformer, Distance())

    return pipeline_simple(
        dataset.background_model_samples(),
        dataset.references(),
        dataset.probes(),
    )
Example #22
def wrap_sample_extractor(
    extractor,
    fit_extra_arguments=None,
    transform_extra_arguments=None,
    model_path=None,
    **kwargs,
):
    """
    Wraps :any:`bob.bio.base.extractor.Extractor` with
    :any:`bob.pipelines.wrappers.SampleWrapper`

    Parameters
    ----------

    extractor: :any:`bob.bio.base.extractor.Extractor`
       Instance of :any:`bob.bio.base.extractor.Extractor` to be wrapped

    transform_extra_arguments: [tuple], optional
        Same behavior as ``transform_extra_arguments`` in :any:`bob.pipelines.wrappers.SampleWrapper`

    model_path: str
        Path to `extractor_file` in :any:`bob.bio.base.extractor.Extractor`

    """

    extractor_file = (os.path.join(model_path, "Extractor.hdf5")
                      if model_path is not None else None)

    transformer = ExtractorTransformer(extractor, model_path=extractor_file)

    (
        transform_extra_arguments,
        fit_extra_arguments,
    ) = _prepare_extractor_sample_args(extractor, transform_extra_arguments,
                                       fit_extra_arguments)

    return mario.wrap(
        ["sample"],
        transformer,
        transform_extra_arguments=transform_extra_arguments,
        fit_extra_arguments=fit_extra_arguments,
        **kwargs,
    )
Example #23
import bob.pipelines as mario

pipeline = mario.wrap(["dask"], pipeline)  # noqa
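
After dask-wrapping, the pipeline consumes Dask Bags rather than plain lists. Following the pattern of Example #11, a typical follow-up looks like this sketch, where `samples` is assumed to be a list of `mario.Sample` objects:

bags = mario.ToDaskBag(npartitions=10).transform(samples)
result = pipeline.transform(bags).compute(scheduler="single-threaded")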
Example #24
def wrap_bob_legacy(
    bob_object,
    dir_name,
    fit_extra_arguments=None,
    transform_extra_arguments=None,
    dask_it=False,
    **kwargs,
):
    """
    Wraps either :any:`bob.bio.base.preprocessor.Preprocessor` or
    :any:`bob.bio.base.extractor.Extractor` with
    :any:`sklearn.base.TransformerMixin` and
    :any:`bob.pipelines.wrappers.CheckpointWrapper` and
    :any:`bob.pipelines.wrappers.SampleWrapper`


    Parameters
    ----------

    bob_object: object
        Instance of :any:`bob.bio.base.preprocessor.Preprocessor` or
        :any:`bob.bio.base.extractor.Extractor`

    dir_name: str
        Directory name for the checkpoints

    fit_extra_arguments: [tuple]
        Same behavior as ``fit_extra_arguments`` in
        :any:`bob.pipelines.wrappers.SampleWrapper`

    transform_extra_arguments: [tuple]
        Same behavior as ``transform_extra_arguments`` in
        :any:`bob.pipelines.wrappers.SampleWrapper`

    dask_it: bool
        If True, the transformer will be a dask graph
    """

    if isinstance(bob_object, Preprocessor):
        transformer = wrap_checkpoint_preprocessor(
            bob_object,
            features_dir=os.path.join(dir_name, "preprocessor"),
            **kwargs,
        )
    elif isinstance(bob_object, Extractor):
        transformer = wrap_checkpoint_extractor(
            bob_object,
            features_dir=os.path.join(dir_name, "extractor"),
            model_path=dir_name,
            fit_extra_arguments=fit_extra_arguments,
            transform_extra_arguments=transform_extra_arguments,
            **kwargs,
        )
    else:
        raise ValueError(
            "`bob_object` should be an instance of `Preprocessor` or `Extractor`"
        )

    if dask_it:
        transformer = mario.wrap(["dask"], transformer)

    return transformer
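
A hedged usage sketch for `wrap_bob_legacy`; `legacy_extractor` and `samples` are placeholders for a concrete :any:`bob.bio.base.extractor.Extractor` instance and a list of samples:

import tempfile

with tempfile.TemporaryDirectory() as d:
    transformer = wrap_bob_legacy(legacy_extractor, d, dask_it=False)
    # Checkpoints are written under os.path.join(d, "extractor").
    features = transformer.transform(samples)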
Example #25
def wrap_checkpoint_extractor(
    extractor,
    features_dir=None,
    fit_extra_arguments=None,
    transform_extra_arguments=None,
    load_func=None,
    save_func=None,
    extension=".hdf5",
    model_path=None,
    **kwargs,
):
    """
    Wraps :any:`bob.bio.base.extractor.Extractor` with
    :any:`bob.pipelines.wrappers.CheckpointWrapper` and :any:`bob.pipelines.wrappers.SampleWrapper`

    Parameters
    ----------

    extractor: :any:`bob.bio.base.extractor.Extractor`
       Instance of :any:`bob.bio.base.extractor.Extractor` to be wrapped

    features_dir: str
       Features directory to be checkpointed (see :any:`bob.pipelines.CheckpointWrapper`).

    extension : str, optional
        Extension of extracted feature files (see :any:`bob.pipelines.CheckpointWrapper`).

    load_func : None, optional
        Function that loads the extracted features.
        The default is :any:`bob.bio.base.extractor.Extractor.read_feature`

    save_func : None, optional
        Function that saves the extracted features.
        The default is :any:`bob.bio.base.extractor.Extractor.write_feature`

    fit_extra_arguments: [tuple]
        Same behavior as ``fit_extra_arguments`` in :any:`bob.pipelines.wrappers.SampleWrapper`

    transform_extra_arguments: [tuple], optional
        Same behavior as ``transform_extra_arguments`` in :any:`bob.pipelines.wrappers.SampleWrapper`

    model_path: str
        See :any:`bob.bio.base.transformers.ExtractorTransformer`.

    """

    extractor_file = (os.path.join(model_path, "Extractor.hdf5")
                      if model_path is not None else None)

    model_file = (os.path.join(model_path, "Extractor.pkl")
                  if model_path is not None else None)
    transformer = ExtractorTransformer(extractor, model_path=extractor_file)

    (
        transform_extra_arguments,
        fit_extra_arguments,
    ) = _prepare_extractor_sample_args(extractor, transform_extra_arguments,
                                       fit_extra_arguments)

    return mario.wrap(
        ["sample", "checkpoint"],
        transformer,
        load_func=load_func or extractor.read_feature,
        save_func=save_func or extractor.write_feature,
        model_path=model_file,
        features_dir=features_dir,
        transform_extra_arguments=transform_extra_arguments,
        fit_extra_arguments=fit_extra_arguments,
        **kwargs,
    )
Example #26
# Kmeans machine used for GMM initialization
kmeans_trainer = KMeansMachine(
    n_clusters=n_gaussians,
    max_iter=25,
    convergence_threshold=0.0,
    init_max_iter=5,
    oversampling_factor=128,
)

# Algorithm used for enrollment and scoring, trained first as a Transformer.
bioalgorithm = GMM(
    n_gaussians=n_gaussians,
    max_fitting_steps=25,
    enroll_iterations=1,
    convergence_threshold=0.0,  # with 0.0, max_fitting_steps is the stopping criterion
    k_means_trainer=kmeans_trainer,
    random_state=2,
)

# Transformer part of PipelineSimple
transformer = Pipeline([
    ("annotator", wrap(["sample"], Mod_4Hz())),
    ("extractor", wrap(["sample"], Cepstral())),
    ("algorithm_trainer", wrap(["sample"], bioalgorithm)),
])

# PipelineSimple instance used by `execute_pipeline_simple` or the `pipeline simple` command
pipeline = PipelineSimple(transformer, bioalgorithm)
Example #27
    k_means_trainer=KMeansMachine(
        n_clusters=256,
        max_iter=2,
        random_state=SEED,
        init_max_iter=5,
        oversampling_factor=64,
    ),
    return_stats_in_transform=True,
)

bioalgorithm = ISV(
    # ISV parameters
    r_U=50,
    random_state=SEED,
    em_iterations=2,
    enroll_iterations=1,
    # GMM parameters
    ubm=ubm,
)

transformer = Pipeline([
    ("annotator", Energy_2Gauss()),
    ("extractor", Cepstral()),
    ("ubm", ubm),
    ("reference_id_encoder", ReferenceIdEncoder()),
    ("isv", bioalgorithm),
])
transformer = wrap(["sample"], transformer)

pipeline = PipelineSimple(transformer, bioalgorithm)
Example #28
def annotate_samples(samples, reader, make_key, annotator, output_dir,
                     dask_client, **kwargs):
    """Annotates a list of samples.

    This command is very similar to ``bob bio annotate`` except that it works
    without a database interface. You must provide a list of samples as well as
    two functions:

        def reader(sample):
            # Loads data from a sample.
            # for example:
            data = bob.io.base.load(sample)
            # data will be given to the annotator
            return data

        def make_key(sample):
            # Creates a unique str identifier for this sample.
            # for example:
            return str(sample)
    """
    log_parameters(logger, ignore=("samples", ))

    # Allows passing of Sample objects as parameters
    annotator = wrap(["sample"], annotator, output_attribute="annotations")

    # Will save the annotations (stored in the `annotations` attribute) to a JSON file
    annotator = wrap(
        bases=["checkpoint"],
        estimator=annotator,
        features_dir=output_dir,
        extension=".json",
        save_func=save_json,
        load_func=load_json,
        sample_attribute="annotations",
    )

    # Allows reception of Dask Bags
    annotator = wrap(["dask"], annotator)

    # Transformer that splits the samples into several Dask Bags
    to_dask_bags = ToDaskBag(npartitions=50)

    if dask_client is not None:
        scheduler = dask_client
    else:
        scheduler = "single-threaded"

    # Converts samples into a list of DelayedSample objects
    samples_obj = [
        DelayedSample(
            load=functools.partial(reader, s),
            key=make_key(s),
        ) for s in samples
    ]

    # Splits the samples list into bags
    dask_bags = to_dask_bags.transform(samples_obj)

    logger.info(f"Saving annotations in {output_dir}")
    logger.info(f"Annotating {len(samples_obj)} samples...")
    annotator.transform(dask_bags).compute(scheduler=scheduler)

    logger.info("All annotations written.")
Example #29
# Kmeans machine used for GMM initialization
kmeans_trainer = KMeansMachine(
    n_clusters=n_gaussians,
    max_iter=25,
    convergence_threshold=0.0,
    init_max_iter=5,
    oversampling_factor=64,
)

# Algorithm used for enrollment and scoring, trained first as a Transformer.
bioalgorithm = GMM(
    n_gaussians=n_gaussians,
    max_fitting_steps=25,
    enroll_iterations=1,
    convergence_threshold=0.0,  # with 0.0, max_fitting_steps is the stopping criterion
    k_means_trainer=kmeans_trainer,
    random_state=2,
)

# Transformer part of PipelineSimple
transformer = Pipeline([
    ("annotator", wrap(["sample"], Energy_2Gauss())),
    ("extractor", wrap(["sample"], Cepstral())),
    ("algorithm_trainer", wrap(["sample"], bioalgorithm)),
])

# PipelineSimple instance used by `execute_pipeline_simple` or the `pipeline simple` command
pipeline = PipelineSimple(transformer, bioalgorithm)
Example #30
def test_failing_checkpoint_transformer():

    X = np.zeros(shape=(10, 2))
    samples = [mario.Sample(data, key=i) for i, data in enumerate(X)]
    expected = np.full_like(X, 2)
    expected[::2] = None
    expected[1::4] = None
    expected = list(expected)

    with tempfile.TemporaryDirectory() as d:
        features_dir_1 = os.path.join(d, "features_1")
        features_dir_2 = os.path.join(d, "features_2")
        transformer = Pipeline(
            [
                (
                    "1",
                    mario.wrap(
                        [HalfFailingDummyTransformer, "sample", "checkpoint"],
                        features_dir=features_dir_1,
                    ),
                ),
                (
                    "2",
                    mario.wrap(
                        [HalfFailingDummyTransformer, "sample", "checkpoint"],
                        features_dir=features_dir_2,
                    ),
                ),
            ]
        )
        features = transformer.transform(samples)

        np_features = np.array(
            [
                np.full(X.shape[1], np.nan) if f.data is None else f.data
                for f in features
            ]
        )
        assert len(expected) == len(
            np_features
        ), f"Expected: {len(expected)} but got: {len(np_features)}"
        assert np.allclose(
            expected, np_features, equal_nan=True
        ), f"Expected: {expected} but got: {np_features}"

    samples = [mario.Sample(data, key=i) for i, data in enumerate(X)]
    expected = [None] * X.shape[0]

    with tempfile.TemporaryDirectory() as d:
        features_dir_1 = os.path.join(d, "features_1")
        features_dir_2 = os.path.join(d, "features_2")
        transformer = Pipeline(
            [
                (
                    "1",
                    mario.wrap(
                        [FullFailingDummyTransformer, "sample", "checkpoint"],
                        features_dir=features_dir_1,
                    ),
                ),
                (
                    "2",
                    mario.wrap(
                        [FullFailingDummyTransformer, "sample", "checkpoint"],
                        features_dir=features_dir_2,
                    ),
                ),
            ]
        )
        features = transformer.transform(samples)

        assert len(expected) == len(
            features
        ), f"Expected: {len(expected)} but got: {len(features)}"
        assert all(
            e == f.data for e, f in zip(expected, features)
        ), f"Expected: {expected} but got: {features}"