def test_MLFlowBackendFactory(self):
    """
    Check which backend types MLFlowBackendFactory.make builds for a local and a
    remote uri, first with the "mongo_db" config entry disabled, then enabled.
    :return:
    """
    from pypads.app.backends.mlflow import MLFlowBackendFactory, LocalMlFlowBackend, \
        MongoSupportMixin, RemoteMlFlowBackend
    from pypads.app.base import PyPads

    tracker = PyPads(uri=TEST_FOLDER, config=config)
    local_uri = TEST_FOLDER
    remote_uri = "http://mlflow.padre-lab.eu"

    # --------------------------- asserts ------------------------------
    # Without MongoSupport: expect the plain local / remote backend and no mongo mixin
    tracker.config = {**tracker.config, "mongo_db": False}
    expectations = ((local_uri, LocalMlFlowBackend), (remote_uri, RemoteMlFlowBackend))
    for target, backend_cls in expectations:
        self.assertIsInstance(MLFlowBackendFactory.make(target), backend_cls)
        self.assertNotIsInstance(MLFlowBackendFactory.make(target), MongoSupportMixin)

    # With MongoSupport: both uris have to yield a backend carrying the mongo mixin
    tracker.config = {**tracker.config, "mongo_db": True}
    for target in (local_uri, remote_uri):
        self.assertIsInstance(MLFlowBackendFactory.make(target), MongoSupportMixin)
def test_pipeline(self):
    """
    Run the sklearn pipeline experiment once under a tracker and check that the
    tracker's active run is the active mlflow run.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads without any setup functions
    from pypads.app.base import PyPads
    tracker = PyPads(setup_fns={}, log_level="DEBUG")
    tracker.start_track(experiment_name="1. Experiment")

    import timeit
    elapsed = timeit.Timer(sklearn_pipeline_experiment).timeit(1)
    print(elapsed)

    # --------------------------- asserts ---------------------------
    active = mlflow.active_run()
    assert tracker.api.active_run().info.run_id == active.info.run_id

    # artifacts = [x for x in tracker.results.get_artifacts(run_id=run.info.run_id)]
    # assert len(artifacts) > 0
    tracker.api.end_run()
def test_experiment_configuration(self):
    """
    Start tracking under the experiment name "ConfiguredExperiment" and fit a
    decision tree on iris. The test currently has no asserts.
    :return:
    """
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads()
    tracker.start_track(experiment_name="ConfiguredExperiment")

    from sklearn import datasets, metrics
    from sklearn.tree import DecisionTreeClassifier

    # Load iris and fit a classifier on the full data set
    iris = datasets.load_iris()
    classifier = DecisionTreeClassifier()
    classifier.fit(iris.data, iris.target)

    # Predict on the training data and summarize the fit
    truth = iris.target
    guesses = classifier.predict(iris.data)
    print(metrics.classification_report(truth, guesses))

    # print(metrics.confusion_matrix(expected, predicted))
    # assert statements
    # assert tracker._experiment.regex == "ConfiguredExperiment"
    # TODO add asserts
    tracker.api.end_run()
def test_recursive_function_tracking(self):
    """
    Track a recursive function and count how often the logger's __post__ fires,
    once with the "recursion_identity" config entry enabled and once disabled.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads

    class TestLogger(InjectionLogger):
        """ Utility logger for testing purposes: counts its __post__ invocations. """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

        def __post__(self, ctx, *args, _logger_call, _pypads_pre_return, _pypads_result, _args, _kwargs,
                     **kwargs):
            # The counter lives in the enclosing test and is assigned further below
            nonlocal post_calls
            post_calls += 1

    counting_logger = TestLogger()
    logger_events = {"test_logger": counting_logger}
    logger_hooks = {"test_logger": {"on": ["pypads_log"]}}
    run_config = {"mongo_db": False}
    tracker = PyPads(uri=TEST_FOLDER, config=run_config, autostart=True, hooks=logger_hooks,
                     events=logger_events, setup_fns={}, log_level="DEBUG")

    @tracker.decorators.track(event=["pypads_log"])
    def recursive_dummy(s: str):
        if len(s) <= 1:
            print("I am a dummy recursion")
        else:
            recursive_dummy(s[:-1])
        return s

    # --------------------------- asserts ---------------------------
    # Ignoring recursive function calls is enabled: starting from 0 the counter must end at 1
    post_calls = 0
    run_config["recursion_identity"] = True
    tracker.config = {**DEFAULT_CONFIG, **run_config}
    recursive_dummy("".join([str(digit) for digit in range(1, 10)]))
    self.assertEqual(1, post_calls)

    # Ignoring recursive function calls is disabled: starting from 1 the counter must end at 10
    post_calls = 1
    run_config["recursion_identity"] = False
    tracker.config = {**DEFAULT_CONFIG, **run_config}
    recursive_dummy("".join([str(digit) for digit in range(1, 10)]))
    self.assertEqual(10, post_calls)
    # !-------------------------- asserts ---------------------------

    # End the mlflow run opened by PyPads
    tracker.api.end_run()
def parallel_tracking(min_samples_leaf=1):
    """
    Create a tracker, run the simple sklearn decision tree experiment with the
    given ``min_samples_leaf`` and deactivate tracking again.
    :param min_samples_leaf: value forwarded to the experiment
    :return: the unchanged ``min_samples_leaf`` argument
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker_config = {"mongo_db": False}
    tracker = PyPads(uri=TEST_FOLDER, config=tracker_config, autostart=True)

    # The experiment is imported only after the tracker has been created
    from tests.test_sklearn.base_sklearn_test import \
        sklearn_simple_decision_tree_experiment as run_experiment
    run_experiment(min_samples_leaf=min_samples_leaf)

    tracker.deactivate_tracking(run_atexits=True, reload_modules=False)
    return min_samples_leaf
def test_run_teardown(self):
    """
    Register a RunTeardown logger and check that its _call has not fired before
    tracker.api.end_run() but has fired afterwards.
    :return:
    """
    teardown_ran = False

    class TestLogger(RunTeardown):
        """ Utility logger for testing purposes: flips the enclosing flag when called. """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

        def _call(self, *args, _pypads_env: LoggerEnv, _logger_call: LoggerCall, _logger_output, **kwargs):
            nonlocal teardown_ran
            teardown_ran = True

    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, config=config, setup_fns={}, autostart=True)

    teardown_logger = TestLogger()
    tracker.api.register_teardown(name="test_teardown", post_fn=teardown_logger)

    # --------------------------- asserts ---------------------------
    self.assertFalse(teardown_ran)
    tracker.api.end_run()
    self.assertTrue(teardown_ran)
def test_minimal_mapping(self):
    """
    Run the sklearn pipeline experiment with only the ``minimal`` mapping and
    check that the run matches mlflow's active run and has at least one artifact.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    from pypads.injections.setup.misc_setup import DependencyRSF

    tracker = PyPads(uri=TEST_FOLDER, config=config, mappings=[minimal], autostart=True,
                     setup_fns={DependencyRSF()})

    import timeit
    elapsed = timeit.Timer(sklearn_pipeline_experiment).timeit(1)
    print(elapsed)

    # --------------------------- asserts ---------------------------
    active = mlflow.active_run()
    assert tracker.api.active_run().info.run_id == active.info.run_id

    found = list(tracker.results.get_artifacts(run_id=active.info.run_id))
    assert len(found) > 0
def test_order_lf(self):
    """
    Track the module-level ``experiment`` on the "order" anchor, run it once and
    check the run cache via ``run_exists(0, 1, 2)``.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, config=config, hooks=hooks, events=events, autostart=True)

    # The enclosing module is handed over as the context of the tracked function
    this_module = sys.modules[__name__]
    tracker.api.track(experiment, anchors=["order"], ctx=this_module)

    import timeit
    elapsed = timeit.Timer(experiment).timeit(1)
    print(elapsed)

    # --------------------------- asserts ---------------------------
    from pypads.app.pypads import get_current_pads
    assert get_current_pads().cache.run_exists(0, 1, 2)
def test_retry(self):
    """
    Track a function that raises on its first call and returns a value on any
    later call, then check the run cache and that the failure counter is 1.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, config=config, hooks=hooks, events=events, setup_fns={},
                     autostart=True)

    failures = 0

    def experiment():
        print("I'm an function level experiment")
        nonlocal failures
        if failures != 0:
            return "I'm a retried return value."
        # First call: record the failure, then raise
        failures = failures + 1
        raise Exception("Planed failure")

    experiment = tracker.api.track(experiment, anchors=["pypads_log"])

    import timeit
    elapsed = timeit.Timer(experiment).timeit(1)
    print(elapsed)

    # --------------------------- asserts ---------------------------
    from pypads.app.pypads import get_current_pads
    assert get_current_pads().cache.run_exists(id(logger))
    self.assertEqual(failures, 1)
def test_run_setup(self):
    """
    Pass a RunSetup logger as setup function and check that its _call has fired
    once the autostarted tracker has been constructed.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    setup_ran = False

    class TestLogger(RunSetup):
        """ Utility logger for testing purposes: flips the enclosing flag when called. """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

        def _call(self, *args, _pypads_env: LoggerEnv, _logger_call: LoggerCall, _logger_output, **kwargs):
            nonlocal setup_ran
            setup_ran = True

    # Activate tracking of pypads
    from pypads.app.base import PyPads
    setup_logger = TestLogger()
    tracker = PyPads(uri=TEST_FOLDER, config=config, setup_fns={setup_logger}, autostart=True)

    # --------------------------- asserts ---------------------------
    self.assertTrue(setup_ran)
def test_dataset(self):
    """
    Decorate a data loading function with the dataset decorator and check that
    the dataset repository holds an object under the hash of the returned data.
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(autostart=True)

    from sklearn.datasets import make_classification

    ds_name = "generated"
    output_layout = {'X': 'features', 'y': 'targets'}

    @tracker.decorators.dataset(name=ds_name, output_format=output_layout)
    def load_data():
        features, targets = make_classification(n_samples=150)
        return features, targets

    dataset = load_data()

    # --------------------------- asserts ---------------------------
    # The repository is queried with the persistent hash of the stringified return value
    expected_uid = persistent_hash(str(dataset))
    self.assertTrue(tracker.dataset_repository.has_object(uid=expected_uid))
    # !-------------------------- asserts ---------------------------
    tracker.api.end_run()
def test_decorator_passed_event(self):
    """
    Track a function through the track decorator with the event passed as a
    plain string and check that the run cache has an entry for id(logger).
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, config=config, hooks=hooks, events=events, autostart=True)

    @tracker.decorators.track(event="pypads_log")
    def experiment():
        print("I'm an function level experiment")
        return "I'm a return value."

    import timeit
    elapsed = timeit.Timer(experiment).timeit(1)
    print(elapsed)

    # --------------------------- asserts ---------------------------
    from pypads.app.pypads import get_current_pads
    assert get_current_pads().cache.run_exists(id(logger))
def test_simple_parameter_mapping(self):
    """
    Fit and evaluate a decision tree on iris under an autostarted tracker and
    check that the tracker's active run is the active mlflow run.
    :return:
    """
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(autostart=True)

    from sklearn import datasets, metrics
    from sklearn.tree import DecisionTreeClassifier

    # Load iris and fit a classifier on the full data set
    iris = datasets.load_iris()
    classifier = DecisionTreeClassifier()
    classifier.fit(iris.data, iris.target)

    # Predict on the training data and summarize the fit
    truth = iris.target
    guesses = classifier.predict(iris.data)
    print(metrics.classification_report(truth, guesses))
    print(metrics.confusion_matrix(truth, guesses))

    # assert statements
    import mlflow
    active = mlflow.active_run()
    assert tracker.api.active_run().info.run_id == active.info.run_id

    # assert len(tracker.mlf.list_artifacts(run.info.run_id)) == 0
    # parameters = [x for x in tracker.results.get_parameters(run_id=run.info.run_id)]
    # assert len(parameters) != 0
    tracker.api.end_run()
def test_api_layered_tracking(self):
    """
    Rebind three module-level experiment functions to their tracked versions on
    the "ran" anchor, run the outer one and check the value cached under id(logger).
    :return:
    """
    # The tracked wrappers replace the module-level functions of the same name
    global experiment, sub_experiment, more_experiment

    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, config=config, hooks=hooks, events=events, autostart=True)

    anchors = ["ran"]
    experiment = tracker.api.track(experiment, anchors=list(anchors))
    sub_experiment = tracker.api.track(sub_experiment, anchors=list(anchors))
    more_experiment = tracker.api.track(more_experiment, anchors=list(anchors))

    import timeit
    elapsed = timeit.Timer(experiment).timeit(1)
    print(elapsed)

    # --------------------------- asserts ---------------------------
    from pypads.app.pypads import get_current_pads
    run_cache = get_current_pads().cache
    assert run_cache.run_exists(id(logger))
    assert run_cache.run_get(id(logger)) == 16
    # !-------------------------- asserts ---------------------------
def test_multi_injection_logger(self):
    """
    Call a tracked function five times with a MultiInjectionLogger attached and
    check the logger's counter, its cached call stack and its cached output.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    class TestLogger(MultiInjectionLogger):
        """
        Utility logger for testing purposes: __pre__ increments attribute i and
        __post__ copies i into the logger output.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.i = 0

        @classmethod
        def output_schema_class(cls) -> Optional[Type[OutputModel]]:
            return DummyOutput

        @staticmethod
        def finalize_output(pads, logger_call, output, *args, **kwargs):
            pass

        def __pre__(self, ctx, *args, _logger_call, _logger_output, _args, _kwargs, **kwargs):
            self.i += 1

        def __post__(self, ctx, *args, _logger_call, _logger_output, _pypads_pre_return, _pypads_result,
                     _args, _kwargs, **kwargs):
            _logger_output.var = self.i

    test = TestLogger()
    logger_events = {"test_logger": test}
    logger_hooks = {"test_logger": {"on": ["pypads_log"]}}

    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, config=config, hooks=logger_hooks, events=logger_events,
                     setup_fns={}, autostart=True)

    tracked = tracker.api.track(dummy_function, anchors=["pypads_log"])

    import timeit
    repetitions = 5
    print(timeit.Timer(tracked).timeit(repetitions))

    # --------------------------- asserts ---------------------------
    self.assertEqual(test.i, 5)
    self.assertTrue(tracker.cache.run_exists(id(test)))

    cached = tracker.cache.run_get(id(test))
    cached_call = cached.get('logger_call')
    cached_output = cached.get('output')
    self.assertEqual(len(cached_call.call_stack), 5)
    self.assertEqual(cached_output.var, 5)
def test_pool_execution_single_tracker(self):
    """
    Create a single tracker and time whatever ``pool_execution`` returns for the
    untracked parallel function and the punch dummy generator.
    :return:
    """
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, config={"mongo_db": False}, autostart=True)

    import timeit
    # NOTE(review): pool_execution is called right here; timeit only accepts its result
    # if that is a callable or a string - confirm against the helper's definition.
    pooled = pool_execution(parallel_no_tracking, punch_dummy_gen())
    elapsed = timeit.Timer(pooled).timeit(1)
    print(elapsed)
def test_patch_untracked_changes(self):
    """
    Create an untracked file in a managed git repository, snapshot it as a patch,
    delete the file and check that restoring the patch brings back the git status.
    :return:
    """
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    import os

    with TempDir(chdr=True) as test_folder:
        repo_dir = test_folder._path
        untracked_file = os.path.join(repo_dir, "new_file.txt")

        tracker = PyPads(uri=self.folder.name, config=config, setup_fns={}, autostart=True)
        managed_git: ManagedGit = tracker.managed_git_factory(repo_dir)

        # add untracked changes to the repository
        with open(untracked_file, "w") as file:
            file.write("new untracked changes.")

        # managed_git: ManagedGit = tracker.managed_git_factory(test_folder._path)
        # --------------------------- asserts ------------------------------
        self.assertTrue(managed_git.has_changes())

        # Create a patch of the current state of the repository
        status = managed_git.repo.git.status()
        patch, patch_hash = managed_git.create_patch()

        # Do some changes
        os.remove(untracked_file)
        self.assertNotEqual(status, managed_git.repo.git.status())

        # Restore the state from a patch file written to a second temporary folder
        with TempDir(chdr=False) as patch_folder:
            patch_file = os.path.join(patch_folder._path, "patch.patch")
            with open(patch_file, "w") as f:
                f.write(patch)
            managed_git.restore_patch(patch_file)

        self.assertEqual(status, managed_git.repo.git.status())
        tracker.api.end_run()
def test_track_artifact(self):
    """
    Log a pickled numpy array from a local file as an artifact and check what
    ends up in the results.

    The array is pickled into the current working directory, handed to
    ``tracker.api.log_artifact`` and removed again. The artifact is then looked
    up through ``tracker.results``, loaded back and compared, and selected meta
    fields are compared against a locally built ``ArtifactMetaModel``.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, autostart=True)

    import os
    import pickle as pkl
    import numpy as np

    name = 'some_artifact.pickle'
    path = os.path.join(os.getcwd(), name)
    description = 'logging an artifact from a local path'
    obj = np.random.random(size=(3, 3))

    # Meta fields compared between the expected model and the stored artifact
    keys = [
        'experiment_id', 'run_id', 'category', 'storage_type', 'description',
        'name', 'produced_by'
    ]

    try:
        with open(path, 'wb') as f:
            pkl.dump(obj, f)
        tracker.api.log_artifact(local_path=path, additional_data=None,
                                 holder=None, description=description)
    finally:
        # Never leave the scratch file behind in the working directory,
        # even when writing or logging fails
        if os.path.exists(path):
            os.remove(path)

    holder = tracker.api.get_programmatic_output()

    # file_size is hard-coded here; it is not among the compared keys
    meta = ArtifactMetaModel(value_format='str',
                             file_format=FileFormats.pickle,
                             description=description,
                             file_size=229,
                             data=str(obj),
                             parent=holder,
                             produced_by=holder.produced_by,
                             part_of=get_reference(holder))

    # Load the artifacts of the run whose data entry equals the file name
    artifacts = [
        x for x in tracker.results.get_artifacts(run_id=meta.run.uid)
        if x.data == name
    ]

    # Load the data from the pypads path
    loaded_data = tracker.results.load_artifact(
        name, read_format=FileFormats.pickle)

    # --------------------------- asserts ---------------------------
    assert bool((loaded_data == obj).all()) is True
    assert len(artifacts) == 1

    stored = artifacts[0]
    for key in keys:
        assert meta.dict().get(key) == stored.dict().get(key)
def test_keras_autolog(self):
    """
    Run the simple sequential keras experiment once under a tracker created with
    autostart="KerasAutolog". The test currently has no asserts.
    :return:
    """
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(autostart="KerasAutolog")

    import timeit
    elapsed = timeit.Timer(keras_simple_sequential_experiment).timeit(1)
    print(elapsed)

    # --------------------------- asserts ---------------------------
    # TODO Add asserts
    # !-------------------------- asserts ---------------------------
    tracker.api.end_run()
def test_keras_base_class(self):
    """
    Run the simple sequential keras experiment once under a tracker created with
    autostart="KerasAutolog" and DEBUG log level. The test currently has no asserts.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(autostart="KerasAutolog", log_level="DEBUG")

    import timeit
    elapsed = timeit.Timer(keras_simple_sequential_experiment).timeit(1)
    print(elapsed)

    # --------------------------- asserts ---------------------------
    # TODO add asserts
    # !-------------------------- asserts ---------------------------
    tracker.api.end_run()
def get_current_pads(init=False) -> Union[None, PyPads]:
    """
    Get the currently active pypads instance. All duck punched objects use this function for
    interacting with pypads.
    :param init: if no instance is set, create a new one (from the current config when one is
    found, with defaults otherwise) instead of raising
    :return: the module-level current_pads, or a freshly constructed PyPads when init is set
    :raises UninitializedTrackerException: if no instance is set and init is falsy
    """
    global current_pads

    # Fast path: an instance is already registered
    if current_pads:
        return current_pads

    if not init:
        raise UninitializedTrackerException(
            "Pypads didn't get initialized and can't be used. Inititalize PyPads by creating an instance.")

    # Try to reload pads if it was already defined in the active run
    saved_config = get_current_config()
    if saved_config:
        logger.warning(
            "PyPads seems to be missing on given run with saved configuration. Reinitializing.")
        return PyPads(config=saved_config)

    logger.warning(
        "PyPads has to be initialized before it can be used. Initializing for your with default values.")
    return PyPads()
def test_default_tracking(self):
    """
    Start tracking manually, fix the random seed, run the simple sklearn decision
    tree experiment once and check that the tracker's run is the active mlflow run.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads; the run is started explicitly below
    from pypads.app.base import PyPads
    tracker = PyPads(autostart=False, log_level="WARNING")
    tracker.start_track(experiment_name="1. Experiment")
    tracker.actuators.set_random_seed(seed=1)

    import timeit
    timer = timeit.Timer(sklearn_simple_decision_tree_experiment)

    from pypads import logger
    elapsed = timer.timeit(1)
    logger.info(elapsed)

    # --------------------------- asserts ---------------------------
    import mlflow
    active = mlflow.active_run()
    assert tracker.api.active_run().info.run_id == active.info.run_id
    tracker.api.end_run()
def test_torch_sequential_class(self):
    """
    Run the simple torch example once under a tracker created with
    autostart="MNIST-Torch". A RuntimeError is logged as a warning instead of
    failing the test. The test currently has no asserts.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(autostart="MNIST-Torch", setup_fns=[])

    try:
        import timeit
        elapsed = timeit.Timer(torch_simple_example).timeit(1)
        print(elapsed)
    except RuntimeError as error:
        reason = str(error)
        logger.warning("Torch bug on re-import: {}".format(reason))

    # --------------------------- asserts ---------------------------
    # TODO Add asserts
    # !-------------------------- asserts ---------------------------
    tracker.api.end_run()
def test_git_repository_init(self):
    """
    Check that a fresh temporary folder is no git repository, and that after
    calling tracker.managed_git_factory on it git.Repo opens it and equals the
    managed repository.
    :return:
    """
    # Activate tracking of pypads
    from pypads.app.base import PyPads

    with TempDir(chdr=True) as test_folder:
        repo_dir = test_folder._path
        tracker = PyPads(uri=self.folder.name, config=config, setup_fns={}, autostart=True)

        # --------------------------- asserts ------------------------------
        # Before the factory ran, opening the folder as a repository has to fail
        with self.assertRaises(InvalidGitRepositoryError):
            git.Repo(path=repo_dir)

        managed_git: ManagedGit = tracker.managed_git_factory(repo_dir)

        opened_repo = git.Repo(repo_dir)
        self.assertEqual(opened_repo, managed_git.repo)
        tracker.api.end_run()
def test_default_splitter_with_params(self):
    """
    Split the iris data with the default splitter (strategy "cv", 3 folds,
    validation ratio 0.2) and compare every yielded split with the split that
    the splits logger stored in the run cache.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, autostart=True)

    @tracker.decorators.dataset(name="iris")
    def load_iris():
        from sklearn.datasets import load_iris as sklearn_load_iris
        return sklearn_load_iris()

    data = load_iris()

    splits = tracker.actuators.default_splitter(data.data, strategy="cv", n_folds=3, val_ratio=0.2)

    # --------------------------- asserts ---------------------------
    # id of the splits logger
    from pypads_padre.bindings.events import DEFAULT_PADRE_LOGGING_FNS
    split_logger = DEFAULT_PADRE_LOGGING_FNS["splits"][0]
    logger_key = id(split_logger)

    import numpy
    for train_idx, test_idx, val_idx in splits:
        print("train: {}\n test: {}\n val: {}".format(train_idx, test_idx, val_idx))

        # The cache has to name the current split ...
        self.assertTrue(tracker.cache.run_exists("current_split"))
        split_key = str(tracker.cache.run_get("current_split"))

        # ... and hold the logger output containing that split
        self.assertTrue(tracker.cache.run_exists(logger_key))
        tracked_splits = tracker.cache.run_get(logger_key).get('output').splits
        self.assertTrue(split_key in tracked_splits.splits.keys())

        current_split = tracked_splits.splits[split_key]
        self.assertTrue(numpy.array_equal(train_idx, current_split.train_set))
        self.assertTrue(numpy.array_equal(test_idx, current_split.test_set))
        self.assertTrue(numpy.array_equal(val_idx, current_split.validation_set))
    # !-------------------------- asserts ---------------------------
    tracker.api.end_run()
def test_punch_before_import(self):
    """
    Activate tracking with the punch dummy mapping before importing the dummy
    classes and check that the classes and an instance carry the mapping attributes.
    :return:
    """
    from pypads.app.base import PyPads
    from test_classes.dummy_mapping import _get_punch_dummy_mapping

    dummy_mapping = _get_punch_dummy_mapping()
    tracker = PyPads(uri=TEST_FOLDER, mappings=dummy_mapping)
    tracker.activate_tracking(reload_modules=True)
    tracker.start_track()

    # The dummy classes are imported only after tracking has been activated
    from test_classes.dummy_classes import PunchDummy
    from test_classes.dummy_classes import PunchDummy2
    dummy2 = PunchDummy2(2)

    expected_attributes = (
        (PunchDummy, "_pypads_mapping_PunchDummy"),
        (PunchDummy2, "_pypads_mapping_PunchDummy2"),
        (dummy2, "_pypads_mapping_PunchDummy2"),
    )
    for target, attribute in expected_attributes:
        assert hasattr(target, attribute)
def test_track_mem_artifact(self):
    """
    Log an in-memory numpy array as a pickle artifact and compare selected meta
    fields of the stored artifact with a locally built ArtifactMetaModel.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, autostart=True)

    path = 'some_artifact'
    description = 'Storing test array as an artifact'

    # Meta fields compared between the expected model and the stored artifact
    keys = ['experiment_id', 'run_id', 'category', 'storage_type',
            'description', 'name', 'description', 'produced_by']

    import numpy as np
    obj = np.random.random(size=(3, 3))

    holder = tracker.api.get_programmatic_output()
    tracker.api.log_mem_artifact(path=path, obj=obj, write_format=FileFormats.pickle,
                                 description=description, additional_data=None, holder=None)

    # file_size is hard-coded here; it is not among the compared keys
    meta = ArtifactMetaModel(value_format='str', file_format=FileFormats.pickle,
                             description=description, file_size=229, data=str(obj),
                             parent=holder, produced_by=holder.produced_by,
                             part_of=get_reference(holder))

    # Keep only the artifacts whose data entry is the pickled file name
    matching = [
        candidate for candidate in tracker.results.get_artifacts(run_id=meta.run.uid)
        if candidate.data == path + '.pickle'
    ]

    # --------------------------- asserts ---------------------------
    assert len(matching) == 1
    stored = matching[0]
    for key in keys:
        assert meta.dict().get(key) == stored.dict().get(key)
def test_injection_logger(self):
    """
    Attach an InjectionLogger to a tracked function, call it once and check that
    both __pre__ and __post__ of the logger have fired.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    class TestLogger(InjectionLogger):
        """ Utility logger for testing purposes: records whether __pre__ and __post__ ran. """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.pre = False
            self.post = False

        def __pre__(self, ctx, *args, _logger_call, _logger_output, _args, _kwargs, **kwargs):
            self.pre = True

        def __post__(self, ctx, *args, _logger_call, _pypads_pre_return, _pypads_result, _args, _kwargs,
                     **kwargs):
            self.post = True

    test = TestLogger()
    logger_events = {"test_logger": test}
    logger_hooks = {"test_logger": {"on": ["pypads_log"]}}

    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, config=config, hooks=logger_hooks, events=logger_events,
                     setup_fns={}, autostart=True)

    tracked = tracker.api.track(dummy_function, anchors=["pypads_log"])

    import timeit
    elapsed = timeit.Timer(tracked).timeit(1)
    print(elapsed)

    # --------------------------- asserts ---------------------------
    self.assertTrue(test.pre)
    self.assertTrue(test.post)
def test_multiple_fits(self):
    """
    Fit the same decision tree twice on iris under an autostarted tracker.
    The test currently has no asserts.
    :return:
    """
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, autostart=True)

    from sklearn import datasets
    from sklearn.tree import DecisionTreeClassifier

    # Load iris and fit one classifier instance two times in a row
    iris = datasets.load_iris()
    classifier = DecisionTreeClassifier()
    for _ in range(2):
        classifier.fit(iris.data, iris.target)

    # TODO add asserts
    tracker.api.end_run()
def test_failed_logging(self):
    """
    Track a function that always raises and check that the exception still
    reaches the caller of the tracked function.
    :return:
    """
    # --------------------------- setup of the tracking ---------------------------
    # Activate tracking of pypads
    from pypads.app.base import PyPads
    tracker = PyPads(uri=TEST_FOLDER, config=config, hooks=hooks, events=events, setup_fns={},
                     autostart=True)

    def failing_function():
        print("I'm a failing function")
        raise Exception("Planed failure")

    tracked_failure = tracker.api.track(failing_function, anchors=["pypads_log"])

    # --------------------------- asserts ---------------------------
    with self.assertRaises(Exception):
        tracked_failure()