Example 1
import os
import re


def check_determinism():
    from pypads.app.pypads import get_current_pads
    pads = get_current_pads()
    if is_package_available('tensorflow'):
        import tensorflow
        tf_version = tensorflow.version.VERSION
        if re.match(r"(1\.(14|15)|2\.0)", tf_version):
            if "TF_USE_CUDNN_AUTOTUNE" in os.environ:
                logger.warning(
                    "When using TF auto-tuning of cuDNN convolution algorithms your experiment might"
                    " be non-deterministic.")
                pads.api.set_tag("non-determinism", "CUDNN_AUTOTUNE")

            if ("TF_CUDNN_DETERMINISTIC" not in os.environ
                    or (not os.environ["TF_CUDNN_DETERMINISTIC"]
                        and os.environ["TF_CUDNN_DETERMINISTIC"] is not 1)):
                if not is_package_available("tfdeterminism"):
                    logger.warning(
                        "Your experiment might include a gpu-specific sources of non-determinism."
                        " See https://github.com/NVIDIA/tensorflow-determinism"
                    )
                    pads.api.set_tag(
                        "non-determinism",
                        "TF auto-tuning of cuDNN convolution algorithms (see multi-algo note)"
                    )
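
A minimal sketch of how a run can opt into the deterministic settings this check looks for (the variable names are taken from the code above; treating "1" as the truthy value is an assumption):

import os

os.environ["TF_CUDNN_DETERMINISTIC"] = "1"       # request deterministic cuDNN kernels
os.environ.pop("TF_USE_CUDNN_AUTOTUNE", None)    # ensure cuDNN autotuning is not enabled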
Example 2
    def _wrapped_inner_function(_self, *args, _pypads_env: InjectionLoggerEnv, **kwargs):
        """
        Wrapped function logic. This executes all hooks and the function itself.
        :param _self: Reference to
        :param args:
        :param kwargs:
        :return:
        """

        env = _pypads_env
        call = env.call
        if not call.has_hook(env.hook):
            call.add_hook(env.hook)

            try:
                # check for name collision in parameters
                if set([k for k, v in kwargs.items()]) & set(
                        [k for k, v in env.parameter.items()]):
                    logger.warning("Hook parameter is overwriting a parameter in the standard "
                                   "model call. This most likely will produce side effects.")

                if env.hook:
                    return env.hook(_self, _pypads_env=_pypads_env, *args, **kwargs)
                return env.callback(*args, **kwargs)
            finally:
                call.remove_hook(env.hook)
        else:
            return env.callback(*args, **kwargs)
Example 3
    def end_run(self):
        """
        End the current run and run its tearDown functions.
        :return:
        """
        run = self.active_run()

        # consolidated_dict = self.pypads.cache.get('consolidated_dict', None)
        # if consolidated_dict is not None:
        #     # Dump data to disk
        #     self.log_mem_artifact("consolidated_log", consolidated_dict, write_format=FileFormats.json)

        cached_fns = self._get_teardown_cache()
        fn_list = list(cached_fns.values())
        fn_list.sort(key=lambda t: t.order)
        for fn in fn_list:
            try:
                fn(self.pypads,
                   _pypads_env=LoggerEnv(parameter=dict(),
                                         experiment_id=run.info.experiment_id,
                                         run_id=run.info.run_id),
                   data={"category": "TearDownFn"})
            except (KeyboardInterrupt, Exception) as e:
                logger.warning("Failed running post run function " +
                               fn.__name__ + " because of exception: " +
                               str(e))

        mlflow.end_run()

        # --- Clean tmp files in disk cache after run ---
        folder = get_temp_folder(run)
        if os.path.exists(folder):
            import shutil
            shutil.rmtree(folder)
Example 4
def data_path(data, *path, default=None, warning=None):
    """
    Gets an data item of given dict at path
    :param data:
    :param path:
    :param default:
    :param warning:
    :return:
    """
    cur = data
    for i, p in enumerate(path):
        if isinstance(cur, list):
            # For lists, recurse into every element and collect multiple
            # return values instead of a single one.
            out = []
            for list_element in cur:
                value = data_path(list_element, *path[i:])
                if value is not None:
                    if isinstance(value, list) and len(path[i:]) > 0:
                        out.extend(value)
                    else:
                        out.append(value)
            return out if len(out) > 0 else default
        elif p in cur:
            cur = cur[p]
        else:
            if warning is not None:
                logger.warning(warning)
            return default
    return cur
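
A hypothetical usage sketch of data_path on a nested structure (the record dict is invented for illustration):

record = {"runs": [{"metrics": {"acc": 0.9}}, {"metrics": {"acc": 0.8}}]}
data_path(record, "runs", "metrics", "acc")    # -> [0.9, 0.8]
data_path(record, "runs", "loss", default=[])  # -> []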
Example 5
        def commit(pads, *args, **kwargs):
            message = "Added results for run " + pads.api.active_run().info.run_id
            pads.managed_result_git.commit_changes(message=message)

            repo = pads.managed_result_git.repo
            remotes = repo.remotes

            if not remotes:
                logger.warning(
                    "Your results don't have any remote repository set. Set a remote repository"
                    " to enable automatic pushing.")
            else:
                for remote in remotes:
                    name, url = remote.name, list(remote.urls)[0]
                    try:
                        # check if remote repo is bare and if it is initialize it with a temporary local repo
                        pads.managed_result_git.is_remote_empty(remote=name,
                                                                remote_url=url,
                                                                init=True)
                        # stash current state
                        repo.git.stash('push', '--include-untracked')
                        # Force pull
                        repo.git.pull(name, 'master',
                                      '--allow-unrelated-histories')
                        # Push merged changes
                        repo.git.push(name, 'master')
                        logger.info("Pushed your results automatically to " +
                                    name + " @:" + url)
                        # pop the stash
                        repo.git.stash('pop')
                    except Exception as e:
                        logger.error(
                            "Pushing results to the remote failed due to this error: '{}'"
                            .format(str(e)))
Example 6
def splitter_output(result, fn):
    # check if the output of the splitter is a tuple of indices
    try:
        if isinstance(result, tuple):
            if "sklearn" in fn.__module__:
                return result[0].tolist(), result[1].tolist(), None
            elif "default_split" in fn.__name__:
                return result
            else:
                if len(result) < 4:
                    result_ = [_tolist(r) for r in result]
                    return tuple(result_ + [None] * (3 - len(result_)))
                else:
                    return None, None, None
        else:
            if "torch" in fn.__module__:
                if hasattr(fn, "_dataset"):
                    if hasattr(fn._dataset, "train"):
                        if fn._dataset.train:
                            return _tolist(result), None, None
                        else:
                            return None, _tolist(result), None
                return _tolist(result), None, None
            else:
                return None, None, None
    except Exception as e:
        logger.warning("Split tracking ommitted due to exception {}".format(
            str(e)))
        return None, None, None
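
A hypothetical sketch of the sklearn branch above (the fake function and index arrays are invented; setting a function's writable __module__ attribute stands in for a real sklearn splitter here):

import numpy as np

def _fake_sklearn_split():
    pass

_fake_sklearn_split.__module__ = "sklearn.model_selection"

result = (np.array([0, 1, 2]), np.array([3, 4]))
splitter_output(result, _fake_sklearn_split)  # -> ([0, 1, 2], [3, 4], None)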
Example 7
def numpy_crawler(obj: Crawler, target_columns=None, **kwargs):
    logger.info("Detecting a dataset object of type 'numpy.ndarray'. Crawling any available metadata...")
    # , (obj.data[:, i].min(), obj.data[:, i].max())
    if len(obj.data.shape) == 2:
        features = [(str(i), str(obj.data[:, i].dtype), False) for i in
                    range(obj.data.shape[1])]
    else:
        # TODO for multidim datasets
        features = None
    metadata = {"type": str(obj.format), "shape": obj.data.shape, "features": features}
    metadata = {**metadata, **kwargs}
    targets = None
    try:
        if target_columns:
            targets = obj.data[:, target_columns]
            if isinstance(target_columns, Iterable):
                for c in target_columns:
                    feature = metadata["features"][c]
                    metadata["features"][c] = (feature[0], feature[1], True)
            else:
                feature = metadata["features"][target_columns]
                metadata["features"][target_columns] = (feature[0], feature[1], True)
    except Exception as e:
        logger.warning(str(e))
    return obj.data, metadata, targets
Example 8
    def activate_tracking(self,
                          reload_modules=False,
                          reload_warnings=True,
                          clear_imports=False,
                          affected_modules=None):
        """
        Function to duck punch all objects defined in the mapping files. This should at best be called before importing
        any libraries.
        :param affected_modules: Affected modules of the mapping files.
        :param clear_imports: Clear imports after punching. CAREFUL THIS IS EXPERIMENTAL!
        :param reload_warnings: Show warnings of affected modules which were already imported before the importlib was extended.
        :param reload_modules: Force a reload of affected modules. CAREFUL THIS IS EXPERIMENTAL!
        :return:
        """
        if affected_modules is None:
            # Modules are affected if they are mapped by a library or are already punched
            affected_modules = self.wrap_manager.module_wrapper.punched_module_names | \
                               {l.name for l in self.mapping_registry.get_libraries()}

        global tracking_active
        if not tracking_active:
            logger.info("Activating tracking by extending importlib...")
            from pypads.app.pypads import set_current_pads
            set_current_pads(self)

            # Add our loader to the meta_path
            extend_import_module()

            import sys
            import importlib
            loaded_modules = list(sys.modules.items())
            for name, module in loaded_modules:
                if self.is_affected_module(name, affected_modules):
                    if reload_warnings:
                        logger.warning(
                            name +
                            " was imported before PyPads. To enable tracking, import PyPads first or use "
                            "reload_modules / clear_imports. Instances created before this point are not tracked."
                        )

                    if clear_imports:
                        del sys.modules[name]

                    if reload_modules:
                        try:
                            spec = importlib.util.find_spec(module.__name__)
                            duck_punch_loader(spec)
                            loader = spec.loader
                            module = loader.load_module(module.__name__)
                            loader.exec_module(module)
                            importlib.reload(module)
                        except Exception as e:
                            logger.debug("Couldn't reload module " + str(e))

            tracking_active = True
        else:
            # TODO check if a second tracker / tracker activation doesn't break the tracking
            logger.warning("Tracking was already activated.")
        return self
Example 9
import pickle


def read_pickle(p):
    try:
        with open(p, "rb") as fd:
            return pickle.load(fd)
    except FileNotFoundError:
        return None
    except Exception as e:
        logger.warning("Couldn't read pickle file. " + str(e))
Example 10
 def test_3d_mnist(self):
     # --------------------------- setup of the tracking ---------------------------
     try:
         import timeit
         t = timeit.Timer(torch_3d_mnist_example)
         print(t.timeit(1))
     except RuntimeError as e:
         logger.warning("Torch bug on re-import: {}".format(str(e)))
Example 11
 def commit_changes(self, message=""):
     try:
         self.add_untracked_files()
         self._commit(message)
     except Exception as e:
         logger.warning("Failed to commit due to following exception: %s" %
                        str(e))
         pass
Example 12
 def _add_git_ignore(self):
     try:
         with open(self.repo.working_dir + "/.gitignore", "w") as file:
             file.write(GIT_IGNORE)
         self.repo.git.add(A=True)
     except Exception as e:
         logger.warning(
             "Couldn't add .gitignore file to the repo due to this exception: %s" %
             str(e))
Example 13
import pickle


def write_pickle(p, o):
    try:
        with open(p + ".pickle", "wb+") as fd:
            pickle.dump(o, fd)
            return fd.name
    except Exception as e:
        logger.warning(
            "Couldn't pickle output. Trying to save its string representation instead. " +
            str(e))
        return write_text(p, o)
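
A hypothetical round trip combining write_pickle with read_pickle from Example 9 (note that write_pickle appends the ".pickle" suffix itself and returns the full path):

saved_path = write_pickle("/tmp/model_state", {"epoch": 3})  # writes /tmp/model_state.pickle
state = read_pickle(saved_path)                              # -> {"epoch": 3}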
Example 14
 def _handle_error(self, *args, ctx, _pypads_env, error, **kwargs):
     if isinstance(error, StopIteration):
         logger.warning(
             "Ignoring recovery of this StopIteration error: {}".format(
                 error))
         original = _pypads_env.call.call_id.context.original(
             _pypads_env.callback)
         return original(ctx, *args, **kwargs)
     else:
          # ctx, _pypads_env and error are keyword-only above, so forward them by name.
          super()._handle_error(*args, ctx=ctx, _pypads_env=_pypads_env, error=error, **kwargs)
Example 15
import json


def read_json(p):
    try:
        with open(p, "r") as fd:
            return json.load(fd)
    except FileNotFoundError:
        return None
    except Exception as e:
        logger.warning(
            "Couldn't read artifact as json. Trying to read it as text instead. "
            + str(e))
        return read_text(p)
Example 16
 def __new__(cls, *args, **kwargs):
     try:
         from pypads.app.pypads import get_current_pads
         if get_current_pads() is not None:
             logger.warning(
                 "Currently only one tracker can be activated at once. "
                 "PyPads was already initialized. Reusing the old instance.")
             return get_current_pads()
     except Exception:
         pass
     return super().__new__(cls)
Example 17
 def _add_json_ld(self, entry, json_ld, graph):
     for j in json_ld:
         if store_hash(graph.identifier, str(j)):
             j["@context"] = self._convert_context(
                 dict_merge(
                     entry.context,
                     j["@context"])) if "@context" in j else entry.context
             try:
                 graph.parse(data=dumps(j), format="json-ld")
             except Exception:
                 logger.warning(f"Couldn't translate {j} to rdf.")
Example 18
def read_artifact(path, read_format: FileFormats = None):
    if read_format is None:
        file_extension = path.split('.')[-1]
        read_format = get_by_value_in_enum(file_extension, FileFormats)
        if not read_format:
            logger.warning("Read format for extension '" + file_extension +
                           "' not supported!")
            return None
    try:
        data = readers[read_format](path)
    except Exception:
        data = None
    return data
Example 19
 def __init__(self, name, description):
     """
     Constructor for an anchor.
     :param name: Name of the anchor.
     :param description: String describing the anchor and its purpose.
     """
     self._name = name
     self._description = description
      if self._name in anchors:
          logger.warning("Anchor with name {} already exists and will be overwritten.".format(
              self._name))
      anchors[self._name] = self
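
A hypothetical sketch of the duplicate-name behavior (the class name Anchor is assumed from the constructor above; anchors is module-level registry state):

a1 = Anchor("fit", "Hook around fit calls")
a2 = Anchor("fit", "Duplicate")  # warns, then replaces a1 in the anchors registry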
Example 20
import json


def write_json(p, o):
    try:
        with open(p + ".json", "w+") as fd:
            if isinstance(o, str):
                fd.write(o)
                # TODO check if valid json?
            else:
                json.dump(o, fd)
            return fd.name
    except Exception as e:
        logger.warning(
            "Couldn't write meta as json. Trying to save it as text instead. "
            + str(e))
        return write_text(p, o)
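
A hypothetical round trip combining write_json with read_json from Example 15 (write_json appends the ".json" suffix itself and returns the full path):

saved = write_json("/tmp/run_meta", {"seed": 42})  # writes /tmp/run_meta.json
meta = read_json(saved)                            # -> {"seed": 42}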
Example 21
def dataframe_crawler(obj: Crawler, target_columns, **kwargs):
    logger.info("Detecting a dataset object of type 'pandas.DataFrame'. Crawling any available metadata...")
    data = obj.data
    features = []
    for i, col in enumerate(data.columns):
        flag = col in target_columns if target_columns is not None else False
        features.append((col, str(data[col].dtype), flag))
    metadata = {"type": str(obj.format), "shape": data.shape, "features": features}
    metadata = {**metadata, **kwargs}
    targets = None
    if target_columns is not None:
        targets = data[target_columns].values
    else:
        logger.warning("Target values might be innaccurate or not tracked.")
    return data, metadata, targets
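
A hypothetical illustration of the feature flags produced above (the DataFrame is invented; constructing the project's Crawler wrapper is omitted):

import pandas as pd

df = pd.DataFrame({"age": [20, 30], "label": [0, 1]})
# With target_columns=["label"], the features list would become:
# [("age", "int64", False), ("label", "int64", True)]
# and targets would be df[["label"]].values.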
Example 22
 def test_torch_sequential_class(self):
     # --------------------------- setup of the tracking ---------------------------
     # Activate tracking of pypads
     from pypads.app.base import PyPads
     tracker = PyPads(autostart="MNIST-Torch", setup_fns=[])
     try:
         import timeit
         t = timeit.Timer(torch_simple_example)
         print(t.timeit(1))
     except RuntimeError as e:
         logger.warning("Torch bug on re-import: {}".format(str(e)))
     # --------------------------- asserts ---------------------------
     # TODO Add asserts
     # !-------------------------- asserts ---------------------------
     tracker.api.end_run()
Example 23
    def _call(self, *args, _pypads_env: LoggerEnv, _logger_call, _logger_output, **kwargs):
        pads = _pypads_env.pypads

        if not pads.cache.run_exists("loguru_logger"):
            std_out_logger = LogTO(parent=_logger_output)
            pads.cache.run_add("loguru_logger", std_out_logger)

            from pypads.utils.logging_util import get_temp_folder
            folder = get_temp_folder()
            from pypads.pads_loguru import logger_manager
            lid = logger_manager.add(os.path.join(folder, "run_" + pads.api.active_run().info.run_id + ".log"),
                                     rotation="50 MB",
                                     enqueue=True)
            pads.cache.run_add("loguru_logger_lid", lid)
        else:
            logger.warning("LoguruRSF already registered")
Example 24
    def start_track(self, experiment_name=None, disable_run_init=False):
        """
        Start a new run to track.
        :param experiment_name: The name of the mlflow experiment
        :param disable_run_init: Flag to indicate if the run_init functions are to be run on an already existing run.
        :return:
        """
        if not tracking_active:
            self.activate_tracking()

        # check if there is already an active run
        run = mlflow.active_run()
        experiment = None
        if run is None:
            experiment_name = experiment_name or DEFAULT_EXPERIMENT_NAME
            # Create run if run doesn't already exist
            experiment = mlflow.get_experiment_by_name(experiment_name)
            experiment_id = experiment.experiment_id if experiment else mlflow.create_experiment(
                experiment_name)
            run = self.api.start_run(experiment_id=experiment_id)
        else:
            # if not disable_run_init:
            #     self.api.run_setups(_pypads_env=LoggerEnv(parameter=dict(), experiment_id=experiment_id, run_id=run_id,
            #                                               data={"category": "SetupFn"}))
            if experiment_name:
                # Check if we're still in the same experiment
                experiment = mlflow.get_experiment_by_name(experiment_name)
                experiment_id = experiment.experiment_id if experiment else mlflow.create_experiment(
                    experiment_name)
                if run.info.experiment_id != experiment_id:
                    experiment = mlflow.get_experiment_by_name(experiment_name)

        if experiment is None:
            experiment = self.backend.get_experiment(run.info.experiment_id)

        # override active run if used
        if experiment_name and run.info.experiment_id != experiment.experiment_id:
            logger.warning(
                "The active run's experiment_id doesn't match the given experiment name " +
                experiment_name + ". Creating a new run.")
            try:
                self.api.start_run(experiment_id=experiment.experiment_id,
                                   nested=True)
            except Exception:
                mlflow.end_run()
                self.api.start_run(experiment_id=experiment.experiment_id)
        return self
Example 25
def _to_node_label(wrappee, ref):
    if ref is not None:
        try:
            return str(wrappee) + str(id(ref))
        except Exception as e:
            logger.warning(
                "Couldn't get the representation of the wrappee. Falling back to id " +
                str(id(ref)) + ". " + str(e))
            return str(id(wrappee)) + str(id(ref))
    else:
        try:
            return str(wrappee)
        except Exception as e:
            logger.warning(
                "Couldn't get the representation of the wrappee. Falling back to id " +
                str(id(wrappee)) + ". " + str(e))
            return str(id(wrappee))
Example 26
    def _call(self, *args, _pypads_env: LoggerEnv, _logger_call, _logger_output, **kwargs):
        from pypads.app.pypads import get_current_pads
        pads = get_current_pads()

        if not pads.cache.run_exists("std_out_logger"):
            std_out_logger = LogTO(parent=_logger_output, path="logfile.log")
            pads.cache.run_add("std_out_logger", std_out_logger)
        else:
            logger.warning("StdOutRSF already registered")
            return

        import sys

        class Logger(object):
            def __init__(self):
                import re
                temp_folder = get_temp_folder()
                if not os.path.isdir(temp_folder):
                    os.mkdir(temp_folder)
                # TODO close file?
                self.log = open(os.path.join(temp_folder, "logfile.log"), "a")
                self.re = re.compile(r'(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]')

            def write(self, message):
                self.log.write(self.re.sub('', message))
                self.log.flush()

            def flush(self):
                # this flush method is needed for python 3 compatibility.
                # this handles the flush command by doing nothing.
                # you might want to specify some extra behavior here.
                pass

        stdout_logger = Logger()

        if hasattr(sys.stdout,'original_write'):
            original_function = getattr(sys.stdout, 'original_write')
        else:
            original_function = getattr(sys.stdout, 'write')
        setattr(sys.stdout, 'original_write', original_function)

        def modified_function(message):
            stdout_logger.write(message)
            sys.stdout.original_write(message)

        setattr(sys.stdout, 'write', modified_function)
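
A hypothetical sketch of undoing the patch above, assuming it succeeded (i.e. the current sys.stdout permits attribute assignment, which the patch already requires):

import sys

if hasattr(sys.stdout, 'original_write'):
    setattr(sys.stdout, 'write', getattr(sys.stdout, 'original_write'))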
Example 27
    def __pre__(self, ctx, *args,
                _logger_call, _logger_output, _args, _kwargs, **kwargs):
        """

        :param ctx:
        :param args:
        :param kwargs:
        :return:
        """
        from pypads.app.pypads import get_current_pads
        pads = get_current_pads()

        probabilities = None
        try:
            probabilities = ctx.predict(*_args, **_kwargs)
        except Exception as e:
            logger.warning("Couldn't compute probabilities because %s" % str(e))

        pads.cache.run_add("probabilities", probabilities)
Example 28
 def add_result_remote(self, remote, uri):
     """
     Add a remote to track the results.
     :param remote: Remote name to be added
     :param uri: Remote address to be added
     :return:
     """
     if self.managed_result_git is None:
         logger.warning(
             "Can only add remotes to the result directory if it is managed by pypads git."
         )
     else:
         try:
             self.managed_result_git.remote = remote
             self.managed_result_git.remote_uri = uri
             self.managed_result_git.repo.create_remote(remote, uri)
         except Exception as e:
             logger.warning("Failed to add remote due to exception: " +
                            str(e))
Example 29
    def function_type(self):
        """
        Get the function type of the accessor function.
        :return:
        """

        # Return if already found
        if self._function_type:
            return self._function_type

        if self.context.is_module():
            function_type = "staticmethod"
        else:
            # Get the function type (Method, unbound etc.)
            try:
                real_ctx = self.real_context()
                if real_ctx is None:
                    raise ValueError("Couldn't find real context.")
                function_type = type(
                    real_ctx.get_dict()[self.wrappee.__name__])
            except Exception as e:
                logger.warning("Couldn't get function type of '" +
                               str(self.wrappee.__name__) + "' on '" +
                               str(self.real_context()) + ". Omit logging. " +
                               str(e))
                return None

            # TODO Can we find less error prone ways to get the type of the given fn?
            # Delegate decorator of sklearn obfuscates the real type.
            # if is_package_available("sklearn"):
            #     from sklearn.utils.metaestimators import _IffHasAttrDescriptor
            #     if function_type == _IffHasAttrDescriptor:
            if str(
                    function_type
            ) == "<class 'sklearn.utils.metaestimators._IffHasAttrDescriptor'>":
                function_type = "wrapped"
                self.wrappee = self.real_context().get_dict()[
                    self.wrappee.__name__]

            # Set cached result
            self._function_type = function_type
        return function_type
Example 30
    def _call(self, *args, _pypads_env: LoggerEnv, **kwargs):
        pads = _pypads_env.pypads

        file = os.path.join(get_temp_folder(), str(os.getpid()) + "_trace.txt")
        proc = None
        if platform == "linux" or platform == "linux2":
            # https://stackoverflow.com/questions/4789837/how-to-terminate-a-python-subprocess-launched-with-shell-true
            proc = subprocess.Popen(
                ['sudo strace -p ' + str(os.getpid()) + ' &> ' + file],
                shell=True,
                preexec_fn=os.setsid)

        elif platform == "darwin":
            proc = subprocess.Popen(
                ['sudo dtruss -f -p ' + str(os.getpid()) + ' 2> ' + file],
                shell=True,
                preexec_fn=os.setsid)

        elif platform == "win32":
            logger.warning("No tracing supported on windows currently.")

        if proc:
            pads.api.register_teardown(
                "stop_dtrace_" + str(proc.pid),
                STraceStop(_pypads_proc=proc, _pypads_trace_file=file))
            if proc.poll() == 1:
                logger.warning(
                    "Can't dtruss/strace without sudo rights. To enable tracking, allow the user to execute "
                    "dtruss/strace without a sudo password via polkit or by modifying visudo - /etc/sudoers: "
                    "username ALL=NOPASSWD: /usr/bin/dtruss. To get the path to dtruss you can use 'which dtruss'. "
                    "Be careful about allowing permanent sudo rights to dtruss. This might introduce security risks."
                )

        def safety_hook():
            """
            A None value indicates that the process hasn't terminated yet.
            """
            if proc and proc.poll() is None:
                os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
                proc.terminate()

        pads.add_exit_fn(safety_hook)