Ejemplo n.º 1
0
    def add_mapping(self, mapping: MappingCollection, key=None):
        """
        Register a mapping collection under a key and persist it if it is not known yet.

        If no key is passed and the mapping is a MappingFile, its ``lib`` attribute is used as key.
        A mapping whose hash is unknown to the mapping repository is logged to it as json; for a
        MappingFile a copy of the file is logged as artifact as well.
        :param mapping: Mapping collection to be added to the registry.
        :param key: Key for the mapping collection. TODO remove key?
        :return: None
        """
        registry_key = key
        if registry_key is None and isinstance(mapping, MappingFile):
            registry_key = mapping.lib

        # Without any key the mapping can't be placed into the registry
        if registry_key is None:
            logger.error(
                "Couldn't add mapping " + str(mapping) +
                " to the pypads mapping registry. Lib or key are undefined.")
            return

        repository = self._pypads.mapping_repository
        uid = mapping._hash
        if not repository.has_object(uid=uid):
            repository_object = repository.get_object(uid=uid)
            # Just init context once here
            with repository_object.init_context():
                if isinstance(mapping, MappingFile):
                    artifact = repository_object.log_artifact(
                        local_path=mapping.path,
                        description="A copy of the mapping file used.")
                    mapping.mapping_file = artifact
                repository_object.log_json(mapping)
        self._mappings[registry_key] = mapping
Ejemplo n.º 2
0
 def log_json(self, entry, uid=None):
     """
     Store a json entry in the collection named after its storage type.

     Model objects are converted to dicts first. Embedded entries are returned as they are and
     not stored. An entry whose _id already exists in the collection replaces the stored one.
     :param entry: dict, BaseStorageModel or ModelObject to store. Has to define a storage_type.
     :param uid: Optional uid to set on the entry before storing.
     :return: Reference to the stored entry, the entry itself if it is embedded, or None if the
      entry defines no storage_type.
     """
     if isinstance(entry, dict):
         pass
     elif isinstance(entry, BaseStorageModel):
         entry = entry.dict(by_alias=True)
     elif isinstance(entry, ModelObject):
         entry = entry.dict(force=False, by_alias=True)
     else:
         raise ValueError(f"{entry} of wrong type.")

     if "storage_type" not in entry:
         logger.error(
             f"Tried to log an invalid entry. Json logged data has to define a storage_type. For entry {entry}"
         )
         return None

     if entry['storage_type'] == ResultType.embedded:
         # Instead of a path an embedded object should return the object itself and not be stored to our backend
         return entry

     if uid is not None:
         entry["uid"] = uid

     reference = to_reference(entry)
     _id = reference.id
     entry["_id"] = _id

     # Collections are addressed by the plain value of the storage type
     storage_type = entry["storage_type"]
     if isinstance(storage_type, ResultType):
         storage_type = storage_type.value

     try:
         self._db[storage_type].insert_one(jsonable_encoder(entry))
     except DuplicateKeyError:
         # An entry with this _id exists already; overwrite it
         self._db[storage_type].replace_one({"_id": _id},
                                            jsonable_encoder(entry))
     # TODO maybe handle duplicates
     return reference
Ejemplo n.º 3
0
 def finish(self, call):
     """
     Remove a finished call from the call stack.

     A call which is not on the stack is reported via the error log and otherwise ignored.
     :param call: Call to remove from the stack.
     :return: None
     """
     if call not in self._call_stack:
         logger.error("Tried to finish call which is not on the stack. " +
                      str(call))
         return
     self._call_stack.remove(call)
     # TODO clear memory in call_objects?
Ejemplo n.º 4
0
def _get_gpu_usage(gpu_count):
    """
    Collect usage statistics for the NVML devices with index 0 to gpu_count - 1.

    :param gpu_count: Number of device indices to query.
    :return: List holding one tuple per device (handle, gpu utilization, memory utilization,
     used memory in percent of the total memory, temperature, power usage in percent of the
     enforced power limit). The power usage is -9999 if it couldn't be read. None is returned
     as soon as the statistics of one device can't be read.
    """
    import pynvml
    stats = []
    for index in range(gpu_count):
        device = pynvml.nvmlDeviceGetHandleByIndex(index)
        try:
            rates = pynvml.nvmlDeviceGetUtilizationRates(device)
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(device)
            temperature = pynvml.nvmlDeviceGetTemperature(
                device, pynvml.NVML_TEMPERATURE_GPU)
            try:
                drawn = pynvml.nvmlDeviceGetPowerUsage(device) / 1000.0
                limit = pynvml.nvmlDeviceGetEnforcedPowerLimit(
                    device) / 1000.0
                power_usage = drawn / limit * 100
            except pynvml.NVMLError as e:
                logger.error(
                    "Coudln't extract power usage due to NVML exception: {}".
                    format(str(e)))
                # Marker value for an unavailable power usage
                power_usage = -9999
            memory_used_percent = (mem_info.used /
                                   float(mem_info.total)) * 100
            stats.append((device, rates.gpu, rates.memory,
                          memory_used_percent, temperature, power_usage))
        except pynvml.NVMLError as e:
            logger.error(
                "Coudln't extract gpu usage information due to NVML exception: {}"
                .format(str(e)))
            return None
    return stats
Ejemplo n.º 5
0
        def commit(pads, *args, **kwargs):
            """
            Commit the results of the active run and push them to every configured remote.

            For each remote the local working state is stashed, the remote 'master' branch is pulled
            (unrelated histories allowed) and the merged state is pushed. A stash created here is
            restored again even if pulling or pushing fails. Failures are logged per remote and
            never raised.
            :param pads: PyPads instance providing the api and the managed result git.
            :param args: Unused positional arguments.
            :param kwargs: Unused keyword arguments.
            :return: None
            """
            message = "Added results for run " + pads.api.active_run(
            ).info.run_id
            pads.managed_result_git.commit_changes(message=message)

            repo = pads.managed_result_git.repo
            remotes = repo.remotes

            if not remotes:
                logger.warning(
                    "Your results don't have any remote repository set. Set a remote repository "
                    "to enable automatic pushing.")
                return

            for remote in remotes:
                # Only pop the stash if this iteration really created a stash entry. Popping
                # without one fails and would report a successful push as failed.
                stashed = False
                try:
                    name, url = remote.name, list(remote.urls)[0]
                    # check if remote repo is bare and if it is initialize it with a temporary local repo
                    pads.managed_result_git.is_remote_empty(remote=name,
                                                            remote_url=url,
                                                            init=True)
                    # stash current state
                    stash_before = repo.git.stash('list')
                    repo.git.stash('push', '--include-untracked')
                    stashed = repo.git.stash('list') != stash_before
                    # Force pull
                    repo.git.pull(name, 'master',
                                  '--allow-unrelated-histories')
                    # Push merged changes
                    repo.git.push(name, 'master')
                    logger.info("Pushed your results automatically to " +
                                name + " @:" + url)
                except Exception as e:
                    logger.error(
                        "pushing logs to remote failed due to this error '{}'"
                        .format(str(e)))
                finally:
                    if stashed:
                        # pop the stash, also after a failure, to not leave local changes hidden
                        try:
                            repo.git.stash('pop')
                        except Exception as e:
                            logger.error(
                                "restoring the stashed changes failed due to this error '{}'"
                                .format(str(e)))
Ejemplo n.º 6
0
            def entry(_cls, *args, _pypads_context=context, pypads_mapped_by=mappings, **kwargs):
                """
                Replacement for the tracked class method which runs the hooks of its context.

                Without an active run the original function is called untouched and a single
                error is logged until a run becomes active again.
                :param _cls: Class the tracked function is bound to
                :param args: Arguments for the tracked function
                :param kwargs: Keyword arguments for the tracked function
                :return: Result of the tracked function respectively of its hook stack
                """
                logger.debug("Call to tracked class method " + str(fn))

                global error
                if not self._pypads.api.active_run():
                    # Report the missing run only once
                    if not error:
                        error = True
                        logger.error(
                            "No run was active to log your hooks. You may want to start a run with PyPads().start_track()")
                    unhooked = types.MethodType(fn, _cls)
                    return unhooked(*args, **kwargs)

                error = False
                with self._make_call(_cls, fn_reference) as call:
                    call_id = call.call_id
                    # add the function to the callback stack
                    stack = types.MethodType(fn, _cls)

                    if self._is_skip_recursion(call_id):
                        logger.info("Skipping " + str(call_id.context.container.__name__) + "." + str(
                            call_id.wrappee.__name__))
                        return stack(*args, **kwargs)

                    # Each hook wraps the callback built so far
                    for (hook, hook_config) in context.get_hooks(fn):
                        wrapped = self._add_hook(hook, hook_config, stack, call, context.get_wrap_metas(fn))
                        if wrapped:
                            stack = types.MethodType(wrapped, _cls)

                    # start executing the stack
                    result = stack(*args, **kwargs)
                return result
Ejemplo n.º 7
0
 def load(self):
     """
     Load the referenced object from the backend of the current PyPads instance.

     :return: Result of ``backend.get`` for the uid and storage type of this reference. None is
      returned, after logging an error, if the reference belongs to a backend with a different
      uri than the backend of the current PyPads instance.
     """
     from pypads.app.pypads import get_current_pads
     pads = get_current_pads()
     # Compare by value. An identity check (is not) reports equal uris held in distinct
     # string objects as a different backend.
     if self.backend_uri != pads.backend.uri:
         # TODO init backend if possible?
         logger.error("Can't load object due to unavailable backend.")
         return None
     return pads.backend.get(self.uid, self.storage_type)
Ejemplo n.º 8
0
 def restore_patch(self, patch):
     """
     Apply a pypads created patch file to the current repository.

     A git error raised while applying is logged and not raised.
     :param patch: path to the patch file
     :return: None
     """
     try:
         patch_files = [patch]
         self.repo.git.apply(patch_files)
     except (GitCommandError, GitError) as git_error:
         template = "Failed to restore state of the repository from patch file due to exception {}"
         logger.error(template.format(str(git_error)))
Ejemplo n.º 9
0
 def defensive_exit(signum=None, frame=None):
     """
     Run the wrapped exit function at most once and never raise.

     :param signum: Unused
     :param frame: Unused
     :return: Result of the exit function on its first successful run; None if it was already
      run or if running it failed (the failure is logged).
     """
     global executed_exit_fns
     try:
         if fn in executed_exit_fns:
             logger.debug(f"Already ran exit fn {fn}.")
             return None
         logger.debug(f"Running exit fn {fn}.")
         result = fn()
         # Only remember functions which ran through
         executed_exit_fns.add(fn)
         return result
     except (KeyboardInterrupt, Exception) as failure:
         logger.error("Couldn't run atexit function " + fn.__name__ +
                      " because of " + str(failure))
Ejemplo n.º 10
0
 def mapping_applicable_filter(name):
     """
     Check whether the mapping is applicable to the attribute ``name`` of the context container.

     :param name: Name of the attribute to check.
     :return: Result of ``is_applicable`` for the attribute. False if the attribute can't be
      accessed or if checking it ran into a RecursionError.
     """
     if not hasattr(ctx.container, name):
         logger.debug("Can't access attribute '" + str(name) +
                      "' on '" + str(ctx) + "'. Skipping.")
         return False

     try:
         candidate = getattr(ctx.container, name)
         return self.is_applicable(ctx, candidate)
     except RecursionError as rerr:
         logger.error(
             "Recursion error on '" + str(ctx) +
             "'. This might be because __get_attr__ is being wrapped. "
             + str(rerr))
     return False
Ejemplo n.º 11
0
 def _handle_error(self, *args, ctx, _pypads_env, error, **kwargs):
     """
     Handle error for DefensiveCallableMixin
     :param args:
     :param ctx:
     :param _pypads_env:
     :param error:
     :param kwargs:
     :return:
     """
     try:
         raise error
     except (NoCallAllowedError, Exception) as e:
         # do nothing and call the next run function
         logger.error(
             f"Logging failed for {str(self)} with error: {str(error)} \nTrace:\n{traceback.format_exc()}")
         pass
Ejemplo n.º 12
0
 def _init_git_repo(self, path, source=True):
     """
     Initializes a new git repo if none is found.
     :param path:
     :param source:
     :return:
     """
     import git
     try:
         self.repo = git.Repo.init(path, bare=False)
         self._add_git_ignore()
         if source:
             self.commit_changes(message="Pypads initial commit")
         logger.info("Repository was successfully initialized")
     except (InvalidGitRepositoryError, GitCommandError, GitError) as e:
         logger.error(
             "No repository was present and git could not initialize a repository in this directory"
             " {0} because of exception: {1}".format(path, e))
Ejemplo n.º 13
0
def dict_merge_caches(*dicts):
    """
    Merge any number of dicts into a new dict. Entries are overwritten if not mergeable. Cache is supported.

    Values of the same key are combined depending on the type of the later value:
    - dict: passed together with the value collected so far to dict_merge
    - list: appended to the value collected so far (a value without ``extend`` becomes the first
      element of a new list)
    - set: united with the set collected so far (a value which is no set is overwritten)
    - Cache: merged with the value collected so far via its merge function
    - anything else: overwrites the value collected so far
    Arguments which are no dicts are skipped. Lists and sets of the passed dicts are not modified.
    :param dicts: dicts to merge
    :return: The merged dict
    """
    merged = {}
    for d in dicts:
        if not isinstance(d, dict):
            continue
        for key, value in d.items():
            if isinstance(value, dict):
                node = merged.setdefault(key, {})
                merged[key] = dict_merge(node, value)
            elif isinstance(value, list):
                node = merged.setdefault(key, [])
                try:
                    node.extend(value)
                except AttributeError:
                    # The value collected so far is no list; keep it as first element
                    node = [node, *value]
                merged[key] = node
            elif isinstance(value, set):
                node = merged.get(key)
                if isinstance(node, set):
                    # node was created below, so updating it never touches an input set
                    node.update(value)
                else:
                    merged[key] = set(value)
            elif isinstance(value, Cache):
                node = merged.setdefault(key, Cache())
                merged[key] = value.merge(node)
            else:
                merged[key] = value
    return merged
Ejemplo n.º 14
0
    def _handle_error(self, *args, ctx, _pypads_env, error, **kwargs):
        """
        Function to handle an error executing the logging functionality. In general this should add a failure tag and
        log to console.

        NotImplementedError is ignored, NoCallAllowedError and PassThroughException are raised again and every
        other Exception is tagged on mlflow (best effort) and logged.
        :param args: Arguments passed to the function
        :param ctx: Context of the function
        :param _pypads_env: Pypads environment
        :param error: Exception which was raised on the execution
        :param kwargs: Kwargs passed to the function. kwargs['_logger_call'] is read to name the failure tag.
        :return: Tuple (None, 0) if the error was ignored or logged
        :raises NoCallAllowedError, PassThroughException: if error is of one of these types
        """
        try:
            # Raise the error to dispatch on its type via the except clauses below
            raise error from error
        except NotImplementedError:

            # Ignore if only pre or post where defined
            return None, 0
        except (NoCallAllowedError, PassThroughException) as e:

            # Pass No Call Allowed Error through
            raise e from e
        except Exception as e:

            # Catch other exceptions for this single logger
            try:
                # Failure at timestamp
                # TODO Failure list mlflow.get_run(run_id=_pypads_env.run_id).tags
                mlflow.set_tag(
                    f"pypads.failure.{kwargs['_logger_call'].creator.name}.{str(time.time())}",
                    str(error))
            except Exception as e:
                # Setting the tag is best effort; a failure here must not hide the error logged below
                pass
            logger.error(
                f"Tracking failed for {str(_pypads_env)} with: {str(error)} \nTrace:\n{traceback.format_exc()}"
            )
            return None, 0
Ejemplo n.º 15
0
    def _handle_error(self, *args, ctx, _pypads_env, error, **kwargs):
        """
        Handle error for DefensiveCallableMixin
        :param args:
        :param ctx:
        :param _pypads_env:
        :param error:
        :param kwargs:
        :return:
        """
        try:
            logger.error("Logging failed for " + str(self) + ": " +
                         str(error) + "\nTrace:\n" + traceback.format_exc())
            raise error
        except MissingDependencyError as e:
            return _pypads_env.callback(*args, **kwargs)
        except NoCallAllowedError as e:
            # Call next wrapped callback if no call was allowed due to the settings or environment
            _pypads_hook_params = _pypads_env.parameter
            return self.__call_wrapped__(ctx,
                                         _pypads_env=_pypads_env,
                                         _args=args,
                                         _kwargs=kwargs,
                                         **_pypads_hook_params)
        except Exception as e:
            # Try to call the original unwrapped function if something broke
            original = _pypads_env.call.call_id.context.original(
                _pypads_env.callback)
            if callable(original):
                try:
                    logger.error("Trying to recover from: " + str(e))
                    out = original(ctx, *args, **kwargs)
                    logger.success("Succeeded recovering on error : " + str(e))
                    return out
                except TypeError as e:
                    logger.error(
                        "Recovering failed due to: " + str(e) +
                        ". Trying to call without passed ctx. This might be due to an error in the wrapping."
                    )
                    out = original(*args, **kwargs)
                    logger.success("Succeeded recovering on error : " + str(e))
                    return out
            else:

                # Original function was not accessible
                raise Exception(
                    "Couldn't fall back to original function for " + str(
                        _pypads_env.logger_call.call_id.context.original_name(
                            _pypads_env.callback)) + " on " +
                    str(_pypads_env.logger_call.call_id.context) +
                    ". Can't recover from " + str(error))
Ejemplo n.º 16
0
    def __post__(self, ctx, *args, _pypads_env: InjectionLoggerEnv,
                 _logger_call, _logger_output: Union['ParametersILFOutput',
                                                     LoggerOutput], **kwargs):
        """
        Function logging the parameters of the current pipeline object function call.

        The parameters to log are taken from the "estimator" / "parameters" entry of the mapping data if
        it is defined. Otherwise they are extracted via ctx.get_params() if available, or from the named
        arguments in kwargs["_kwargs"] as a last resort. A parameter which can't be persisted is logged
        as error and skipped.
        :param ctx: Object the tracked function was called on
        :param args: Unused positional arguments
        :param _pypads_env: Logger environment providing the mapping data and the tracked call
        :param _logger_call: Unused
        :param _logger_output: Output the stored parameter tracking object is set on
        :param kwargs: Keyword arguments. "_kwargs" is read as dict of named arguments of the tracked call.
        :return: None
        """

        mapping_data = _pypads_env.data

        # Get the estimator name
        estimator = data_str(mapping_data,
                             "estimator",
                             "@schema",
                             "rdfs:label",
                             default=ctx.__class__.__name__)

        hyper_params = FunctionParametersTO(
            estimator=estimator,
            description=f"The parameters of estimator {estimator} with {ctx}.",
            parent=_logger_output)

        # Named arguments of the tracked call. kwargs is a plain dict, so they have to be read by
        # key; attribute access (getattr / hasattr) never finds them.
        call_kwargs = kwargs.get("_kwargs") or {}

        # List of parameters to extract. Either provided by a mapping file or by get_params function or by _kwargs
        relevant_parameters = []

        if data_path(
                mapping_data,
                "estimator",
                "parameters",
                warning="No parameters are defined on the mapping file for " +
                str(ctx.__class__) +
                ". Trying to log parameters without schema definition programmatically."
        ):
            for parameters in data_path(mapping_data,
                                        "estimator",
                                        "parameters",
                                        default={}).values():
                for parameter in parameters:
                    parameter = data_path(parameter, "@schema")
                    key = data_path(parameter, "padre:path")
                    name = data_path(parameter, "rdfs:label")

                    param_dict = {
                        "name": name,
                        "description": data_path(parameter, "rdfs:comment"),
                        "parameter_type": data_path(parameter,
                                                    "padre:value_type")
                    }

                    # Prefer the attribute on the object, fall back to the named call arguments
                    if hasattr(ctx, key):
                        value = getattr(ctx, key)
                    elif key in call_kwargs:
                        value = call_kwargs[key]
                    else:
                        logger.warning(
                            f"Couldn't extract value of in schema defined parameter {parameter}."
                        )
                        continue
                    param_dict["value"] = value
                    add_data(mapping_data,
                             "is_a",
                             value=data_path(parameter, "@id"))
                    relevant_parameters.append(param_dict)

        else:
            get_params = getattr(ctx, "get_params", None)
            if callable(get_params):

                # Extracting via get_params (valid for sklearn)
                relevant_parameters = [{
                    "name": k,
                    "value": v
                } for k, v in ctx.get_params().items()]
            else:

                # Trying to get at least the named arguments
                relevant_parameters = [{
                    "name": k,
                    "value": v
                } for k, v in call_kwargs.items()]

        for i, param in enumerate(relevant_parameters):
            name = data_path(param,
                             "name",
                             default="UnknownParameter" + str(i))
            description = data_path(param, "description")
            value = data_path(param, "value")
            parameter_type = data_path(param,
                                       "parameter_type",
                                       default=str(type(value)))

            # A failure on one parameter must not prevent logging the remaining ones
            try:
                from pypads.app.pypads import get_current_pads
                call_number = get_current_pads().call_tracker.call_number(
                    _pypads_env.call.call_id)
                hyper_params.persist_parameter(".".join(
                    [estimator, str(call_number), name]),
                                               str(value),
                                               param_type=parameter_type,
                                               description=description,
                                               additional_data=mapping_data)
            except Exception:
                logger.error(
                    f"Couldn't log parameter {estimator + '.' + name} with value {value}"
                )

        _logger_output.hyper_parameter_to = hyper_params.store()
Ejemplo n.º 17
0
    def track_class(self,
                    cls,
                    ctx=None,
                    fn_anchors: dict = None,
                    mappings: Dict[str, Mapping] = None,
                    additional_data=None):
        """
        Method to inject logging capabilities into a class functions
        :param additional_data: Additional meta data to be provided to the tracking. This should be used to map to rdf.
        :param cls: Class to extend
        :param ctx: Ctx which defined the function
        :param fn_anchors: {Functions :Anchors} to trigger on each function call of the class. The anchors of a
         function can be a single anchor or a collection of anchors, each given as Anchor or as anchor name.
         The passed dict is not modified. Defaults to the "pypads_log" anchor on __init__.
        :param mappings: Mapping defining this extension
        :return: The extended function
        :raises TypeError: if fn_anchors is neither None nor a dict
        """
        if additional_data is None:
            additional_data = {}

        # Warn if ctx doesn't defined the function we want to track
        if ctx is not None and not hasattr(ctx, cls.__name__):
            logger.warning("Given context " + str(ctx) + " doesn't define " +
                           str(cls.__name__))
            ctx = None

        # If we don't have a valid ctx the class is unbound,
        # so we create a dummy ctx holding the class, otherwise we can extract the ctx path
        if ctx is not None:
            # __module__ already is the name of the defining module (a str without __name__).
            # It is compared by value, as identity of equal strings is not guaranteed.
            module_name = getattr(ctx, '__module__', None)
            if isinstance(
                    module_name,
                    str) and module_name != str.__class__.__module__:
                ctx_path = module_name
            else:
                ctx_path = ctx.__name__
        else:
            ctx = ModuleType("<unbound-module>")
            setattr(ctx, cls.__name__, cls)
            ctx_path = ctx.__name__

        if fn_anchors is None:
            fn_anchors = {cls.__init__.__name__: [get_anchor("pypads_log")]}
        elif not isinstance(fn_anchors, dict):
            message = 'function anchors passed to track class has to be a dict of funtions, anchors'
            logger.error(message)
            # Continuing would only fail later on fn_anchors.items()
            raise TypeError(message)

        # Resolve the anchors into a new dict to leave the dict of the caller untouched
        resolved_anchors = {}
        for fn, anchors in fn_anchors.items():
            # A single anchor is wrapped. A str is iterable as well, but stands for one anchor name.
            if isinstance(anchors,
                          (str, Anchor)) or not isinstance(anchors, Iterable):
                anchors = [anchors]

            _anchors = set()
            for a in anchors:
                if isinstance(a, str):
                    anchor = get_anchor(a)
                    if anchor is None:
                        anchor = Anchor(a, "No description available")
                    _anchors.add(anchor)
                elif isinstance(a, Anchor):
                    _anchors.add(a)
            resolved_anchors[fn] = _anchors
        fn_anchors = resolved_anchors

        # Package path prefix shared by all functions of the class
        cls_path = ctx_path + "." + cls.__name__ + "."

        # If no mapping was given a default mapping has to be created
        if mappings is None:
            logger.warning(
                "Tracking a function without a mapping definition. A default mapping will be generated."
            )
            _matched_mappings = set()
            lib = cls.__dict__.get('__file__', cls.__module__)

            # For all events we want to hook to each class function
            for fn, anchors in fn_anchors.items():
                _mapping = Mapping(
                    PackagePathMatcher(cls_path + fn),
                    make_run_time_mapping_collection(lib), anchors, set(), {
                        **additional_data,
                        "mapped_by": "http://www.padre-lab.eu/onto/PyPadsApi"
                    })
                _matched_mappings.add(
                    MatchedMapping(_mapping, PackagePath(cls_path + fn)))
        else:
            _matched_mappings = {
                MatchedMapping(mapping, PackagePath(cls_path + fn))
                for fn, mapping in mappings.items()
            }

        # Wrap the function of given context and return it
        return self.pypads.wrap_manager.wrap(
            cls, ctx=ctx, matched_mappings=_matched_mappings)