def add_mapping(self, mapping: MappingCollection, key=None):
    """
    Register a mapping collection in the registry.

    If no key is passed and the mapping is a MappingFile, the ``lib`` of the
    mapping is used as key. If no key can be determined an error is logged and
    nothing is registered.
    :param mapping: Mapping collection to be added to the registry.
    :param key: Key for the mapping collection. TODO remove key?
    :return: None
    """
    registry_key = key
    if registry_key is None and isinstance(mapping, MappingFile):
        registry_key = mapping.lib

    if registry_key is None:
        logger.error(
            "Couldn't add mapping " + str(mapping) +
            " to the pypads mapping registry. Lib or key are undefined.")
        return

    repository = self._pypads.mapping_repository
    uid = mapping._hash
    # Only mappings unknown to the repository are logged
    if not repository.has_object(uid=uid):
        repository_object = repository.get_object(uid=uid)
        # Just init context once here
        with repository_object.init_context():
            if isinstance(mapping, MappingFile):
                mapping.mapping_file = repository_object.log_artifact(
                    local_path=mapping.path,
                    description="A copy of the mapping file used.")
            repository_object.log_json(mapping)
    self._mappings[registry_key] = mapping
def log_json(self, entry, uid=None):
    """
    Store a json serializable entry in the collection named by its storage type.

    :param entry: A dict, a BaseStorageModel or a ModelObject. Model instances
     are converted to dicts via their ``dict`` method.
    :param uid: Optional uid written to the entry before it is stored.
    :return: The entry itself for embedded entries, None if the entry defines
     no storage_type, otherwise the reference built for the entry.
    :raises ValueError: If entry is none of the supported types.
    """
    if isinstance(entry, dict):
        pass
    elif isinstance(entry, BaseStorageModel):
        entry = entry.dict(by_alias=True)
    elif isinstance(entry, ModelObject):
        entry = entry.dict(force=False, by_alias=True)
    else:
        raise ValueError(f"{entry} of wrong type.")

    if "storage_type" not in entry:
        logger.error(
            f"Tried to log an invalid entry. Json logged data has to define a storage_type. For entry {entry}"
        )
        return None

    if entry['storage_type'] == ResultType.embedded:
        # Instead of a path an embedded object should return the object itself and not be stored to our backend
        return entry

    if uid is not None:
        entry["uid"] = uid

    reference = to_reference(entry)
    document_id = reference.id
    entry["_id"] = document_id

    # The collection is addressed by the plain value of the storage type
    storage_type = entry["storage_type"]
    if isinstance(storage_type, ResultType):
        storage_type = storage_type.value

    collection = self._db[storage_type]
    try:
        collection.insert_one(jsonable_encoder(entry))
    except DuplicateKeyError:
        # An entry with this id exists already, overwrite it
        collection.replace_one({"_id": document_id}, jsonable_encoder(entry))
    return reference
def finish(self, call):
    """
    Remove a call from the call stack.

    An error is logged if the call is not on the stack.
    :param call: Call to be finished.
    :return: None
    """
    if call not in self._call_stack:
        logger.error("Tried to finish call which is not on the stack. " + str(call))
        return
    self._call_stack.remove(call)
    # TODO clear memory in call_objects?
def _get_gpu_usage(gpu_count):
    """
    Collect usage statistics for the first ``gpu_count`` NVML devices.

    :param gpu_count: Number of devices to query (indices 0..gpu_count-1).
    :return: A list with one tuple per device
     (handle, gpu utilization, memory utilization, used memory in percent,
     temperature, power usage in percent of the enforced limit), or None if
     NVML fails for any device. A power usage of -9999 marks devices for which
     only the power reading failed.
    """
    import pynvml
    gpus = []
    for i in range(gpu_count):
        try:
            # The handle lookup is part of the guarded section: it raises
            # NVMLError as well (e.g. for an invalid index).
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            util = pynvml.nvmlDeviceGetUtilizationRates(handle)
            memory = pynvml.nvmlDeviceGetMemoryInfo(handle)
            temp = pynvml.nvmlDeviceGetTemperature(
                handle, pynvml.NVML_TEMPERATURE_GPU)
            try:
                power_usage = (
                    pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0
                ) / (pynvml.nvmlDeviceGetEnforcedPowerLimit(handle) / 1000.0) * 100
            except pynvml.NVMLError as e:
                # A missing power reading doesn't invalidate the other values
                logger.error(
                    "Coudln't extract power usage due to NVML exception: {}".format(str(e)))
                power_usage = -9999
            gpus.append(
                (handle, util.gpu, util.memory,
                 (memory.used / float(memory.total)) * 100, temp, power_usage))
        except pynvml.NVMLError as e:
            logger.error(
                "Coudln't extract gpu usage information due to NVML exception: {}"
                .format(str(e)))
            return None
    return gpus
def commit(pads, *args, **kwargs):
    """
    Commit the results of the active run and push them to every remote of the
    managed result repository.

    For each remote the current state is stashed, 'master' is pulled and
    pushed and the stash is restored afterwards. The stash is restored even if
    pulling or pushing fails, and only if stashing actually created an entry.
    Failures are logged and don't abort the handling of the other remotes.
    :param pads: PyPads instance providing api and managed_result_git.
    :param args: Unused.
    :param kwargs: Unused.
    :return: None
    """
    message = "Added results for run " + pads.api.active_run().info.run_id
    pads.managed_result_git.commit_changes(message=message)

    repo = pads.managed_result_git.repo
    remotes = repo.remotes

    if not remotes:
        logger.warning(
            "Your results don't have any remote repository set. Set a remote repository "
            "to enable automatic pushing.")
        return

    for remote in remotes:
        name, url = remote.name, list(remote.urls)[0]
        try:
            # check if remote repo is bare and if it is initialize it with a temporary local repo
            pads.managed_result_git.is_remote_empty(remote=name, remote_url=url, init=True)

            # stash current state; compare the stash list to find out whether
            # an entry was created (nothing is stashed on a clean tree)
            stash_before = repo.git.stash('list')
            repo.git.stash('push', '--include-untracked')
            stashed = repo.git.stash('list') != stash_before
            try:
                # Force pull
                repo.git.pull(name, 'master', '--allow-unrelated-histories')
                # Push merged changes
                repo.git.push(name, 'master')
                logger.info("Pushed your results automatically to " + name + " @:" + url)
            finally:
                # pop the stash, also if pull / push failed
                if stashed:
                    try:
                        repo.git.stash('pop')
                    except Exception as pop_error:
                        # Logged separately to not hide a pull / push error
                        logger.error(
                            "restoring the stashed state failed due to this error '{}'"
                            .format(str(pop_error)))
        except Exception as e:
            logger.error(
                "pushing logs to remote failed due to this error '{}'".format(str(e)))
def entry(_cls, *args, _pypads_context=context, pypads_mapped_by=mappings, **kwargs):
    """
    Replacement for a tracked class method.

    If a run is active the hooks defined for the function are chained around
    the original function and the resulting callback is executed. Without an
    active run the original function is called directly and an error is
    logged once.
    :param _cls: Class the method is bound to.
    :param args: Positional arguments of the call.
    :param kwargs: Keyword arguments of the call.
    :return: Result of the executed callback.
    """
    logger.debug("Call to tracked class method " + str(fn))

    global error
    if not self._pypads.api.active_run():
        # Only report the missing run once
        if not error:
            error = True
            logger.error(
                "No run was active to log your hooks. You may want to start a run with PyPads().start_track()")
        untracked = types.MethodType(fn, _cls)
        return untracked(*args, **kwargs)

    error = False
    with self._make_call(_cls, fn_reference) as call:
        accessor = call.call_id
        # add the function to the callback stack
        callback = types.MethodType(fn, _cls)

        if self._is_skip_recursion(accessor):
            logger.info("Skipping " + str(accessor.context.container.__name__) + "." + str(
                accessor.wrappee.__name__))
            return callback(*args, **kwargs)

        # for every hook add a wrapping callback
        for (hook, config) in context.get_hooks(fn):
            wrapped = self._add_hook(hook, config, callback, call, context.get_wrap_metas(fn))
            if wrapped:
                callback = types.MethodType(wrapped, _cls)

        # start executing the stack
        out = callback(*args, **kwargs)
    return out
def load(self):
    """
    Load the referenced object from the backend of the current pads.

    :return: The result of ``pads.backend.get`` for uid and storage type of
     this object, or None if the object belongs to another backend.
    """
    from pypads.app.pypads import get_current_pads
    pads = get_current_pads()
    # Compare the uris by value. An identity check would reject equal uris
    # held in different string objects.
    if self.backend_uri != pads.backend.uri:
        # TODO init backend if possible?
        logger.error("Can't load object due to unavailable backend.")
        return None
    return pads.backend.get(self.uid, self.storage_type)
def restore_patch(self, patch):
    """
    Apply a pypads created patch on the current repository. Git errors are
    logged and not raised.
    :param patch: path to the patch file
    :return: None
    """
    patch_files = [patch]
    try:
        self.repo.git.apply(patch_files)
    except (GitCommandError, GitError) as git_error:
        logger.error(
            "Failed to restore state of the repository from patch file due to exception {}"
            .format(str(git_error)))
def defensive_exit(signum=None, frame=None):
    """
    Run the wrapped exit function at most once.

    :param signum: Unused, allows the function to be called like a signal handler.
    :param frame: Unused, allows the function to be called like a signal handler.
    :return: Result of the exit function, None if it ran already or failed.
    """
    global executed_exit_fns
    try:
        if fn in executed_exit_fns:
            logger.debug(f"Already ran exit fn {fn}.")
            return None
        logger.debug(f"Running exit fn {fn}.")
        result = fn()
        # Only successfully executed functions are marked as done
        executed_exit_fns.add(fn)
        return result
    except (KeyboardInterrupt, Exception) as e:
        logger.error("Couldn't run atexit function " + fn.__name__ + " because of " + str(e))
def mapping_applicable_filter(name):
    """
    Check if the mapping is applicable to an attribute of the context container.

    :param name: Name of the attribute on ctx.container.
    :return: Result of is_applicable for the attribute, False if the attribute
     can't be accessed or a RecursionError was raised.
    """
    if not hasattr(ctx.container, name):
        logger.debug("Can't access attribute '" + str(name) + "' on '" + str(ctx) + "'. Skipping.")
        return False
    try:
        return self.is_applicable(ctx, getattr(ctx.container, name))
    except RecursionError as rerr:
        logger.error(
            "Recursion error on '" + str(ctx) +
            "'. This might be because __get_attr__ is being wrapped. " + str(rerr))
    return False
def _handle_error(self, *args, ctx, _pypads_env, error, **kwargs):
    """
    Handle error for DefensiveCallableMixin by logging it together with its
    trace. The error is not propagated.
    :param args: Unused.
    :param ctx: Unused.
    :param _pypads_env: Unused.
    :param error: Error which was raised.
    :param kwargs: Unused.
    :return: None
    """
    try:
        # Re-raise to have the error as current exception for format_exc
        raise error
    except (NoCallAllowedError, Exception):
        # do nothing and call the next run function
        logger.error(
            f"Logging failed for {str(self)} with error: {str(error)} \nTrace:\n{traceback.format_exc()}")
def _init_git_repo(self, path, source=True):
    """
    Initializes a new git repo at the given path and adds a git ignore.
    Git errors are logged and not raised.
    :param path: Directory in which the repository is initialized.
    :param source: If true an initial commit is created.
    :return: None
    """
    import git
    try:
        self.repo = git.Repo.init(path, bare=False)
        self._add_git_ignore()
        if source:
            self.commit_changes(message="Pypads initial commit")
        logger.info("Repository was successfully initialized")
    except (InvalidGitRepositoryError, GitCommandError, GitError) as git_error:
        failure = ("No repository was present and git could not initialize a repository in this directory"
                   " {0} because of exception: {1}")
        logger.error(failure.format(path, git_error))
def dict_merge_caches(*dicts):
    """
    Merge any number of dicts into a new dict. Entries are overwritten if not
    mergeable. Cache is supported.

    Values are merged by type of the incoming value:
    dicts via dict_merge, lists by concatenation (a non list value already
    present becomes the first element), sets by union (a non set value already
    present is overwritten), Caches via their merge method. Everything else
    overwrites the present value. Arguments which are no dicts are skipped.
    Lists and sets of the passed dicts are not modified.
    :param dicts: dicts to merge
    :return: The merged dict
    """
    merged = {}
    for d in dicts:
        if not isinstance(d, dict):
            continue
        for key, value in d.items():
            if isinstance(value, dict):
                node = merged.setdefault(key, {})
                merged[key] = dict_merge(node, value)
            elif isinstance(value, list):
                node = merged.setdefault(key, [])
                try:
                    node.extend(value)
                except AttributeError:
                    # The present value is no list, make it the first element
                    node = [node, *value]
                merged[key] = node
            elif isinstance(value, set):
                present = merged.get(key)
                if isinstance(present, set):
                    # Sets in merged are always copies made below, so they
                    # can be extended in place. Equal elements are kept once.
                    present.update(value)
                else:
                    merged[key] = set(value)
            elif isinstance(value, Cache):
                node = merged.setdefault(key, Cache())
                merged[key] = value.merge(node)
            else:
                merged[key] = value
    return merged
def _handle_error(self, *args, ctx, _pypads_env, error, **kwargs):
    """
    Function to handle an error executing the logging functionality. In general this should add a failure tag and
    log to console.
    :param args: Arguments passed to the function
    :param ctx: Context of the function
    :param _pypads_env: Pypads environment
    :param error: Exception which was raised on the execution
    :param kwargs: Kwargs passed to the function. '_logger_call' is read to name the failure tag.
    :return: (None, 0) if the error was handled here
    :raises NoCallAllowedError, PassThroughException: These errors are passed through to the caller
    """
    try:
        # Raised with itself as cause like before (suppresses the display of
        # the context of the error in the formatted trace)
        raise error from error
    except NotImplementedError:
        # Ignore if only pre or post where defined
        return None, 0
    except (NoCallAllowedError, PassThroughException) as e:
        # Pass No Call Allowed Error through
        raise e from e
    except Exception:
        # Catch other exceptions for this single logger
        try:
            # Failure at timestamp
            # TODO Failure list
            mlflow.get_run(run_id=_pypads_env.run_id).tags
            mlflow.set_tag(
                f"pypads.failure.{kwargs['_logger_call'].creator.name}.{str(time.time())}",
                str(error))
        except Exception as tag_error:
            # Tagging is best effort, but leave a hint why the tag is missing
            logger.debug(
                f"Couldn't set failure tag for {str(_pypads_env)}: {str(tag_error)}")
        logger.error(
            f"Tracking failed for {str(_pypads_env)} with: {str(error)} \nTrace:\n{traceback.format_exc()}"
        )
        return None, 0
def _handle_error(self, *args, ctx, _pypads_env, error, **kwargs):
    """
    Handle error for DefensiveCallableMixin.

    The error is logged and then handled by type: on a MissingDependencyError
    the callback of the environment is called, on a NoCallAllowedError the
    wrapped call is executed, on any other exception the original function is
    called (with ctx first, on a TypeError once more without ctx).
    :param args: Arguments of the call.
    :param ctx: Context of the call.
    :param _pypads_env: Pypads environment.
    :param error: Error which was raised.
    :param kwargs: Keyword arguments of the call.
    :return: Result of the function called to recover.
    :raises Exception: If the original function is not callable.
    """
    try:
        logger.error("Logging failed for " + str(self) + ": " + str(error) + "\nTrace:\n" + traceback.format_exc())
        raise error
    except MissingDependencyError:
        return _pypads_env.callback(*args, **kwargs)
    except NoCallAllowedError:
        # Call next wrapped callback if no call was allowed due to the settings or environment
        hook_params = _pypads_env.parameter
        return self.__call_wrapped__(ctx, _pypads_env=_pypads_env, _args=args, _kwargs=kwargs,
                                     **hook_params)
    except Exception as failure:
        # Try to call the original unwrapped function if something broke
        original = _pypads_env.call.call_id.context.original(_pypads_env.callback)

        if not callable(original):
            # Original function was not accessible
            raise Exception(
                "Couldn't fall back to original function for " + str(
                    _pypads_env.logger_call.call_id.context.original_name(_pypads_env.callback)) + " on " + str(
                    _pypads_env.logger_call.call_id.context) + ". Can't recover from " + str(error))

        try:
            logger.error("Trying to recover from: " + str(failure))
            result = original(ctx, *args, **kwargs)
            logger.success("Succeeded recovering on error : " + str(failure))
            return result
        except TypeError as type_failure:
            logger.error(
                "Recovering failed due to: " + str(type_failure) +
                ". Trying to call without passed ctx. This might be due to an error in the wrapping."
            )
            result = original(*args, **kwargs)
            # The TypeError is reported here, like before
            logger.success("Succeeded recovering on error : " + str(type_failure))
            return result
def __post__(self, ctx, *args, _pypads_env: InjectionLoggerEnv, _logger_call,
             _logger_output: Union['ParametersILFOutput', LoggerOutput], **kwargs):
    """
    Function logging the parameters of the current pipeline object function call.

    The parameters to log are taken from the mapping data ("estimator" ->
    "parameters") if defined there. Their values are read from ctx or from
    the keyword arguments passed as '_kwargs'. Without a definition in the
    mapping data the parameters are taken from ctx.get_params() if available
    and from '_kwargs' otherwise.
    :param ctx: Object the tracked function was called on.
    :param _pypads_env: Environment providing the mapping data and the call.
    :param _logger_call: Unused.
    :param _logger_output: Output to which the stored parameters are attached.
    :param kwargs: '_kwargs' is expected to hold the keyword arguments of the tracked call.
    :return: None
    """

    mapping_data = _pypads_env.data

    # Get the estimator name
    estimator = data_str(mapping_data, "estimator", "@schema", "rdfs:label",
                         default=ctx.__class__.__name__)

    hyper_params = FunctionParametersTO(
        estimator=estimator,
        description=f"The parameters of estimator {estimator} with {ctx}.",
        parent=_logger_output)

    # Keyword arguments of the tracked call. This is a dict and has to be
    # accessed by key, not by attribute.
    call_kwargs = kwargs.get("_kwargs") or {}

    # List of parameters to extract. Either provided by a mapping file or by get_params function or by _kwargs
    relevant_parameters = []

    if data_path(_pypads_env.data, "estimator", "parameters",
                 warning="No parameters are defined on the mapping file for " + str(
                     ctx.__class__) + ". Trying to log parameters without schema definition programmatically."):
        for parameters in data_path(mapping_data, "estimator", "parameters", default={}).values():
            for parameter in parameters:
                parameter = data_path(parameter, "@schema")
                key = data_path(parameter, "padre:path")
                name = data_path(parameter, "rdfs:label")

                param_dict = {
                    "name": name,
                    "description": data_path(parameter, "rdfs:comment"),
                    "parameter_type": data_path(parameter, "padre:value_type")
                }

                # A schema without a usable path can't be resolved to a value
                has_key = isinstance(key, str)
                if has_key and hasattr(ctx, key):
                    value = getattr(ctx, key)
                elif has_key and key in call_kwargs:
                    value = call_kwargs[key]
                else:
                    logger.warning(
                        f"Couldn't extract value of in schema defined parameter {parameter}."
                    )
                    continue
                param_dict["value"] = value
                add_data(mapping_data, "is_a", value=data_path(parameter, "@id"))
                relevant_parameters.append(param_dict)
    else:
        get_params = getattr(ctx, "get_params", None)
        if callable(get_params):
            # Extracting via get_params (valid for sklearn)
            relevant_parameters = [{"name": k, "value": v} for k, v in ctx.get_params().items()]
        else:
            # Trying to get at least the named arguments
            relevant_parameters = [{"name": k, "value": v} for k, v in call_kwargs.items()]

    for i, param in enumerate(relevant_parameters):
        name = data_path(param, "name", default="UnknownParameter" + str(i))
        description = data_path(param, "description")
        value = data_path(param, "value")
        parameter_type = data_path(param, "parameter_type", default=str(type(value)))

        try:
            from pypads.app.pypads import get_current_pads
            call_number = get_current_pads().call_tracker.call_number(_pypads_env.call.call_id)
            hyper_params.persist_parameter(".".join([estimator, str(call_number), name]), str(value),
                                           param_type=parameter_type,
                                           description=description,
                                           additional_data=mapping_data)
        except Exception as e:
            # Formatting instead of concatenation: name may be no string,
            # which must not raise inside of this handler
            logger.error(
                f"Couldn't log parameter {estimator}.{name} with value {value} due to: {e}")

    _logger_output.hyper_parameter_to = hyper_params.store()
def track_class(self, cls, ctx=None, fn_anchors: dict = None, mappings: Dict[str, Mapping] = None,
                additional_data=None):
    """
    Method to inject logging capabilities into a class functions
    :param additional_data: Additional meta data to be provided to the tracking. This should be used to map to rdf.
    :param cls: Class to extend
    :param ctx: Ctx which defined the function
    :param fn_anchors: {Functions :Anchors} to trigger on each function call of the class. The anchors of a
     function can be a single anchor or an iterable of anchors, given as Anchor or by name. The passed dict is
     not modified.
    :param mappings: Mapping defining this extension
    :return: The extended function. If fn_anchors is neither None nor a dict an error is logged and cls is
     returned unchanged.
    """
    if additional_data is None:
        additional_data = {}

    # Warn if ctx doesn't defined the function we want to track
    if ctx is not None and not hasattr(ctx, cls.__name__):
        logger.warning("Given context " + str(ctx) + " doesn't define " + str(cls.__name__))
        ctx = None

    # If we don't have a valid ctx the class is unbound,
    # so we create a dummy ctx holding the class, otherwise we can extract the ctx path
    if ctx is not None:
        # __module__ is the name of the defining module (a string). Builtins
        # are addressed by their name only.
        ctx_module = getattr(ctx, '__module__', None)
        if isinstance(ctx_module, str) and ctx_module != "builtins":
            ctx_path = ctx_module + "." + ctx.__name__
        else:
            ctx_path = ctx.__name__
    else:
        ctx = ModuleType("<unbound-module>")
        setattr(ctx, cls.__name__, cls)
        ctx_path = ctx.__name__

    if fn_anchors is None:
        fn_anchors = {cls.__init__.__name__: [get_anchor("pypads_log")]}
    elif not isinstance(fn_anchors, dict):
        logger.error('function anchors passed to track class has to be a dict of funtions, anchors')
        # Nothing can be tracked without valid anchors
        return cls

    # Resolve the anchors of each function to a set of Anchor objects
    resolved_anchors = {}
    for fn, anchors in fn_anchors.items():
        # A single anchor, also if given by name (strings are iterable), is
        # handled like a list holding it
        if isinstance(anchors, (str, Anchor)) or not isinstance(anchors, Iterable):
            anchors = [anchors]
        _anchors = set()
        for a in anchors:
            if isinstance(a, str):
                anchor = get_anchor(a)
                if anchor is None:
                    anchor = Anchor(a, "No description available")
                _anchors.add(anchor)
            elif isinstance(a, Anchor):
                _anchors.add(a)
        resolved_anchors[fn] = _anchors
    fn_anchors = resolved_anchors

    # If no mapping was given a default mapping has to be created
    if mappings is None:
        logger.warning("Tracking a function without a mapping definition. A default mapping will be generated.")
        if '__file__' in cls.__dict__:
            lib = cls.__dict__['__file__']
        else:
            lib = cls.__module__

        # For all events we want to hook to each class function
        _matched_mappings = set()
        for fn, anchors in fn_anchors.items():
            fn_path = ctx_path + "." + cls.__name__ + "." + fn
            _mapping = Mapping(PackagePathMatcher(fn_path),
                               make_run_time_mapping_collection(lib), anchors, set(),
                               {**additional_data, "mapped_by": "http://www.padre-lab.eu/onto/PyPadsApi"})
            _matched_mappings.add(MatchedMapping(_mapping, PackagePath(fn_path)))
    else:
        _matched_mappings = {
            MatchedMapping(mapping, PackagePath(ctx_path + "." + cls.__name__ + "." + fn))
            for fn, mapping in mappings.items()
        }

    # Wrap the function of given context and return it
    return self.pypads.wrap_manager.wrap(cls, ctx=ctx, matched_mappings=_matched_mappings)