class TaskRunMetricsFileStoreReader(object):
    """Read metrics, run info and artifacts of a single task run attempt.

    Every lookup goes through a ``TaskRunMetaFiles`` object built from the
    attempt folder, which resolves the target of each metric and artifact.
    """

    def __init__(self, attempt_folder, **kwargs):
        """Create a reader on top of ``attempt_folder``.

        Extra keyword arguments are forwarded to the parent ``__init__``.
        """
        super(TaskRunMetricsFileStoreReader, self).__init__(**kwargs)
        self.meta = TaskRunMetaFiles(attempt_folder)

    def _get_all_metrics_names(self, source=None):
        """Return the base name of every partition in the metric folder."""
        metrics_root = self.meta.get_metric_folder(source=source)
        return [os.path.basename(str(p)) for p in metrics_root.list_partitions()]

    def get_metric_history(self, key, source=None):
        """Return every recorded sample of metric ``key`` as ``Metric`` objects.

        Each non-blank line of the metric target must hold two
        whitespace-separated fields, ``<timestamp> <value>``. The value is
        converted with ``float`` and the timestamp with
        ``datetime.fromtimestamp(int(...))``.

        Raises:
            DatabandError: if the metric target does not exist.
            ValueError: if a line does not hold exactly two fields, or a
                field cannot be converted.
        """
        metric_target = self.meta.get_metric_target(key, source=source)
        if not metric_target.exists():
            raise DatabandError("Metric '%s' not found" % key)

        history = []
        for line in metric_target.readlines():
            # Split on any whitespace so tabs or repeated spaces are accepted.
            fields = line.split()
            if not fields:
                # A blank line carries no sample; skip it instead of failing.
                continue
            ts, val = fields
            history.append(Metric(key, float(val), datetime.fromtimestamp(int(ts))))
        return history

    def get_all_metrics_values(self, source=None):
        """Return a ``{metric key: value}`` dict for every metric of ``source``.

        Raises:
            DatabandError: if any single metric fails to load; the original
                exception is attached as ``nested_exceptions``.
        """
        metrics = []
        for key in self._get_all_metrics_names(source=source):
            try:
                metrics.append(self.get_metric(key, source=source))
            except Exception as ex:
                raise DatabandError(
                    "Failed to read metrics for %s at %s" % (key, self.meta.root),
                    nested_exceptions=ex,
                )
        return {m.key: m.value for m in metrics}

    def get_run_info(self):
        """Load the attempt's meta data file through ``RunInfoSchema``."""
        from dbnd.api.serialization.run import RunInfoSchema

        with self.meta.get_meta_data_file().open("r") as yaml_file:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects and is rejected by recent PyYAML
            # releases. Consider yaml.safe_load once it is confirmed that the
            # meta data file only contains plain YAML types.
            return RunInfoSchema().load(**yaml.load(yaml_file))

    def get_metric(self, key, source=None):
        """Return the metric stored on the first line of the target of ``key``.

        The line is matched against ``_METRICS_RE``, which must produce a
        ``(timestamp, value)`` pair; the value goes through ``_parse_metric``.

        Raises:
            DatabandRuntimeError: if the target does not exist, is empty, or
                its first line does not match ``_METRICS_RE``.
        """
        metric_target = self.meta.get_metric_target(key, source=source)
        if not metric_target.exists():
            raise DatabandRuntimeError("Metric '%s' not found" % key)

        metric_data = metric_target.readlines()
        if not metric_data:
            raise DatabandRuntimeError(
                "Metric '%s' is malformed. No data found." % key
            )

        first_line = metric_data[0]
        metric_parsed = _METRICS_RE.match(first_line)
        if not metric_parsed:
            raise DatabandRuntimeError(
                "Metric '%s' is malformed. Expected format: 'TS VALUE', got='%s'"
                % (key, first_line)
            )

        timestamp, val = metric_parsed.groups()
        return Metric(
            key=key,
            value=_parse_metric(val),
            timestamp=datetime.fromtimestamp(int(timestamp)),
        )

    def get_artifact(self, name):
        """Return an ``Artifact`` pointing at the path of artifact ``name``.

        Raises:
            DatabandError: if the artifact target does not exist.
        """
        artifact_target = self.meta.get_artifact_target(name)
        if not artifact_target.exists():
            raise DatabandError("Artifact '%s' not found" % name)
        return Artifact(artifact_target.path)
class TaskRunMetricsFileStoreReader(object):
    """Read metrics, run info and artifacts of a single task run attempt.

    Every lookup goes through a ``TaskRunMetaFiles`` object built from the
    attempt folder, which resolves the target of each metric and artifact.
    """

    def __init__(self, attempt_folder, **kwargs):
        """Create a reader on top of ``attempt_folder``.

        Extra keyword arguments are forwarded to the parent ``__init__``.
        """
        super(TaskRunMetricsFileStoreReader, self).__init__(**kwargs)
        self.meta = TaskRunMetaFiles(attempt_folder)

    def _get_all_metrics_names(self, source=None):
        """Return the metric names found in the metric folder of ``source``.

        A name is the base name of a partition with a trailing ``.json``
        removed. Only the suffix is stripped, because
        ``get_histogram_metrics`` appends ``.json`` back to the key to locate
        the file; removing ``.json`` from the middle of a name would make
        that lookup miss.
        """
        suffix = ".json"
        metrics_root = self.meta.get_metric_folder(source=source)
        names = []
        for partition in metrics_root.list_partitions():
            name = os.path.basename(str(partition))
            if name.endswith(suffix):
                name = name[: -len(suffix)]
            names.append(name)
        return names

    def get_metric_history(self, key, source=None):
        """Return every recorded sample of metric ``key`` as ``Metric`` objects.

        Each non-blank line of the metric target must hold two
        whitespace-separated fields, ``<timestamp> <value>``. The value is
        converted with ``float`` and the timestamp with
        ``datetime.fromtimestamp(int(...))``.

        Raises:
            DatabandError: if the metric target does not exist.
            ValueError: if a line does not hold exactly two fields, or a
                field cannot be converted.
        """
        metric_target = self.meta.get_metric_target(key, source=source)
        if not metric_target.exists():
            raise DatabandError("Metric '%s' not found" % key)

        history = []
        for line in metric_target.readlines():
            # Split on any whitespace so tabs or repeated spaces are accepted.
            fields = line.split()
            if not fields:
                # A blank line carries no sample; skip it instead of failing.
                continue
            ts, val = fields
            history.append(Metric(key, float(val), datetime.fromtimestamp(int(ts))))
        return history

    def get_all_metrics_values(self, source=None):
        """Return a ``{metric key: value}`` dict for every metric of ``source``.

        One name may expand to several metrics (see ``get_metrics``). When two
        metrics share a key, the one read last wins.

        Raises:
            DatabandError: if any single name fails to load; the original
                exception is attached as ``nested_exceptions``.
        """
        metrics = []
        for key in self._get_all_metrics_names(source=source):
            try:
                # extend() consumes a lazy result inside the try block, so
                # errors raised while iterating are wrapped as well.
                metrics.extend(self.get_metrics(key, source=source))
            except Exception as ex:
                raise DatabandError(
                    "Failed to read metrics for %s at %s" % (key, self.meta.root),
                    nested_exceptions=ex,
                )
        return {m.key: m.value for m in metrics}

    def get_run_info(self):
        """Load the attempt's meta data file through ``RunInfoSchema``."""
        with self.meta.get_meta_data_file().open("r") as yaml_file:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects and is rejected by recent PyYAML
            # releases. Consider yaml.safe_load once it is confirmed that the
            # meta data file only contains plain YAML types.
            return RunInfoSchema().load(**yaml.load(yaml_file))

    def get_metrics(self, key, source=None):
        # type: (str, MetricSource) -> Iterable[Metric]
        """Return the metrics stored under ``key`` for ``source``.

        For ``MetricSource.histograms`` the work is delegated to
        ``get_histogram_metrics`` and the result is a lazy iterable. For any
        other source the first line of the metric target is matched against
        ``_METRICS_RE`` and a one-element list is returned.

        Raises:
            DatabandRuntimeError: if the target does not exist, is empty, or
                its first line does not match ``_METRICS_RE``.
        """
        if source == MetricSource.histograms:
            return self.get_histogram_metrics(key)

        metric_target = self.meta.get_metric_target(key, source=source)
        if not metric_target.exists():
            raise DatabandRuntimeError("Metric '%s' not found" % key)

        metric_data = metric_target.readlines()
        if not metric_data:
            raise DatabandRuntimeError(
                "Metric '%s' is malformed. No data found." % key
            )

        first_line = metric_data[0]
        metric_parsed = _METRICS_RE.match(first_line)
        if not metric_parsed:
            raise DatabandRuntimeError(
                "Metric '%s' is malformed. Expected format: 'TS VALUE', got='%s'"
                % (key, first_line)
            )

        timestamp, val = metric_parsed.groups()
        metric = Metric(
            key=key,
            value=_parse_metric(val),
            timestamp=datetime.fromtimestamp(int(timestamp)),
        )
        return [metric]

    def get_histogram_metrics(self, key):
        # type: (str) -> Iterable[Metric]
        """Yield the metrics stored in the histogram file ``<key>.json``.

        The JSON document must provide ``"timestamp"`` and ``"metrics"``. For
        each entry of ``"metrics"``:

        * a scalar is yielded as ``<key>.<name>`` with ``value``;
        * a dict or list is yielded as ``<key>.<name>`` with ``value_json``;
        * ``"stats"`` is additionally flattened into
          ``<key>.<column>.<stat>`` metrics;
        * ``"shape"`` is additionally flattened into ``<key>.shape<index>``
          metrics.
        """
        metric_target = self.meta.get_metric_target(
            "{}.json".format(key), source=MetricSource.histograms
        )
        hist_metrics = json.load(metric_target)
        # All metrics of one histogram share a timestamp; convert it once.
        timestamp = datetime.fromtimestamp(hist_metrics["timestamp"])

        for name, value in hist_metrics["metrics"].items():
            metric_key = "{}.{}".format(key, name)
            if not isinstance(value, (dict, list)):
                yield Metric(key=metric_key, value=value, timestamp=timestamp)
                continue

            # Structured values are stored whole first, then flattened below.
            yield Metric(key=metric_key, value_json=value, timestamp=timestamp)
            if name == "stats":
                for column, stats in value.items():
                    for stat, val in stats.items():
                        yield Metric(
                            key="{}.{}.{}".format(key, column, stat),
                            value=val,
                            timestamp=timestamp,
                        )
            elif name == "shape":
                for dim, val in enumerate(value):
                    yield Metric(
                        key="{}.shape{}".format(key, dim),
                        value=val,
                        timestamp=timestamp,
                    )

    def get_artifact(self, name):
        """Return an ``Artifact`` pointing at the path of artifact ``name``.

        Raises:
            DatabandError: if the artifact target does not exist.
        """
        artifact_target = self.meta.get_artifact_target(name)
        if not artifact_target.exists():
            raise DatabandError("Artifact '%s' not found" % name)
        return Artifact(artifact_target.path)