Ejemplo n.º 1
0
class TaskRunMetricsFileStoreReader(object):
    """Read metrics, artifacts and run info stored as files for a task run.

    Every location is resolved through the ``TaskRunMetaFiles`` helper that is
    built from the attempt folder given to the constructor.
    """

    def __init__(self, attempt_folder, **kwargs):
        """Create a reader over ``attempt_folder``.

        :param attempt_folder: passed as-is to ``TaskRunMetaFiles``.
        :param kwargs: forwarded to the next ``__init__`` in the MRO.
        """
        super(TaskRunMetricsFileStoreReader, self).__init__(**kwargs)
        self.meta = TaskRunMetaFiles(attempt_folder)

    def _get_all_metrics_names(self, source=None):
        """Return the base name of every entry listed in the metrics folder.

        :param source: forwarded to ``self.meta.get_metric_folder``.
        :return: list of file base names, in the order they are listed.
        """
        metrics_root = self.meta.get_metric_folder(source=source)
        return [
            os.path.basename(str(p)) for p in metrics_root.list_partitions()
        ]

    @staticmethod
    def _parse_history_line(line):
        """Parse one ``"TS VALUE"`` history line.

        :param line: raw line as returned by ``readlines()``.
        :return: ``(int timestamp, float value)``, or ``None`` when the line
            is empty or whitespace only.
        :raises ValueError: if a non-blank line is not exactly two
            single-space separated fields, or the fields are not numeric.
        """
        stripped = line.strip()
        if not stripped:
            return None
        ts, val = stripped.split(" ")
        return int(ts), float(val)

    def get_metric_history(self, key, source=None):
        """Return every recorded sample of metric ``key`` as ``Metric`` objects.

        Each non-blank line of the metric file is expected to hold
        ``"TS VALUE"``; blank lines are skipped instead of failing the read.

        :param key: metric name, used to locate the metric file.
        :param source: forwarded to ``self.meta.get_metric_target``.
        :return: list of ``Metric`` in file order.
        :raises DatabandError: if the metric file does not exist.
        :raises ValueError: if a non-blank line is malformed.
        """
        metric_target = self.meta.get_metric_target(key, source=source)
        if not metric_target.exists():
            raise DatabandError("Metric '%s' not found" % key)
        history = []
        for line in metric_target.readlines():
            parsed = self._parse_history_line(line)
            if parsed is None:
                # A blank line (e.g. a trailing newline) carries no sample.
                continue
            ts, val = parsed
            history.append(Metric(key, val, datetime.fromtimestamp(ts)))
        return history

    def get_all_metrics_values(self, source=None):
        """Return a ``{metric key: value}`` dict for every metric file found.

        :param source: forwarded to the name listing and to ``get_metric``.
        :return: dict mapping each metric key to its value; if two metrics
            share a key, the one read last wins.
        :raises DatabandError: wrapping any error raised while reading a
            metric (the original error is passed as ``nested_exceptions``).
        """
        metrics = []
        for key in self._get_all_metrics_names(source=source):
            try:
                metrics.append(self.get_metric(key, source=source))
            except Exception as ex:
                raise DatabandError(
                    "Failed to read metrics for %s at %s" %
                    (key, self.meta.root),
                    nested_exceptions=ex,
                )
        return {m.key: m.value for m in metrics}

    def get_run_info(self):
        """Load the run info from the YAML metadata file.

        :return: whatever ``RunInfoSchema().load`` returns for the parsed
            YAML mapping, which is unpacked as keyword arguments.
        """
        # Imported here rather than at module level, as in the original code.
        from dbnd.api.serialization.run import RunInfoSchema

        with self.meta.get_meta_data_file().open("r") as yaml_file:
            # NOTE(review): ``yaml.load`` without an explicit Loader can build
            # arbitrary Python objects, is deprecated since PyYAML 5.1 and is
            # rejected by PyYAML 6. Consider ``yaml.safe_load`` once it is
            # confirmed that the metadata file holds plain data only.
            return RunInfoSchema().load(**yaml.load(yaml_file))

    def get_metric(self, key, source=None):
        """Return the metric stored in the first line of the file for ``key``.

        Only the first line is parsed; it must match ``_METRICS_RE``, whose
        two groups are taken as the timestamp and the raw value.

        :param key: metric name, used to locate the metric file.
        :param source: forwarded to ``self.meta.get_metric_target``.
        :return: ``Metric`` with the value converted by ``_parse_metric``.
        :raises DatabandRuntimeError: if the file is missing, empty, or its
            first line does not match the expected format.
        """
        metric_target = self.meta.get_metric_target(key, source=source)
        if not metric_target.exists():
            raise DatabandRuntimeError("Metric '%s' not found" % key)
        metric_data = metric_target.readlines()
        if not metric_data:
            raise DatabandRuntimeError(
                "Metric '%s' is malformed. No data found." % key)
        first_line = metric_data[0]

        metric_parsed = _METRICS_RE.match(first_line)
        if not metric_parsed:
            raise DatabandRuntimeError(
                "Metric '%s' is malformed. Expected format: 'TS VALUE', got='%s'"
                % (key, first_line))

        timestamp, val = metric_parsed.groups()

        return Metric(
            key=key,
            value=_parse_metric(val),
            timestamp=datetime.fromtimestamp(int(timestamp)),
        )

    def get_artifact(self, name):
        """Return an ``Artifact`` pointing at the stored artifact ``name``.

        :param name: artifact name, used to locate the artifact target.
        :return: ``Artifact`` built from the target's path.
        :raises DatabandError: if the artifact target does not exist.
        """
        artifact_target = self.meta.get_artifact_target(name)
        if not artifact_target.exists():
            raise DatabandError("Artifact '%s' not found" % name)
        return Artifact(artifact_target.path)
Ejemplo n.º 2
0
class TaskRunMetricsFileStoreReader(object):
    """Read metrics, histograms, artifacts and run info stored as files.

    Every location is resolved through the ``TaskRunMetaFiles`` helper that is
    built from the attempt folder given to the constructor.
    """

    def __init__(self, attempt_folder, **kwargs):
        """Create a reader over ``attempt_folder``.

        :param attempt_folder: passed as-is to ``TaskRunMetaFiles``.
        :param kwargs: forwarded to the next ``__init__`` in the MRO.
        """
        super(TaskRunMetricsFileStoreReader, self).__init__(**kwargs)
        self.meta = TaskRunMetaFiles(attempt_folder)

    def _get_all_metrics_names(self, source=None):
        """Return metric names derived from the entries of the metrics folder.

        A name is the entry's base name with a trailing ``.json`` extension
        removed. Only a trailing extension is stripped, so a name that merely
        contains ``.json`` in the middle (``"a.json.b"``) is left intact.

        :param source: forwarded to ``self.meta.get_metric_folder``.
        :return: list of metric names, in the order entries are listed.
        """
        metrics_root = self.meta.get_metric_folder(source=source)
        all_files = [
            os.path.basename(str(p)) for p in metrics_root.list_partitions()
        ]
        suffix = ".json"
        return [
            f[:-len(suffix)] if f.endswith(suffix) else f for f in all_files
        ]

    @staticmethod
    def _parse_history_line(line):
        """Parse one ``"TS VALUE"`` history line.

        :param line: raw line as returned by ``readlines()``.
        :return: ``(int timestamp, float value)``, or ``None`` when the line
            is empty or whitespace only.
        :raises ValueError: if a non-blank line is not exactly two
            single-space separated fields, or the fields are not numeric.
        """
        stripped = line.strip()
        if not stripped:
            return None
        ts, val = stripped.split(" ")
        return int(ts), float(val)

    def get_metric_history(self, key, source=None):
        """Return every recorded sample of metric ``key`` as ``Metric`` objects.

        Each non-blank line of the metric file is expected to hold
        ``"TS VALUE"``; blank lines are skipped instead of failing the read.

        :param key: metric name, used to locate the metric file.
        :param source: forwarded to ``self.meta.get_metric_target``.
        :return: list of ``Metric`` in file order.
        :raises DatabandError: if the metric file does not exist.
        :raises ValueError: if a non-blank line is malformed.
        """
        metric_target = self.meta.get_metric_target(key, source=source)
        if not metric_target.exists():
            raise DatabandError("Metric '%s' not found" % key)
        history = []
        for line in metric_target.readlines():
            parsed = self._parse_history_line(line)
            if parsed is None:
                # A blank line (e.g. a trailing newline) carries no sample.
                continue
            ts, val = parsed
            history.append(Metric(key, val, datetime.fromtimestamp(ts)))
        return history

    def get_all_metrics_values(self, source=None):
        """Return a ``{metric key: value}`` dict for every metric found.

        :param source: forwarded to the name listing and to ``get_metrics``.
        :return: dict mapping each metric key to its ``value`` attribute; if
            two metrics share a key, the one read last wins.
        :raises DatabandError: wrapping any error raised while reading a
            metric (the original error is passed as ``nested_exceptions``).
        """
        metrics = []
        for key in self._get_all_metrics_names(source=source):
            try:
                # ``extend`` consumes the result inside the ``try`` so errors
                # raised lazily by the histogram generator are wrapped too.
                metrics.extend(self.get_metrics(key, source=source))
            except Exception as ex:
                raise DatabandError(
                    "Failed to read metrics for %s at %s" %
                    (key, self.meta.root),
                    nested_exceptions=ex,
                )
        return {m.key: m.value for m in metrics}

    def get_run_info(self):
        """Load the run info from the YAML metadata file.

        :return: whatever ``RunInfoSchema().load`` returns for the parsed
            YAML mapping, which is unpacked as keyword arguments.
        """
        with self.meta.get_meta_data_file().open("r") as yaml_file:
            # NOTE(review): ``yaml.load`` without an explicit Loader can build
            # arbitrary Python objects, is deprecated since PyYAML 5.1 and is
            # rejected by PyYAML 6. Consider ``yaml.safe_load`` once it is
            # confirmed that the metadata file holds plain data only.
            return RunInfoSchema().load(**yaml.load(yaml_file))

    def get_metrics(self, key, source=None):
        # type: (str, MetricSource) -> Iterable[Metric]
        """Return the metrics stored under ``key``.

        For ``MetricSource.histograms`` the work is delegated to
        ``get_histogram_metrics`` (a generator). Otherwise only the first
        line of the metric file is parsed; it must match ``_METRICS_RE``,
        whose two groups are taken as the timestamp and the raw value.

        :param key: metric name, used to locate the metric file.
        :param source: metric source; also forwarded to
            ``self.meta.get_metric_target``.
        :return: iterable of ``Metric`` (a one-element list for regular
            metrics).
        :raises DatabandRuntimeError: for regular metrics, if the file is
            missing, empty, or its first line is malformed.
        """
        if source == MetricSource.histograms:
            return self.get_histogram_metrics(key)

        metric_target = self.meta.get_metric_target(key, source=source)
        if not metric_target.exists():
            raise DatabandRuntimeError("Metric '%s' not found" % key)
        metric_data = metric_target.readlines()
        if not metric_data:
            raise DatabandRuntimeError(
                "Metric '%s' is malformed. No data found." % key)
        first_line = metric_data[0]

        metric_parsed = _METRICS_RE.match(first_line)
        if not metric_parsed:
            raise DatabandRuntimeError(
                "Metric '%s' is malformed. Expected format: 'TS VALUE', got='%s'"
                % (key, first_line))

        timestamp, val = metric_parsed.groups()

        metric = Metric(
            key=key,
            value=_parse_metric(val),
            timestamp=datetime.fromtimestamp(int(timestamp)),
        )
        return [metric]

    def get_histogram_metrics(self, key):
        # type: (str) -> Iterable[Metric]
        """Yield the metrics stored in the histogram JSON file for ``key``.

        The file ``"<key>.json"`` is expected to hold a mapping with a
        ``"timestamp"`` and a ``"metrics"`` mapping. For each entry of
        ``"metrics"``:

        * a scalar yields ``Metric(key="<key>.<name>", value=...)``;
        * a dict or list yields ``Metric(key="<key>.<name>", value_json=...)``
          and is additionally expanded when ``name`` is ``"stats"`` (one
          metric per column/stat pair, keyed ``"<key>.<column>.<stat>"``) or
          ``"shape"`` (one metric per dimension, keyed ``"<key>.shape<dim>"``).

        :param key: histogram name, without the ``.json`` extension.
        :return: generator of ``Metric``; the file is read on first iteration.
        """
        metric_target = self.meta.get_metric_target(
            "{}.json".format(key), source=MetricSource.histograms)
        hist_metrics = json.load(metric_target)
        timestamp = hist_metrics["timestamp"]
        metrics = hist_metrics["metrics"]
        # Every yielded metric shares the file's timestamp; convert it once.
        recorded_at = datetime.fromtimestamp(timestamp)
        for name, value in metrics.items():
            if not isinstance(value, (dict, list)):
                yield Metric(
                    key="{}.{}".format(key, name),
                    value=value,
                    timestamp=recorded_at,
                )
                continue

            yield Metric(
                key="{}.{}".format(key, name),
                value_json=value,
                timestamp=recorded_at,
            )
            if name == "stats":
                for column, stats in value.items():
                    for stat, val in stats.items():
                        yield Metric(
                            key="{}.{}.{}".format(key, column, stat),
                            value=val,
                            timestamp=recorded_at,
                        )
            elif name == "shape":
                for dim, val in enumerate(value):
                    yield Metric(
                        key="{}.shape{}".format(key, dim),
                        value=val,
                        timestamp=recorded_at,
                    )

    def get_artifact(self, name):
        """Return an ``Artifact`` pointing at the stored artifact ``name``.

        :param name: artifact name, used to locate the artifact target.
        :return: ``Artifact`` built from the target's path.
        :raises DatabandError: if the artifact target does not exist.
        """
        artifact_target = self.meta.get_artifact_target(name)
        if not artifact_target.exists():
            raise DatabandError("Artifact '%s' not found" % name)
        return Artifact(artifact_target.path)