Example #1
    def derivative(metric_values):
        """
        Compute the derivative of the given metric series.

        :type metric_values: pyspark.resultiterable.ResultIterable[dict]
        :param metric_values: The metric points to differentiate.
        :return: The derivative of the provided metric as a list of
            metric dicts, or an empty list if fewer than two distinct
            timestamps are available.
        """
        # Materialize so the points can be indexed and iterated twice
        metric_values = list(metric_values)
        if len(metric_values) < 2:
            return []

        metric_name = metric_values[0]["metric"]["name"] + "_derivative"
        meta = metric_values[0]["meta"]
        dims = metric_values[0]["metric"]["dimensions"]
        # All timestamps and values, as lists (a map() iterator would be
        # exhausted after the first pass under Python 3)
        timestamps = [m["metric"]["timestamp"] for m in metric_values]
        all_values = [m["metric"]["value"] for m in metric_values]
        # Sort values by timestamp
        all_values = [y for _, y in sorted(zip(timestamps, all_values),
                                           key=lambda x: x[0])]
        timestamps = sorted(timestamps)
        # Drop points that repeat a timestamp, keeping the first one seen
        last_timestamp = timestamps[0]
        tmp_all_values = [all_values[0]]
        tmp_timestamps = [last_timestamp]
        for index in range(1, len(timestamps)):
            if timestamps[index] != last_timestamp:
                last_timestamp = timestamps[index]
                tmp_all_values.append(all_values[index])
                tmp_timestamps.append(last_timestamp)
        all_values = tmp_all_values
        timestamps = tmp_timestamps

        if len(all_values) < 2:
            return []

        # Differentiate: forward difference at the first point, central
        # differences in the interior, backward difference at the last
        n = len(all_values) - 1
        new_values = [
            float(all_values[1] - all_values[0]) /
            float(timestamps[1] - timestamps[0])
        ]
        for index in range(1, n):
            new_values.append(
                float(all_values[index + 1] - all_values[index - 1]) /
                float(timestamps[index + 1] - timestamps[index - 1]))
        new_values.append(
            float(all_values[n] - all_values[n - 1]) /
            float(timestamps[n] - timestamps[n - 1]))
        new_metrics = [
            helpers.create_agg_metric(metric_name, meta, dims, tmst, val)
            for val, tmst in zip(new_values, timestamps)
        ]
        return new_metrics
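The snippets on this page call into a project `helpers` module (plus `six`, used for `six.iteritems` below) that the page does not show. The block below is a minimal, hypothetical stand-in inferred purely from the call sites, suitable for saving as helpers.py so the `helpers.` references resolve: `create_agg_metric` is assumed to rebuild a metric dict in the shape the functions read (metric.name, metric.dimensions, metric.timestamp, metric.value, plus top-level meta), and `interpolate` is assumed to do linear interpolation of a timestamp-sorted series. The real project's versions may differ.

    # Hypothetical stand-in for the project's `helpers` module.
    # Signatures are inferred from the call sites above, not taken
    # from the real project.
    import bisect

    def create_agg_metric(name, meta, dimensions, timestamp, value):
        # Rebuild a metric dict in the shape the functions above read.
        return {
            "metric": {
                "name": name,
                "dimensions": dimensions,
                "timestamp": timestamp,
                "value": value,
            },
            "meta": meta,
        }

    def interpolate(timestamp, metrics, timestamps):
        # Linearly interpolate a timestamp-sorted list of metric dicts
        # at `timestamp`; values outside the covered range are clamped.
        pos = bisect.bisect_left(timestamps, timestamp)
        if pos == 0:
            return metrics[0]["metric"]["value"]
        if pos >= len(timestamps):
            return metrics[-1]["metric"]["value"]
        t0, t1 = timestamps[pos - 1], timestamps[pos]
        v0 = metrics[pos - 1]["metric"]["value"]
        v1 = metrics[pos]["metric"]["value"]
        if t1 == t0:
            return v1
        return v0 + (v1 - v0) * float(timestamp - t0) / float(t1 - t0)

With those names bound, derivative can be exercised directly; the expected output follows from the difference scheme noted in the code:

    points = [create_agg_metric("cpu.load", {}, {"host": "h1"}, t, v)
              for t, v in [(0, 0.0), (10, 5.0), (20, 20.0)]]
    for m in derivative(points):
        print(m["metric"]["timestamp"], m["metric"]["value"])
    # 0 0.5     forward difference:  (5 - 0) / (10 - 0)
    # 10 1.0    central difference:  (20 - 0) / (20 - 0)
    # 20 1.5    backward difference: (20 - 5) / (20 - 10)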
Example #2
    def combine(all_metrics, combine_fn, combine_metric_name, nb_of_metrics):
        """
        Combine the given metrics of this RDD into one.

        :type all_metrics: pyspark.resultiterable.ResultIterable
        :param all_metrics: List containing the metrics.
        :type combine_fn: (dict[str, float]) -> float
        :param combine_fn: Function mapping a dict of
            {metric_name: value} at one timestamp to the combined value.
        :type combine_metric_name: str
        :param combine_metric_name: Name of the new metric
        :type nb_of_metrics: int
        :param nb_of_metrics: The number of metrics expected
        """
        # Separate metrics based on name
        separated_metrics = {}  # type: dict[str, list[dict]]
        dims = None
        for el in all_metrics:
            key = el["metric"]["name"]
            if dims is None:
                dims = el["metric"]["dimensions"]
            if key not in separated_metrics:
                separated_metrics[key] = [el]
            else:
                separated_metrics[key].append(el)

        if len(separated_metrics) != nb_of_metrics:
            return []

        separated_metrics = sorted(list(six.iteritems(separated_metrics)),
                                   key=lambda x: len(x[1]))
        separated_metrics = separated_metrics  # type: list[(str, list[dict])]

        # Sort each metric
        for metric in separated_metrics:
            metric[1].sort(key=lambda v: v["metric"]["timestamp"])

        temp_values = []
        all_timestamp = [[x["metric"]["timestamp"] for x in l[1]]
                         for l in separated_metrics]
        for index in range(0, len(separated_metrics[0][1])):
            current_env = {
                separated_metrics[0][0]:
                separated_metrics[0][1][index]["metric"]["value"]
            }
            timestamp = all_timestamp[0][index]
            for metric_index in range(1, len(separated_metrics)):
                metric_prop = separated_metrics[metric_index]
                metric_name = metric_prop[0]
                current_env[metric_name] = helpers.interpolate(
                    timestamp, metric_prop[1], all_timestamp[metric_index])
            temp_values.append(current_env)

        new_values = [combine_fn(env) for env in temp_values]

        new_metrics = [
            helpers.create_agg_metric(combine_metric_name, {}, dims, tsmp, val)
            for val, tsmp in zip(new_values, all_timestamp[0])
        ]
        return new_metrics
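As the loop above shows, combine iterates over the shortest series (the name-to-points pairs are sorted by ascending length) and hands combine_fn, for each of its timestamps, a dict mapping every metric name to its value, interpolated where needed. A quick sketch using the hypothetical stub from Example #1:

    cpu = [create_agg_metric("cpu", {}, {"host": "h1"}, t, v)
           for t, v in [(0, 10.0), (10, 30.0)]]
    mem = [create_agg_metric("mem", {}, {"host": "h1"}, t, v)
           for t, v in [(0, 1.0), (5, 2.0), (10, 3.0)]]

    combined = combine(cpu + mem,
                       combine_fn=lambda env: env["cpu"] * env["mem"],
                       combine_metric_name="cpu_times_mem",
                       nb_of_metrics=2)
    for m in combined:
        print(m["metric"]["timestamp"], m["metric"]["value"])
    # cpu is the shorter series, so its timestamps drive the output:
    # 0 10.0    (10.0 * 1.0)
    # 10 90.0   (30.0 * 3.0)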
Example #3
    def aggregate(all_metrics, reducer, suffix):
        """
        Aggregate values produced by different providers together.
        The metric name is assumed to be the same for all providers.

        :type all_metrics: list[dict]
        :param all_metrics: Values to aggregate mapping to a specific
                            metric name.
        :type reducer: ((float, float) -> float,
            (float, float, float) -> float)
        :param reducer: Pair of functions: the first seeds the result
            from (value, metric_count); the second folds in each
            (accumulator, interpolated_value, metric_count).
        :type suffix: str
        :param suffix: Suffix to append to the metric name in its combined form
        """
        # Group the points into one series per dimension set (provider)
        separated_metrics = {}  # type: dict[frozenset, list[dict]]
        for el in all_metrics:
            key = frozenset(el["metric"]["dimensions"].items())
            if key not in separated_metrics:
                separated_metrics[key] = [el]
            else:
                separated_metrics[key].append(el)

        # Collect all dimensions
        dims = {}
        for metric_dims in separated_metrics.keys():
            for prop, val in six.iteritems(dict(metric_dims)):
                if prop in dims:
                    dims[prop].add(val)
                else:
                    dims[prop] = {val}  # start a one-element set

        # Sort each metric
        for _, metric in six.iteritems(separated_metrics):
            metric.sort(key=lambda v: v["metric"]["timestamp"])

        # Longest series first: its timestamps drive the loop below
        separated_metrics = sorted(list(separated_metrics.values()), key=len)
        separated_metrics.reverse()

        # Compute the new values
        new_values = []
        all_timestamps = [[x["metric"]["timestamp"] for x in l]
                          for l in separated_metrics]
        metric_count = len(separated_metrics)
        for index in range(0, len(separated_metrics[0])):
            new_value = reducer[0](
                separated_metrics[0][index]["metric"]["value"],
                metric_count)
            new_timestamp = separated_metrics[0][index]["metric"]["timestamp"]
            for metric_index in range(1, metric_count):
                new_value = reducer[1](new_value, helpers.interpolate(
                    new_timestamp,
                    separated_metrics[metric_index],
                    all_timestamps[metric_index]
                ), metric_count)
            new_values.append((new_timestamp, new_value))

        # Aggregate the other details:
        metric_name = separated_metrics[0][0]["metric"]["name"] + suffix
        meta = separated_metrics[0][0]["meta"]
        new_metrics = [
            helpers.create_agg_metric(
                metric_name,
                meta,
                dims,
                val[0],
                val[1]
            ) for val in new_values
        ]
        return new_metrics
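The reducer contract is visible in the loop above: reducer[0] seeds the result from the first (longest) series' value and the series count, and reducer[1] folds in each remaining series, interpolated at the driving timestamp. A hedged example, again relying on the stub from Example #1, that averages one metric across two providers:

    a = [create_agg_metric("net.rate", {}, {"provider": "a"}, t, v)
         for t, v in [(0, 2.0), (10, 4.0)]]
    b = [create_agg_metric("net.rate", {}, {"provider": "b"}, t, v)
         for t, v in [(0, 6.0), (10, 8.0)]]

    mean_reducer = (lambda v, n: v / n,             # seed from the first series
                    lambda acc, v, n: acc + v / n)  # fold in each other series

    for m in aggregate(a + b, mean_reducer, "_avg"):
        print(m["metric"]["name"], m["metric"]["timestamp"],
              m["metric"]["value"])
    # net.rate_avg 0 4.0
    # net.rate_avg 10 6.0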