def setUp(self):
    """Build a fresh MergeDigest fixture before every test."""
    # fixture parameters for the accuracy tests
    self.compression, self.trials, self.datapoints = 500, 100, 100000
    # probe quantiles covering both extreme tails and the bulk
    self.quantiles = [
        0.0001, 0.001, 0.01,
        0.1, 0.25, 0.5, 0.75, 0.9,
        0.99, 0.999, 0.9999,
    ]
    self.md = MergeDigest(self.compression)
def __init__(self, config, logger, options):
    """Wire up sinks, per-service options and the t-digest error state."""
    sinks = {'metric_sink': 'RedisSink', 'output_sink': 'GraphiteSink'}
    super(FlowDifference, self).__init__(config, logger, resource=sinks)
    self.namespace = 'FlowDifference'
    self.service = options['service']
    self.params = options['params']
    self.error_types = ['norm']
    # streaming distribution of observed flow deviations
    self.tdigest = MergeDigest()
    self.tdigest_key = 'md_flow:%s' % (self.service)
    self.error_evals = {'tukey': eval_tukey, 'quantile': eval_quantile}
def __init__(self, config, logger, options):
    """Configure sinks and per-service state for seasonal decomposition."""
    resources = {'metric_sink': 'RedisSink', 'output_sink': 'GraphiteSink'}
    super(SeasonalDecomposition, self).__init__(config, logger,
                                                resource=resources)
    self.namespace = 'SeasonalDecomposition'
    self.service = options['service']
    self.params = options['params']
    # persistence key for the serialized error distribution
    self.tdigest_key = 'md:%s' % self.service
    self.tdigest = MergeDigest()
    self.error_eval = {'tukey': eval_tukey, 'quantile': eval_quantile}
def __init__(self, config, logger, options):
    """Initialize task resources, options and the error-distribution digest."""
    super(SeasonalDecomposition, self).__init__(
        config, logger,
        resource={'metric_sink': 'RedisSink',
                  'output_sink': 'GraphiteSink'})
    self.namespace = 'SeasonalDecomposition'
    self.service = options['service']
    self.params = options['params']
    self.tdigest = MergeDigest()
    # key under which the digest centroids are persisted
    self.tdigest_key = 'md:%s' % self.service
    self.error_eval = dict(tukey=eval_tukey, quantile=eval_quantile)
def __init__(self, config, logger, options):
    """Set up ensemble task state: one t-digest per tracked error type."""
    super(SeasonalDecompositionEnsemble, self).__init__(
        config, logger,
        resource={'metric_sink': 'RedisSink',
                  'output_sink': 'GraphiteSink'})
    self.namespace = 'SeasonalDecompositionEnsemble'
    self.service = options['service']
    self.params = options['params']
    self.error_types = ['norm', 'abs']
    # one digest (and one persistence key) per error type
    self.tdigests = dict((et, MergeDigest()) for et in self.error_types)
    self.tdigest_keys = dict(
        (et, 'md_ensemble:%s::%s' % (self.service, et))
        for et in self.error_types)
    self.error_evals = {'tukey': eval_tukey, 'quantile': eval_quantile}
class TestMergeDigest(unittest.TestCase): def stub_cdf(self, x, data): n1 = 0 n2 = 0 for el in data: if el < x: n1 += 1 if el <= x: n2 += 1 return (n1 + n2) / 2.0 / len(data) def _stub_distribution_test(self, values): start = time() for value in values: self.md.add(value, 1) end = time() t_per_datapoint = float(end - start) / len(values) print 'Time per datapoint addition in microseconds: %f\n' % ( t_per_datapoint * 10**6) values = sorted(values) print 'Quantile\tDistr q value\tMergeDigest q value\tdifference' soft_error = 0 for quantile in self.quantiles: q_value_raw_distribution = np.percentile(values, quantile * 100) q_value_md = self.md.quantile(quantile) quantile_estimate = self.md.cdf(q_value_md) expect(quantile_estimate - quantile).to.be.below(0.005) quantile_estimate_with_q_value = self.stub_cdf(q_value_md, values) expect(quantile - quantile_estimate_with_q_value).to.be.below(0.012) if abs(quantile - quantile_estimate_with_q_value) > 0.005: soft_error += 1 diff = abs(q_value_raw_distribution - q_value_md) print '%f\t%f\t%f\t%f' % (quantile, q_value_raw_distribution, q_value_md, diff) expect(soft_error).to.be.below(3) def setUp(self): self.compression = 500 self.trials = 100 self.datapoints = 100000 self.quantiles = [ 0.0001, 0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99, 0.999, 0.9999 ] self.md = MergeDigest(self.compression) def tearDown(self): self.md = None def test_serialization(self): for i in range(3): self.md.add(i, 1) expect(self.md.serialize()).to.be.equal('[[0, 1], [1, 1], [2, 1]]') def test_values_uniform(self): """ Test with uniform distribution in (0, 1) """ print TestMergeDigest.test_values_uniform.__doc__ values = [random() for _ in range(self.datapoints)] self._stub_distribution_test(values) def test_values_standard_normal(self): """ Test with Gaussian(0, 1) """ print TestMergeDigest.test_values_standard_normal.__doc__ values = [np.random.normal() for _ in range(self.datapoints)] self._stub_distribution_test(values) def 
test_values_gamma(self): """ Test with Gamma(0.1) """ print TestMergeDigest.test_values_gamma.__doc__ values = [np.random.gamma(0.1, 10) for _ in range(self.datapoints)] self._stub_distribution_test(values) def test_values_narrow_normal(self): """ Mixture of Uniform and Gaussian """ print TestMergeDigest.test_values_narrow_normal.__doc__ values = [] for _ in range(self.datapoints): criteria = random() if criteria < 0.5: values.append(criteria) else: values.append(np.random.normal()) self._stub_distribution_test(values) def test_values_sequence_asc(self): """ Test with sequential datapoints (ascending) """ print TestMergeDigest.test_values_sequence_asc.__doc__ values = [] c = 0 step = 1.0 / (self.datapoints + 1) for _ in range(self.datapoints): c += step values.append(c) self._stub_distribution_test(values) def test_values_sequence_desc(self): """ Test with sequential datapoints (descending) """ print TestMergeDigest.test_values_sequence_desc.__doc__ values = [] c = 1.0 step = 1.0 / (self.datapoints + 1) for _ in range(self.datapoints): c -= step values.append(c) self._stub_distribution_test(values)
class SeasonalDecomposition(BaseTask):
    """Detect anomalies in a metric via STL seasonal decomposition.

    Reads datapoints from the metric sink, decomposes them with R's
    stl(), compares the newest observation against the seasonal+trend
    model and scores the error with a streaming t-digest.
    """

    def __init__(self, config, logger, options):
        super(SeasonalDecomposition, self).__init__(
            config, logger,
            resource={'metric_sink': 'RedisSink',
                      'output_sink': 'GraphiteSink'})
        self.namespace = 'SeasonalDecomposition'
        self.service = options['service']
        self.params = options['params']
        self.tdigest_key = 'md:%s' % self.service
        self.tdigest = MergeDigest()
        self.error_eval = {'tukey': eval_tukey, 'quantile': eval_quantile}

    def _read_tdigest(self):
        """Restore persisted t-digest centroids (if any) from the sink."""
        tdigest_json = [i for i in self.metric_sink.read(self.tdigest_key)]
        if tdigest_json:
            centroids = json.loads(tdigest_json[0])
            # rebuild digest from (value, weight) centroid pairs; plain
            # loop instead of a side-effect list comprehension
            for c in centroids:
                self.tdigest.add(c[0], c[1])

    def read(self):
        """Fetch, validate and window the raw datapoints.

        Returns the last `period_length * seasons` datapoints (excluding
        the most recent one), or None when data is missing, stale or
        insufficient.
        """
        metric = self.params['metric']
        period_length = self.params['period_length']
        seasons = self.params['seasons']
        interval = self.params['interval']
        # gather data and assure requirements
        self._read_tdigest()
        data = [el for el in self.metric_sink.read(metric)]
        # BUGFIX: also guard the empty list; `data[0]` alone raised
        # IndexError when the sink returned no datapoints at all.
        if not data or not data[0]:
            self.logger.error('%s :: No Datapoints. Exiting' % self.service)
            return None
        data = sorted(data, key=lambda tup: tup.timestamp)
        if int(time()) - data[-1].timestamp > 3 * interval:
            self.logger.error(
                '%s :: Datapoints are too old (%d sec). Exiting' % (
                    self.service, (int(time()) - data[-1].timestamp)))
            return None
        data = insert_missing_datapoints(data, False, interval)
        if len(data) < period_length * seasons + 1:
            self.logger.error('%s :: Not enough (%d) datapoints. Exiting' % (
                self.service, len(data)))
            return None
        data = data[-period_length * seasons - 1:-1]
        return data

    def process(self, data):
        """Run STL on `data` and score the newest observation.

        Returns (input, model, seasonal, trend, error, state); a zeroed
        tuple with {'flag': -1} signals failure.
        """
        error_params = self.params.get('error_params', {})
        if not data:
            return (0.0, 0.0, 0.0, 0.0, 0.0, {'flag': -1})
        period_length = self.params['period_length']
        error_type = error_params.get('error_type', 'norm')
        error_handling = error_params.get('error_handling', 'tukey')
        data = [float(el.value) if el.value else False for el in data]
        input_val = data[-1]
        try:
            r_stl = robjects.r.stl
            r_ts = robjects.r.ts
            r_data_ts = r_ts(data, frequency=period_length)
            r_res = r_stl(r_data_ts, s_window="periodic", robust=True)
            r_res_ts = asarray(r_res[0])
            seasonal = r_res_ts[:, 0][-1]
            trend = r_res_ts[:, 1][-1]
            # due to outages the trend component can be decreased and
            # therefore negative model values are possible; clamp it
            model = max(0.01, seasonal + trend)
            error_abs = input_val - model
        except Exception as e:
            self.logger.error('%s :: STL Call failed: %s. Exiting' %
                              (self.service, e))
            return (0.0, 0.0, 0.0, 0.0, 0.0, {'flag': -1})
        # normalize error
        if error_abs <= 0:
            error_norm = error_abs / model
        elif input_val:
            error_norm = error_abs / input_val
        else:
            error_norm = 1.0
        # BUGFIX: an unrecognized error_type previously left `error`
        # unbound (NameError); fall back to the normalized error, which
        # matches the 'norm' default above.
        error = error_abs if error_type == 'abs' else error_norm
        # add error to distribution, evaluate, then persist the digest
        self.tdigest.add(error, 1.0)
        state = self.error_eval[error_handling](error, error_params,
                                                self.tdigest)
        self.metric_sink.write(
            [RedisGeneric(self.tdigest_key, self.tdigest.serialize())])
        return (input_val, model, seasonal, trend, error, state)

    def write(self, state):
        """Emit model components, error and evaluator flags to the sink."""
        (input_value, model, seasonal, trend, error, state) = state
        prefix = '%s.%s' % (self.namespace, self.service)
        now = int(time())
        tuples = []
        for name, value in state.iteritems():
            tuples.append(TimeSeriesTuple('%s.%s' % (prefix, name), now,
                                          value))
        if not input_value:
            input_value = 0.0
        tuples.append(TimeSeriesTuple('%s.%s' % (prefix, 'model'), now, model))
        tuples.append(TimeSeriesTuple('%s.%s' % (prefix, 'input'), now,
                                      input_value))
        tuples.append(TimeSeriesTuple('%s.%s' % (prefix, 'seasonal'), now,
                                      seasonal))
        tuples.append(TimeSeriesTuple('%s.%s' % (prefix, 'trend'), now, trend))
        tuples.append(TimeSeriesTuple('%s.%s' % (prefix, 'error'), now, error))
        self.output_sink.write(tuples)

    def run(self):
        """Full read -> process -> write cycle; always returns True."""
        data = self.read()
        state = self.process(data)
        self.write(state)
        return True
class SeasonalDecomposition(BaseTask):
    """Anomaly detection for a single metric via STL decomposition.

    The task pulls raw datapoints from the metric sink, fits R's stl()
    model, derives an error for the latest observation and classifies it
    against the historical error distribution held in a t-digest.
    """

    def __init__(self, config, logger, options):
        super(SeasonalDecomposition, self).__init__(config, logger, resource={
            'metric_sink': 'RedisSink',
            'output_sink': 'GraphiteSink'
        })
        self.namespace = 'SeasonalDecomposition'
        self.service = options['service']
        self.params = options['params']
        self.tdigest_key = 'md:%s' % self.service
        self.tdigest = MergeDigest()
        self.error_eval = {'tukey': eval_tukey, 'quantile': eval_quantile}

    def _read_tdigest(self):
        """Load any persisted centroids back into the in-memory digest."""
        tdigest_json = [i for i in self.metric_sink.read(self.tdigest_key)]
        if tdigest_json:
            centroids = json.loads(tdigest_json[0])
            # explicit loop; a list comprehension used only for its side
            # effects obscures intent and allocates a throwaway list
            for c in centroids:
                self.tdigest.add(c[0], c[1])

    def read(self):
        """Fetch and validate datapoints; None on missing/stale/short data."""
        metric = self.params['metric']
        period_length = self.params['period_length']
        seasons = self.params['seasons']
        interval = self.params['interval']
        # gather data and assure requirements
        self._read_tdigest()
        data = [el for el in self.metric_sink.read(metric)]
        # BUGFIX: check for the empty list before indexing; `data[0]`
        # raised IndexError when the sink yielded nothing.
        if not data or not data[0]:
            self.logger.error('%s :: No Datapoints. Exiting' % self.service)
            return None
        data = sorted(data, key=lambda tup: tup.timestamp)
        if int(time()) - data[-1].timestamp > 3 * interval:
            self.logger.error(
                '%s :: Datapoints are too old (%d sec). Exiting' %
                (self.service, (int(time()) - data[-1].timestamp)))
            return None
        data = insert_missing_datapoints(data, False, interval)
        if len(data) < period_length * seasons + 1:
            self.logger.error('%s :: Not enough (%d) datapoints. Exiting' %
                              (self.service, len(data)))
            return None
        data = data[-period_length * seasons - 1:-1]
        return data

    def process(self, data):
        """Decompose `data` and score the newest observation.

        Returns (input, model, seasonal, trend, error, state); zeros plus
        {'flag': -1} on missing data or STL failure.
        """
        error_params = self.params.get('error_params', {})
        if not data:
            return (0.0, 0.0, 0.0, 0.0, 0.0, {'flag': -1})
        period_length = self.params['period_length']
        error_type = error_params.get('error_type', 'norm')
        error_handling = error_params.get('error_handling', 'tukey')
        data = [float(el.value) if el.value else False for el in data]
        input_val = data[-1]
        try:
            r_stl = robjects.r.stl
            r_ts = robjects.r.ts
            r_data_ts = r_ts(data, frequency=period_length)
            r_res = r_stl(r_data_ts, s_window="periodic", robust=True)
            r_res_ts = asarray(r_res[0])
            seasonal = r_res_ts[:, 0][-1]
            trend = r_res_ts[:, 1][-1]
            # due to outages the trend component can be decreased and
            # therefore negative model values are possible; clamp it
            model = max(0.01, seasonal + trend)
            error_abs = input_val - model
        except Exception as e:
            self.logger.error('%s :: STL Call failed: %s. Exiting' %
                              (self.service, e))
            return (0.0, 0.0, 0.0, 0.0, 0.0, {'flag': -1})
        # normalize error
        if error_abs <= 0:
            error_norm = error_abs / model
        elif input_val:
            error_norm = error_abs / input_val
        else:
            error_norm = 1.0
        # BUGFIX: unknown error_type values previously fell through both
        # branches and raised NameError on `error`; default to the
        # normalized error, consistent with the 'norm' default.
        error = error_abs if error_type == 'abs' else error_norm
        # add error to distribution, evaluate, then persist the digest
        self.tdigest.add(error, 1.0)
        state = self.error_eval[error_handling](error, error_params,
                                                self.tdigest)
        self.metric_sink.write(
            [RedisGeneric(self.tdigest_key, self.tdigest.serialize())])
        return (input_val, model, seasonal, trend, error, state)

    def write(self, state):
        """Publish model components, error and evaluator flags."""
        (input_value, model, seasonal, trend, error, state) = state
        prefix = '%s.%s' % (self.namespace, self.service)
        now = int(time())
        tuples = []
        for name, value in state.iteritems():
            tuples.append(TimeSeriesTuple('%s.%s' % (prefix, name), now,
                                          value))
        if not input_value:
            input_value = 0.0
        tuples.append(TimeSeriesTuple('%s.%s' % (prefix, 'model'), now, model))
        tuples.append(
            TimeSeriesTuple('%s.%s' % (prefix, 'input'), now, input_value))
        tuples.append(
            TimeSeriesTuple('%s.%s' % (prefix, 'seasonal'), now, seasonal))
        tuples.append(TimeSeriesTuple('%s.%s' % (prefix, 'trend'), now, trend))
        tuples.append(TimeSeriesTuple('%s.%s' % (prefix, 'error'), now, error))
        self.output_sink.write(tuples)

    def run(self):
        """Full read -> process -> write cycle; always returns True."""
        data = self.read()
        state = self.process(data)
        self.write(state)
        return True
class FlowDifference(BaseTask):
    """Compare paired in/out flow metrics and flag abnormal differences.

    The deviation between the two metrics' second-most-recent datapoints
    is accumulated in a t-digest and evaluated with the Tukey range test.
    """

    def __init__(self, config, logger, options):
        super(FlowDifference, self).__init__(
            config, logger,
            resource={'metric_sink': 'RedisSink',
                      'output_sink': 'GraphiteSink'})
        self.namespace = 'FlowDifference'
        self.service = options['service']
        self.params = options['params']
        self.error_types = ['norm']
        self.tdigest = MergeDigest()
        self.tdigest_key = 'md_flow:%s' % (self.service)
        self.error_evals = {'tukey': eval_tukey, 'quantile': eval_quantile}

    def _read_tdigest(self):
        """Restore persisted t-digest centroids from the metric sink."""
        tdigest_json = [i for i in self.metric_sink.read(self.tdigest_key)]
        if tdigest_json:
            centroids = json.loads(tdigest_json[0])
            # plain loop instead of a side-effect list comprehension
            for c in centroids:
                self.tdigest.add(c[0], c[1])

    def _read_data(self, metric):
        """Read datapoints for `metric`; None when missing or stale."""
        stale_time = self.params['stale']
        data = [el for el in self.metric_sink.read(metric)]
        if not data or None in data:
            # BUGFIX: arguments were swapped (metric printed where the
            # service belongs); every other log line is service-first.
            self.logger.error('%s :: No Datapoints for %s. Exiting' %
                              (self.service, metric))
            return None
        data = sorted(data, key=lambda tup: tup.timestamp)
        if int(time()) - data[-1].timestamp > stale_time:
            self.logger.error(
                '%s :: Datapoints are too old (%d sec) for %s. Exiting' % (
                    self.service, (int(time()) - data[-1].timestamp), metric))
            return None
        return data

    def process(self):
        """Compute the in/out deviation and classify it.

        Returns (deviation, state); (0.0, {'flag': -1.0}) when either
        metric is unavailable or too short to index.
        """
        in_metric = self.params['in_metric']
        out_metric = self.params['out_metric']
        error_params = self.params.get('error_params', {})
        self._read_tdigest()
        # gather data and assure requirements; the second-most-recent
        # point is compared, so each series needs at least two points.
        # BUGFIX: indexing [-2] on a single-element series raised
        # IndexError — require len > 1 for both series.
        in_data = self._read_data(in_metric)
        out_data = self._read_data(out_metric)
        if in_data and out_data and len(in_data) > 1 and len(out_data) > 1:
            index = -2
            out_val = out_data[index]
            in_val = in_data[index]
            deviation = out_val.value - in_val.value
            self.tdigest.add(deviation, 1.0)
            state = self.error_evals['tukey'](deviation, error_params,
                                              self.tdigest)
            self.metric_sink.write(
                [RedisGeneric(self.tdigest_key, self.tdigest.serialize())])
            return (deviation, state)
        else:
            return (0.0, {'flag': -1.0})

    def write(self, state):
        """Publish the deviation plus evaluator flags to the output sink."""
        (deviation, states) = state
        prefix = '%s.%s' % (self.namespace, self.service)
        now = int(time())
        tuples = []
        for metric, value in states.iteritems():
            tuples.append(TimeSeriesTuple('%s.%s' % (prefix, metric), now,
                                          value))
        tuples.append(TimeSeriesTuple('%s.%s' % (prefix, 'deviation'), now,
                                      deviation))
        self.output_sink.write(tuples)

    def run(self):
        """Full process -> write cycle; always returns True."""
        state = self.process()
        self.write(state)
        return True