def test_block_slicing(self):
    """Aggregates whose range cuts through a block must slice correctly."""
    store = Gauged(self.driver, resolution=1000, block_size=10000)
    with store.writer as w:
        w.add('foo', 100, timestamp=11000)
        w.add('foo', 200, timestamp=23000)
    # Both points included => mean of 100 and 200.
    self.assertEqual(
        store.aggregate('foo', Gauged.MEAN, start=10000, end=30000), 150)
    self.assertEqual(
        store.aggregate('foo', Gauged.MEAN, start=11000, end=24000), 150)
    # end=23000 excludes the second point, leaving only the first.
    self.assertEqual(
        store.aggregate('foo', Gauged.MEAN, start=11000, end=23000), 100)
    # A single point has zero spread.
    self.assertEqual(
        store.aggregate('foo', Gauged.STDDEV, start=11000, end=23000), 0)
def test_clear_key_after(self):
    """clear_key_after() removes one key's data from a block-aligned
    timestamp onward, scoped to a namespace, leaving other keys intact."""
    gauged = Gauged(self.driver, resolution=1000, block_size=10000)
    with gauged.writer as writer:
        writer.add({'foo': 1, 'bar': 1}, timestamp=10000)
        writer.add({'foo': 2, 'bar': 2}, timestamp=20000)
        writer.add({'foo': 3, 'bar': 3}, timestamp=30000)
        writer.add({'foo': 4, 'bar': 4}, timestamp=40000, namespace=1)
    # Sanity check: both namespaces hold the values just written.
    self.assertEqual(gauged.value('foo', timestamp=40000), 3)
    self.assertEqual(gauged.value('bar', timestamp=40000), 3)
    self.assertEqual(gauged.value('foo', timestamp=40000, namespace=1), 4)
    self.assertEqual(gauged.value('bar', timestamp=40000, namespace=1), 4)
    # timestamp should be on a block boundary
    with gauged.writer as writer:
        with self.assertRaises(ValueError):
            writer.clear_key_after('foo', timestamp=15000)
    with gauged.writer as writer:
        writer.clear_key_after('foo', timestamp=20000)
    # every value before 20000 stays the same
    self.assertEqual(gauged.value('foo', timestamp=10000), 1)
    self.assertEqual(gauged.value('bar', timestamp=10000), 1)
    # 'foo' value on 20000 is cleared
    self.assertEqual(
        gauged.aggregate('foo', Gauged.COUNT, start=19000, end=21000), 0)
    # 'foo' value after 20000 is cleared
    self.assertEqual(
        gauged.aggregate('foo', Gauged.COUNT, start=20000, end=40000), 0)
    # 'bar' stays there
    self.assertEqual(gauged.value('bar', timestamp=10000), 1)
    with gauged.writer as writer:
        writer.add({'foo': 5, 'bar': 5}, timestamp=50000)
        writer.add({'foo': 5, 'bar': 5}, timestamp=50000, namespace=1)
        writer.add({'foo': 6, 'bar': 6}, timestamp=60000)
        writer.add({'foo': 6, 'bar': 6}, timestamp=60000, namespace=1)
    with gauged.writer as writer:
        writer.clear_key_after('foo', namespace=1, timestamp=50000)
    # 'foo' in namespace 0 is untouched after 50000
    self.assertEqual(gauged.value('foo', timestamp=60000), 6)
    # 'foo' in namespace 1 is correctly cleared
    self.assertEqual(
        gauged.aggregate('foo', Gauged.COUNT, start=50000, end=60000,
                         namespace=1), 0)
    # 'bar' is still there
    self.assertEqual(gauged.value('bar', timestamp=60000), 6)
    self.assertEqual(gauged.value('bar', timestamp=60000, namespace=1), 6)
def test_no_data(self):
    """Every read API yields an empty/zero/None result before any write."""
    empty = Gauged(self.driver)
    # All collection-returning calls come back empty.
    for collection in (empty.namespaces(),
                       empty.value_series('foo'),
                       empty.aggregate_series('foo', Gauged.SUM),
                       empty.keys()):
        self.assertEqual(len(collection), 0)
    # Scalar reads of an unknown key yield None.
    self.assertEqual(empty.value('foo'), None)
    self.assertEqual(empty.aggregate('foo', Gauged.SUM), None)
    # The statistics counters start at zero.
    counters = empty.statistics()
    for field in ('data_points', 'byte_count'):
        self.assertEqual(getattr(counters, field), 0)
def test_fuzzy(self, decimal_places=4, max_values=3):
    """Fuzz-test each aggregate against pure-Python reference
    implementations across several resolutions, counts and time spans.

    NOTE(review): another definition of test_fuzzy appears later in this
    file and will shadow this one at class-creation time — confirm which
    copy is intended to survive.
    """
    def random_values(n, minimum, maximum, decimals):
        # n uniform samples in [minimum, maximum], rounded to `decimals`.
        return [round(random.random() * (maximum - minimum) + minimum,
                      decimals) for _ in xrange(n)]

    def percentile(values, percentile):
        # Reference percentile using linear interpolation between ranks.
        if not len(values):
            return float('nan')
        values = sorted(values)
        rank = float(len(values) - 1) * percentile / 100
        nearest_rank = int(floor(rank))
        result = values[nearest_rank]
        if (ceil(rank) != nearest_rank):
            result += (rank - nearest_rank) * \
                (values[nearest_rank + 1] - result)
        return result

    def stddev(values):
        # Reference population (not sample) standard deviation.
        total = len(values)
        mean = float(sum(values)) / total
        sum_of_squares = sum((elem - mean) ** 2 for elem in values)
        return sqrt(float(sum_of_squares) / total)
    for resolution in (100, 500, 1000):
        for n in xrange(1, max_values):
            for end in (1000, 10000):
                gauged = Gauged(
                    self.driver, block_size=1000, resolution=resolution)
                # Fresh schema per configuration so runs don't interfere.
                gauged.driver.clear_schema()
                values = random_values(n, -100, 100, 2)
                with gauged.writer as writer:
                    timestamps = sorted(random_values(n, 0, end, 0))
                    for value, timestamp in zip(values, timestamps):
                        writer.add('foo', value, timestamp=int(timestamp))
                self.assertAlmostEqual(
                    sum(values),
                    gauged.aggregate('foo', Gauged.SUM),
                    places=decimal_places)
                self.assertAlmostEqual(
                    min(values),
                    gauged.aggregate('foo', Gauged.MIN),
                    places=decimal_places)
                self.assertAlmostEqual(
                    max(values),
                    gauged.aggregate('foo', Gauged.MAX),
                    places=decimal_places)
                self.assertAlmostEqual(
                    len(values),
                    gauged.aggregate('foo', Gauged.COUNT),
                    places=decimal_places)
                mean = float(sum(values)) / len(values)
                self.assertAlmostEqual(
                    mean,
                    gauged.aggregate('foo', Gauged.MEAN),
                    places=decimal_places)
                self.assertAlmostEqual(
                    stddev(values),
                    gauged.aggregate('foo', Gauged.STDDEV),
                    places=decimal_places)
                self.assertAlmostEqual(
                    percentile(values, 50),
                    gauged.aggregate('foo', Gauged.MEDIAN),
                    places=decimal_places)
                self.assertAlmostEqual(
                    percentile(values, 98),
                    gauged.aggregate('foo', Gauged.PERCENTILE,
                                     percentile=98),
                    places=decimal_places)
def test_fuzzy(self, decimal_places=4, max_values=3):
    """Fuzz-test each aggregate against pure-Python reference
    implementations across several resolutions, counts and time spans.

    NOTE(review): this duplicates an earlier test_fuzzy definition in
    this file; if both live in the same class, this one shadows it.
    """
    def random_values(n, minimum, maximum, decimals):
        # n uniform samples in [minimum, maximum], rounded to `decimals`.
        return [
            round(random.random() * (maximum - minimum) + minimum, decimals)
            for _ in xrange(n)
        ]

    def percentile(values, percentile):
        # Reference percentile using linear interpolation between ranks.
        if not len(values):
            return float('nan')
        values = sorted(values)
        rank = float(len(values) - 1) * percentile / 100
        nearest_rank = int(floor(rank))
        result = values[nearest_rank]
        if (ceil(rank) != nearest_rank):
            result += (rank - nearest_rank) * (values[nearest_rank + 1] -
                                               result)
        return result

    def stddev(values):
        # Reference population (not sample) standard deviation.
        total = len(values)
        mean = float(sum(values)) / total
        sum_of_squares = sum((elem - mean) ** 2 for elem in values)
        return sqrt(float(sum_of_squares) / total)
    for resolution in (100, 500, 1000):
        for n in xrange(1, max_values):
            for end in (1000, 10000):
                gauged = Gauged(self.driver, block_size=1000,
                                resolution=resolution)
                # Fresh schema per configuration so runs don't interfere.
                gauged.driver.clear_schema()
                values = random_values(n, -100, 100, 2)
                with gauged.writer as writer:
                    timestamps = sorted(random_values(n, 0, end, 0))
                    for value, timestamp in zip(values, timestamps):
                        writer.add('foo', value, timestamp=int(timestamp))
                self.assertAlmostEqual(
                    sum(values),
                    gauged.aggregate('foo', Gauged.SUM),
                    places=decimal_places)
                self.assertAlmostEqual(
                    min(values),
                    gauged.aggregate('foo', Gauged.MIN),
                    places=decimal_places)
                self.assertAlmostEqual(
                    max(values),
                    gauged.aggregate('foo', Gauged.MAX),
                    places=decimal_places)
                self.assertAlmostEqual(
                    len(values),
                    gauged.aggregate('foo', Gauged.COUNT),
                    places=decimal_places)
                mean = float(sum(values)) / len(values)
                self.assertAlmostEqual(
                    mean,
                    gauged.aggregate('foo', Gauged.MEAN),
                    places=decimal_places)
                self.assertAlmostEqual(
                    stddev(values),
                    gauged.aggregate('foo', Gauged.STDDEV),
                    places=decimal_places)
                self.assertAlmostEqual(
                    percentile(values, 50),
                    gauged.aggregate('foo', Gauged.MEDIAN),
                    places=decimal_places)
                self.assertAlmostEqual(
                    percentile(values, 98),
                    gauged.aggregate('foo', Gauged.PERCENTILE,
                                     percentile=98),
                    places=decimal_places)
def test_aggregate(self):
    """Exercise each aggregate, datetime bounds and error cases over a
    small fixed dataset."""
    gauged = Gauged(self.driver, block_size=10000)
    # Unknown key yields None rather than raising.
    self.assertEqual(gauged.aggregate('foobar', Gauged.SUM), None)
    with gauged.writer as writer:
        writer.add('foobar', 50, timestamp=10000)
        writer.add('foobar', 150, timestamp=15000)
        writer.add('foobar', 250, timestamp=20000)
        writer.add('foobar', 350, timestamp=40000)
        writer.add('foobar', 70, timestamp=60000)
    self.assertEqual(
        gauged.aggregate('foobar', Gauged.MIN, start=11000), 70)
    self.assertEqual(
        gauged.aggregate('foobar', Gauged.MIN, start=11000, end=55000),
        150)
    self.assertEqual(gauged.aggregate('foobar', Gauged.SUM), 870)
    self.assertEqual(gauged.aggregate('foobar', Gauged.MIN), 50)
    self.assertEqual(gauged.aggregate('foobar', Gauged.MAX), 350)
    result = gauged.aggregate('foobar', Gauged.STDDEV)
    self.assertAlmostEqual(result, 112.7120224, places=5)
    result = gauged.aggregate('foobar', Gauged.PERCENTILE, percentile=50)
    self.assertEqual(result, 150)
    # MEDIAN is equivalent to the 50th percentile.
    result = gauged.aggregate('foobar', Gauged.MEDIAN)
    self.assertEqual(result, 150)
    result = gauged.aggregate('foobar', Gauged.PERCENTILE, percentile=90)
    self.assertEqual(result, 310)
    result = gauged.aggregate('foobar', Gauged.COUNT)
    self.assertEqual(result, 5)
    # datetime bounds are accepted as well as integer timestamps.
    start = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=10)
    end = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=20)
    self.assertEqual(
        gauged.aggregate('foobar', Gauged.MEAN, start=start, end=end),
        100)
    # Out-of-range / non-finite percentiles and unknown aggregate names
    # raise ValueError.
    with self.assertRaises(ValueError):
        gauged.aggregate('foobar', Gauged.PERCENTILE, percentile=-1)
    with self.assertRaises(ValueError):
        gauged.aggregate('foobar', Gauged.PERCENTILE, percentile=101)
    with self.assertRaises(ValueError):
        gauged.aggregate(
            'foobar', Gauged.PERCENTILE, percentile=float('nan'))
    with self.assertRaises(ValueError):
        gauged.aggregate('foobar', 'unknown')
# Write benchmark: spread `measurements` data points for a single key
# evenly across the configured time span, then time reads.
measurements = options['number']
span = end_timestamp - start_timestamp
start = time()
with gauged.writer as writer:
    data = ['foobar', 0]
    gauges = [data]
    add = writer.add  # hoist the bound method out of the hot loop
    for timestamp in xrange(start_timestamp, end_timestamp,
                            span // measurements):
        data[1] = random()
        add(gauges, timestamp=timestamp*1000)
elapsed = time() - start
# NOTE(review): `number` is not defined in this fragment — presumably
# bound earlier in the script (it mirrors options['number']); verify.
print 'Wrote %s measurements in %s seconds (%s/s)' % \
    (number, round(elapsed, 3), abbreviate_number(measurements / elapsed))
statistics = gauged.statistics()
byte_count = statistics.byte_count
print 'Gauge data uses %s (%s per measurement)' % \
    (abbreviate_bytes(byte_count),
     abbreviate_bytes(byte_count / float(measurements)))
# Read benchmarks
for aggregate in ('min', 'max', 'sum', 'count', 'mean', 'stddev',
                  'median'):
    start = time()
    gauged.aggregate('foobar', aggregate)
    elapsed = time() - start
    print '%s() in %ss (read %s measurements/s)' % \
        (aggregate, round(elapsed, 3),
         abbreviate_number(measurements / elapsed))
class Reporter(object): """Object used to report both periodic statistics and events.""" player = None start_time = 0 report_file = None event_file = None report = False gauged = None def __init__(self, player): """Initialise files to save reports to.""" if OPTIONS.gauged: self.gauged = Gauged('mysql://root@localhost/gauged') self.gauged.sync() self.player = player file_name = self.player.directory + '/report.csv' self.report_file = open(file_name, 'w') file_name = self.player.directory + '/event.csv' self.event_file = open(file_name, 'w') self.start() def stop(self): """Stop reporting and close file handles.""" self.report = False try: self.report_file.close() except IOError: pass try: self.event_file.close() except IOError: pass def start(self): """Start reporting thread.""" self.report = True self.start_time = time.time() thread = threading.Thread(target=self.reporter, args=()) thread.daemon = True thread.start() def time_elapsed(self): """Calculate the time elapsed since the start of reporting.""" return round(time.time() - self.start_time, 4) def reporter(self): """Periodic reporting of various stats (every second) to file.""" if OPTIONS.gauged: try: mean = self.gauged.aggregate('bandwidth', Gauged.MEAN) count = self.gauged.aggregate('downloads', Gauged.SUM) print '[gauged]', mean, count except: print '[gauged] exception!' 
if self.report: thread = threading.Timer(interval=float( OPTIONS.reporting_period), function=self.reporter, args=()) thread.daemon = True thread.start() time_elapsed = self.time_elapsed() if OPTIONS.csv: try: self.report_file.flush() except ValueError: pass try: output = (str(time_elapsed) + "," + str(self.player.download_queue.time_buffer) + "," + str(self.player.download_queue.bandwidth) + "," + str(self.player.download_queue.id_) + "," + str(self.player.playback_queue.time_buffer) + "," + str(self.player.playback_queue.time_position) + "," + str(self.player.playback_queue.bandwidth) + "," + str(self.player.playback_queue.id_) + "," + str(self.player.bandwidth) + "\n") except AttributeError: output = str(time_elapsed) + str(', 0, 0, 0, 0, 0, 0, 0\n') try: self.report_file.write(output) except ValueError: pass if OPTIONS.debug: print("[report] " + output), try: self.report_file.flush() except ValueError: pass def event(self, action, description): """Create a thread to handle event.""" thread = threading.Thread(target=self.event_thread, args=(action, description)) thread.daemon = True thread.start() def event_thread(self, action, description): """Event reporting to file.""" time_elapsed = self.time_elapsed() if OPTIONS.csv: try: self.event_file.flush() except ValueError: pass output = (str(time_elapsed) + "," + str(action) + "," + str(description) + "\n") try: self.event_file.write(output) except ValueError: pass if OPTIONS.debug: print("[event] " + output), try: self.event_file.flush() except ValueError: pass def gauged_event(self, **gauged_data): """ Create a thread to handle event.""" if OPTIONS.gauged: thread = threading.Thread(target=self.gauged_event_thread, kwargs=gauged_data) thread.daemon = True thread.start() def gauged_event_thread(self, **gauged_data): """Event reporting to gauged.""" try: with self.gauged.writer as writer: writer.add(gauged_data) except: pass
# Announce the benchmark configuration.
print 'Spreading %s measurements to key "foobar" over %s days' % \
    (number, options['days'])
# Benchmark writes
measurements = options['number']
span = end_timestamp - start_timestamp
start = time()
with gauged.writer as writer:
    data = ['foobar', 0]
    gauges = [data]
    add = writer.add  # hoist the bound method out of the hot loop
    for timestamp in xrange(start_timestamp, end_timestamp,
                            span // measurements):
        data[1] = random()
        add(gauges, timestamp=timestamp)
elapsed = time() - start
print 'Wrote %s measurements in %s seconds (%s/s) (rss: %s)' % \
    (number, round(elapsed, 3),
     abbreviate_number(measurements / elapsed), peak_rss())
statistics = gauged.statistics()
byte_count = statistics.byte_count
print 'Gauge data uses %s (%s per measurement)' % \
    (abbreviate_bytes(byte_count),
     abbreviate_bytes(byte_count / float(measurements)))
# Read benchmarks
for aggregate in ('min', 'max', 'sum', 'count', 'mean', 'stddev',
                  'median'):
    start = time()
    gauged.aggregate('foobar', aggregate)
    elapsed = time() - start
    print '%s() in %ss (read %s measurements/s) (rss: %s)' % \
        (aggregate, round(elapsed, 3),
         abbreviate_number(measurements / elapsed), peak_rss())
def test_aggregate(self):
    """Exercise every aggregate and its error cases over fixed data."""
    store = Gauged(self.driver, block_size=10000)
    agg = store.aggregate
    # An unknown key yields None rather than raising.
    self.assertEqual(agg('foobar', Gauged.SUM), None)
    samples = ((50, 10000), (150, 15000), (250, 20000),
               (350, 40000), (70, 60000))
    with store.writer as writer:
        for value, when in samples:
            writer.add('foobar', value, timestamp=when)
    self.assertEqual(agg('foobar', Gauged.MIN, start=11000), 70)
    self.assertEqual(agg('foobar', Gauged.MIN, start=11000, end=55000),
                     150)
    self.assertEqual(agg('foobar', Gauged.SUM), 870)
    self.assertEqual(agg('foobar', Gauged.MIN), 50)
    self.assertEqual(agg('foobar', Gauged.MAX), 350)
    self.assertAlmostEqual(agg('foobar', Gauged.STDDEV),
                           112.7120224, places=5)
    # MEDIAN and the 50th percentile agree.
    self.assertEqual(agg('foobar', Gauged.PERCENTILE, percentile=50), 150)
    self.assertEqual(agg('foobar', Gauged.MEDIAN), 150)
    self.assertEqual(agg('foobar', Gauged.PERCENTILE, percentile=90), 310)
    self.assertEqual(agg('foobar', Gauged.COUNT), 5)
    # datetime bounds are accepted as well as integer timestamps.
    epoch = datetime.datetime(1970, 1, 1)
    start = epoch + datetime.timedelta(seconds=10)
    end = epoch + datetime.timedelta(seconds=20)
    self.assertEqual(agg('foobar', Gauged.MEAN, start=start, end=end), 100)
    # Out-of-range / non-finite percentiles raise ValueError...
    for bad_percentile in (-1, 101, float('nan')):
        with self.assertRaises(ValueError):
            agg('foobar', Gauged.PERCENTILE, percentile=bad_percentile)
    # ...and so does an unknown aggregate name.
    with self.assertRaises(ValueError):
        agg('foobar', 'unknown')
class Reporter(object): """Object used to report both periodic statistics and events.""" player = None start_time = 0 report_file = None event_file = None report = False gauged = None def __init__(self, player): """Initialise files to save reports to.""" if OPTIONS.gauged: self.gauged = Gauged('mysql://root@localhost/gauged') self.gauged.sync() self.player = player file_name = self.player.directory + '/report.csv' self.report_file = open(file_name, 'w') file_name = self.player.directory + '/event.csv' self.event_file = open(file_name, 'w') self.start() def stop(self): """Stop reporting and close file handles.""" self.report = False try: self.report_file.close() except IOError: pass try: self.event_file.close() except IOError: pass def start(self): """Start reporting thread.""" self.report = True self.start_time = time.time() thread = threading.Thread(target=self.reporter, args=()) thread.daemon = True thread.start() def time_elapsed(self): """Calculate the time elapsed since the start of reporting.""" return round(time.time() - self.start_time, 4) def reporter(self): """Periodic reporting of various stats (every second) to file.""" if OPTIONS.gauged: try: mean = self.gauged.aggregate('bandwidth', Gauged.MEAN) count = self.gauged.aggregate('downloads', Gauged.SUM) print '[gauged]', mean, count except: print '[gauged] exception!' 
if self.report: thread = threading.Timer( interval=float(OPTIONS.reporting_period), function=self.reporter, args=()) thread.daemon = True thread.start() time_elapsed = self.time_elapsed() if OPTIONS.csv: try: self.report_file.flush() except ValueError: pass try: output = (str(time_elapsed) + "," + str(self.player.download_queue.time_buffer) + "," + str(self.player.download_queue.bandwidth) + "," + str(self.player.download_queue.id_) + "," + str(self.player.playback_queue.time_buffer) + "," + str(self.player.playback_queue.time_position) + "," + str(self.player.playback_queue.bandwidth) + "," + str(self.player.playback_queue.id_) + "," + str(self.player.bandwidth) + "\n") except AttributeError: output = str(time_elapsed) + str(', 0, 0, 0, 0, 0, 0, 0\n') try: self.report_file.write(output) except ValueError: pass if OPTIONS.debug: print ("[report] " + output), try: self.report_file.flush() except ValueError: pass def event(self, action, description): """Create a thread to handle event.""" thread = threading.Thread(target=self.event_thread, args=(action, description)) thread.daemon = True thread.start() def event_thread(self, action, description): """Event reporting to file.""" time_elapsed = self.time_elapsed() if OPTIONS.csv: try: self.event_file.flush() except ValueError: pass output = (str(time_elapsed) + "," + str(action) + "," + str(description) + "\n") try: self.event_file.write(output) except ValueError: pass if OPTIONS.debug: print ("[event] " + output), try: self.event_file.flush() except ValueError: pass def gauged_event(self, **gauged_data): """ Create a thread to handle event.""" if OPTIONS.gauged: thread = threading.Thread(target=self.gauged_event_thread, kwargs=gauged_data) thread.daemon = True thread.start() def gauged_event_thread(self, **gauged_data): """Event reporting to gauged.""" try: with self.gauged.writer as writer: writer.add(gauged_data) except: pass