def should_turndown_previous(self):
    """Should the previous block be turned down?
    """
    prev = self.previous()
    return prev is not None and prev.state == 'ACTIVE' and \
        self.time_expired()[0] > min(
            1000 * 60 * config.get().MX_TURNDOWN_MIN,
            BLOCK_SIZE * float(config.get().MX_TURNDOWN_PCT) / 100.)
def should_create_next(self):
    """Should the next block be created?
    """
    next_block = self.next()
    if next_block and next_block.state == 'ACTIVE':
        return False
    return self.time_remaining()[0] < max(
        1000 * 60 * config.get().MX_CREATE_NEXT_MIN,
        BLOCK_SIZE * float(config.get().MX_CREATE_NEXT_PCT) / 100.)
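# For intuition about the two thresholds above, a worked example with
# hypothetical settings (these values are illustrative, not defaults):
# with MX_CREATE_NEXT_MIN=60 and MX_CREATE_NEXT_PCT=5 on a 30-day block,
#
#   max(1000 * 60 * 60,             # 60 minutes in millis
#       BLOCK_SIZE * 5. / 100.)     # 5% of 30 days = 36 hours
#
# is 36 hours, so the next block is created once less than 36 hours remain.
# should_turndown_previous() mirrors this with min(), turning the previous
# block down as soon as the smaller of its two bounds has elapsed.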
class Block(object):
    index_key_lru = LRUCache(config.get().CACHE_WRITE_INDEX_KEY)

    def __init__(self, master, connection, n):
        self.master = master
        self.connection = connection
        self.item = self.master.query(n__eq=n, consistent=True).next()
        self.dp_writer = self.data_points_table = self.index_table = None
        # noinspection PyBroadException
        try:
            self.bind()
        except:
            pass  # TODO log

    def bind(self):
        """Bind to existing tables.
        """
        if self.data_points_name and self.index_name:
            data_points_table = Table(self.data_points_name,
                                      connection=self.connection)
            # describe() raises if the table does not exist
            s1 = data_points_table.describe()['Table']['TableStatus']
            self.data_points_table = data_points_table
            self.dp_writer = TimedBatchTable(
                self.data_points_table.batch_write())
            index_table = Table(self.index_name, connection=self.connection)
            s2 = index_table.describe()['Table']['TableStatus']
            self.index_table = index_table
            self.item['state'] = s1 if s1 == s2 else 'UNDEFINED'
        return self.state

    def create_tables(self):
        """Create tables.
        """
        if self.data_points_table and self.index_table:
            return self.state
        self.item['data_points_name'] = '%s_%s' % (config.table_name('dp'),
                                                   self.tbase)
        self.item['index_name'] = '%s_%s' % (config.table_name('dp_index'),
                                             self.tbase)
        # noinspection PyBroadException
        try:
            self.bind()
        except:
            # bind failed: one or both tables are missing, so create them
            if not self.data_points_table:
                Table.create(self.data_points_name,
                             schema=[
                                 HashKey('domain_metric_tbase_tags'),
                                 RangeKey('toffset', data_type=NUMBER)
                             ],
                             throughput={
                                 'read': config.get().TP_READ_DATAPOINTS / BLOCKS,
                                 'write': config.get().TP_WRITE_DATAPOINTS
                             },
                             connection=self.connection)
            if not self.index_table:
                Table.create(
                    self.index_name,
                    schema=[HashKey('domain_metric'), RangeKey('tbase_tags')],
                    throughput={
                        'read': config.get().TP_READ_INDEX_KEY / BLOCKS,
                        'write': config.get().TP_WRITE_INDEX_KEY
                    },
                    connection=self.connection)
            self.item['state'] = self.bind()
        self.item.save(overwrite=True)
        return self.state

    def replace(self, new_timestamp):
        """Replace this block with a new block.
        """
        if block_pos(new_timestamp) != self.n:
            raise ValueError(
                'time %s (pos=%s) is not valid for block (pos=%s)' %
                (new_timestamp, block_pos(new_timestamp), self.n))
        if base_time(new_timestamp) == self.tbase:
            return self
        self.delete_tables(new_timestamp)
        return self

    def delete_tables(self, new_timestamp=None):
        """Delete the tables for this block.
        """
        if not new_timestamp:
            new_timestamp = self.tbase
        if self.data_points_table:
            # noinspection PyBroadException
            try:
                self.data_points_table.delete()
            except:
                pass
            self.data_points_table = None
            self.dp_writer = None
        if self.index_table:
            # noinspection PyBroadException
            try:
                self.index_table.delete()
            except:
                pass
            self.index_table = None
        # noinspection PyBroadException
        try:
            self.item.delete()
        except:
            pass
        self.item = Item(self.master, data=dict(self.item.items()))
        self.item['state'] = 'INITIAL'
        self.item['tbase'] = base_time(new_timestamp)
        self.item.save(overwrite=True)
        return self.state

    def turndown_tables(self):
        """Reduce write throughput for this block.
        """
        # noinspection PyBroadException
        try:
            self.dp_writer.flush()
        except:
            pass
        self.dp_writer = None
        if self.data_points_table:
            self.data_points_table.update({
                'read': config.get().TP_READ_DATAPOINTS / BLOCKS,
                'write': 1
            })
        if self.index_table:
            self.index_table.update({
                'read': config.get().TP_READ_INDEX_KEY / BLOCKS,
                'write': 1
            })

    @property
    def n(self):
        return self.item['n']

    @property
    def tbase(self):
        return self.item['tbase']

    @property
    def data_points_name(self):
        return self.item['data_points_name']

    @property
    def index_name(self):
        return self.item['index_name']

    @property
    def state(self):
        state = self.item['state']
        if state == 'INITIAL':
            return state
        s1 = self._calc_state(self.data_points_table.describe())
        s2 = self._calc_state(self.index_table.describe())
        if s1 != s2:
            return 'UNDEFINED'
        return s1

    def store_datapoint(self, timestamp, metric, tags, value, domain):
        """Store index key and datapoint value in tables.
        """
        # TODO: raise an exception instead of silently dropping the point
        if not self.dp_writer:
            return
        key = util.hdata_points_key(domain, metric, timestamp, tags)
        self._store_index(key, timestamp, metric, tags, domain)
        return self.dp_writer.put_item(
            data={
                'domain_metric_tbase_tags': key,
                'toffset': util.offset_time(timestamp),
                'value': value
            })

    def query_index(self, domain, metric, start_time, end_time):
        """Query index for keys.
        """
        if not self.index_table:
            return []
        key = util.index_hash_key(domain, metric)
        time_range = map(
            str, [util.base_time(start_time), util.base_time(end_time) + 1])
        return [
            IndexKey(k) for k in self.index_table.query(
                consistent=False,
                domain_metric__eq=key,
                tbase_tags__between=time_range)
        ]

    def query_datapoints(self, index_key, start_time, end_time,
                         attributes=tuple(['value'])):
        """Query datapoints.
        """
        if not self.data_points_table:
            return []
        key = index_key.to_data_points_key()
        time_range = util.offset_range(index_key, start_time, end_time)
        attributes_ = ['toffset']
        attributes_.extend(attributes)
        return [
            value for value in self.data_points_table.query(
                consistent=False,
                reverse=True,
                attributes=attributes_,
                domain_metric_tbase_tags__eq=key,
                toffset__between=time_range)
        ]

    # noinspection PyMethodMayBeStatic
    def _calc_state(self, desc):
        desc = desc['Table']
        state = desc['TableStatus']
        if state == 'ACTIVE' and \
                desc['ProvisionedThroughput']['WriteCapacityUnits'] == 1:
            state = 'TURNED_DOWN'
        return state

    # noinspection PyMethodMayBeStatic
    def _store_cache(self, key, cache, table, data):
        if not cache.get(key):
            table.put_item(data=data(), overwrite=True)
            cache.put(key, 1)

    def _store_index(self, key, timestamp, metric, tags, domain):
        """Store an index key if not yet stored.
        """
        self._store_cache(
            key, Block.index_key_lru, self.index_table, lambda: {
                'domain_metric': util.index_hash_key(domain, metric),
                'tbase_tags': util.index_range_key(timestamp, tags)
            })

    def __str__(self):
        return str((self.n, self.state, self.tbase, self.data_points_name,
                    self.index_name))

    def __repr__(self):
        return str(self)
from amondawa import config, util
from amondawa.util import IndexKey
from amondawa.writer import TimedBatchTable
from boto.dynamodb2.fields import HashKey, RangeKey
from boto.dynamodb2.items import Item
from boto.dynamodb2.table import Table
from boto.dynamodb2.types import *
from repoze.lru import LRUCache
from threading import Thread

import time
import traceback

# store history in how many blocks (e.g. 12)
BLOCKS = int(config.get().STORE_HISTORY_BLOCKS) + 1  # +1 bumper
# of what size (e.g. 30 days)
BLOCK_SIZE = int(config.get().STORE_HISTORY /
                 config.get().STORE_HISTORY_BLOCKS)
# history without bumper
AVAILABLE_HISTORY = (BLOCKS - 1) * BLOCK_SIZE  # -1 bumper
# how long to store data points (e.g. 360 days)
HISTORY = BLOCKS * BLOCK_SIZE


def base_time(timestamp):
    return timestamp - timestamp % BLOCK_SIZE


def block_pos(timestamp):
    return int((util.base_time(timestamp) % HISTORY) / BLOCK_SIZE)
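# A sketch of the ring-buffer arithmetic above, assuming illustrative settings
# (STORE_HISTORY_BLOCKS=12 with 30-day blocks; `day_ms` and `ts` are
# hypothetical names):
#
#   day_ms = 1000 * 60 * 60 * 24
#   ts = 370 * day_ms    # a timestamp 370 days after the epoch
#   base_time(ts)        # -> 360 * day_ms, start of ts's 30-day block
#   block_pos(ts)        # -> 12, i.e. (360 days % HISTORY) / BLOCK_SIZE
#
# Positions wrap modulo HISTORY, so after a full cycle an old block is replaced
# (see Block.replace) instead of new table sets being created indefinitely.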
"""
Classes for querying datapoints.
"""

from amondawa import util, config
from amondawa.mtime import timeit
from concurrent.futures import ThreadPoolExecutor
from pandas.tseries import frequencies as freq

import numpy as np
import pandas as pd

config = config.get()

# TODO: shutdown gracefully
thread_pool = ThreadPoolExecutor(max_workers=config.MT_WRITERS)

# time intervals
FREQ_MILLIS = {
    'milliseconds': 1,
    'seconds': 1000,
    'minutes': 1000 * 60,
    'hours': 1000 * 60 * 60,
    'days': 1000 * 60 * 60 * 24,
    'weeks': 1000 * 60 * 60 * 24 * 7,
    'months': 1000 * 60 * 60 * 24 * 30,
    'years': 1000 * 60 * 60 * 24 * 365
}
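# FREQ_MILLIS maps a relative-time unit to its length in milliseconds, so a
# (value, unit) pair converts like this (illustrative):
#
#   millis = 2 * FREQ_MILLIS['hours']   # "2 hours" -> 7200000 ms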
"""
Utility classes - string manipulation, key <-> string conversion etc.
"""

from boto.dynamodb.types import get_dynamodb_type, is_str, is_num
from decimal import Decimal
from flask import json

from amondawa import config

import hashlib
import time

MAGIC = '0xCAFEBABE'
COLUMN_HEIGHT = config.get().STORE_COLUMN_HEIGHT


def to_dynamo_compat_type(value):
    try:
        if is_num(value):
            value = Decimal(str(value))
        elif type(value) in (list, tuple):
            value = set(value)
        elif type(value) is dict:
            value = MAGIC + json.dumps(value)
        dtype = get_dynamodb_type(value)
        if dtype == 'NS':
            value = set([Decimal(str(v)) for v in value])
    except TypeError:
        value = MAGIC + json.dumps(value)
    return value
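# Illustrative conversions (a sketch; exact Decimal rendering depends on the
# input):
#
#   to_dynamo_compat_type(1.5)        # -> Decimal('1.5')            (N)
#   to_dynamo_compat_type([1, 2, 2])  # -> set([Decimal('1'), ...])  (NS)
#   to_dynamo_compat_type({'a': 1})   # -> '0xCAFEBABE{"a": 1}'      (S, MAGIC-
#                                     #    tagged JSON for round-tripping)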
class Schema(object):
    # table_names is a dictionary to find this instance's tables e.g.:
    # {
    #   'metric_names': 'amdw1_metric_names',
    #   'tag_names': 'amdw1_tag_names',
    #   'tag_values': 'amdw1_tag_values',
    #   'credentials': 'amdw1_credentials'
    # }
    table_names = config.table_names(
        ['metric_names', 'tag_names', 'tag_values', 'credentials'])

    # these core tables won't be deleted by the delete operation below
    core_tables = config.table_names(['credentials', 'config']).values()

    metric_names_tp = {'read': 1, 'write': 1}
    tag_names_tp = {'read': 1, 'write': 1}
    tag_values_tp = {'read': 1, 'write': 1}

    datapoints_lru = LRUCache(config.get().CACHE_DATAPOINTS)
    index_key_lru = LRUCache(config.get().CACHE_QUERY_INDEX_KEY)

    # TODO: handle concurrent table operations limit (table operations > 10)
    @staticmethod
    def delete(connection):
        """Destructive delete of schema and data.
        """
        for table in Schema.table_names.values():
            # noinspection PyBroadException
            try:
                # don't delete the credentials table (or other core tables)
                if table not in Schema.core_tables:
                    Table(table, connection=connection).delete()
            except:
                pass
        DatapointsSchema.delete(connection)

    @staticmethod
    def bind(connection):
        """Bind to existing dynamodb tables.
        """
        return dict((table, Table(table_name, connection=connection))
                    for table, table_name in Schema.table_names.items())

    @staticmethod
    def create(connection):
        """Create dynamodb tables.
        """
        Table.create(config.table_name('metric_names'),
                     schema=[HashKey('domain'), RangeKey('name')],
                     throughput=Schema.metric_names_tp,
                     connection=connection)
        Table.create(config.table_name('tag_names'),
                     schema=[HashKey('domain'), RangeKey('name')],
                     throughput=Schema.tag_names_tp,
                     connection=connection)
        Table.create(config.table_name('tag_values'),
                     schema=[HashKey('domain'), RangeKey('value')],
                     throughput=Schema.tag_values_tp,
                     connection=connection)
        DatapointsSchema.create(connection)

    Credential = collections.namedtuple(
        'Key', 'access_key_id permissions secret_access_key state')

    def __init__(self, connection, start_mx=False):
        """Initialize data structures.
        """
        self.connection = connection
        # TODO these should be LRU caches - this will become too large
        self.metric_name_cache = set()
        self.tag_name_cache = set()
        self.tag_value_cache = set()
        # use table names as var names
        vars(self).update(Schema.bind(connection))
        self.blocks = DatapointsSchema(connection)
        if start_mx:
            self.blocks.start_maintenance()

        @atexit.register
        def close():
            # called on abrupt exit
            # noinspection PyBroadException
            try:
                self.connection.close()
            except:
                pass

    def close(self):
        """Close connection and flush pending operations.
        """
        self.connection.close()

    def get_credentials(self):
        """Get credentials.
        """
        return dict([(item['access_key_id'],
                      Schema.Credential(item['access_key_id'],
                                        item['permissions'],
                                        item['secret_access_key'],
                                        item['state']))
                     for item in self.credentials.scan()])

    def get_metric_names(self, domain):
        """Get all metric names.
        """
        return [item['name'] for item in
                self.metric_names.query(consistent=False,
                                        attributes=['name'],
                                        domain__eq=domain)]

    def get_tag_names(self, domain):
        """Get tag names.
        """
        return [item['name'] for item in
                self.tag_names.query(consistent=False,
                                     attributes=['name'],
                                     domain__eq=domain)]

    def get_tag_values(self, domain):
        """Get all tag values.
        """
        return [item['value'] for item in
                self.tag_values.query(consistent=False,
                                      attributes=['value'],
                                      domain__eq=domain)]

    def store_datapoint(self, timestamp, metric, tags, value, domain):
        """Store a single datapoint, adding to ancillary tables if required.

        This call will buffer write operations into the provided writer
        before sending to dynamodb.
        """
        self._store_tags(domain, tags)
        self._store_metric(domain, metric)
        self.blocks.store_datapoint(timestamp, metric, tags, value, domain)

    def query_index(self, domain, metric, start_time, end_time):
        """Query index for keys.
        """
        return self.blocks.query_index(domain, metric, start_time, end_time)

    def query_datapoints(self, index_key, start_time, end_time,
                         attributes=('value',)):
        """Query datapoints.
        """
        return self.blocks.query_datapoints(index_key, start_time, end_time,
                                            attributes)

    def _store_cache(self, key, cache, table, data):
        if key not in cache:
            table.put_item(data=data(), overwrite=True)
            cache.add(key)

    def _store_tag_name(self, domain, name):
        """Store tag name if not yet stored.
        """
        self._store_cache('|'.join([domain, name]), self.tag_name_cache,
                          self.tag_names,
                          lambda: {'domain': domain, 'name': name})

    def _store_tag_value(self, domain, value):
        """Store tag value if not yet stored.
        """
        self._store_cache('|'.join([domain, value]), self.tag_value_cache,
                          self.tag_values,
                          lambda: {'domain': domain, 'value': value})

    def _store_metric(self, domain, metric):
        """Store metric name if not yet stored.
        """
        self._store_cache('|'.join([domain, metric]), self.metric_name_cache,
                          self.metric_names,
                          lambda: {'domain': domain, 'name': metric})

    def _store_tags(self, domain, tags):
        """Store tags if not yet stored.
        """
        for name, value in tags.items():
            self._store_tag_name(domain, name)
            self._store_tag_value(domain, value)