Example #1
    def should_turndown_previous(self):
        """Should the previous block be turned down?
        """
        prev = self.previous()
        if prev is None or prev.state != 'ACTIVE':
            return False
        # Turn down once the block has been expired longer than the tighter
        # of an absolute bound (minutes) and a percentage of the block size.
        return self.time_expired()[0] > min(
            1000 * 60 * config.get().MX_TURNDOWN_MIN,
            BLOCK_SIZE * float(config.get().MX_TURNDOWN_PCT) / 100.)
Example #2
    def should_create_next(self):
        """Should the next block be created?
        """
        next_block = self.next()
        if next_block and next_block.state == 'ACTIVE':
            return False

        return self.time_remaining()[0] < max(
            1000 * 60 * config.get().MX_CREATE_NEXT_MIN,
            BLOCK_SIZE * float(config.get().MX_CREATE_NEXT_PCT) / 100.)
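
The two predicates above compare elapsed or remaining block time, in milliseconds, against the tighter (min) or looser (max) of an absolute bound in minutes and a percentage of the block size. A minimal sketch of that arithmetic, with assumed config values (the real defaults live in amondawa's config, not here):

# Assumed values, for illustration only.
MX_TURNDOWN_MIN = 60                      # minutes
MX_TURNDOWN_PCT = 5                       # percent of BLOCK_SIZE
BLOCK_SIZE = 30 * 24 * 60 * 60 * 1000     # 30 days in milliseconds

absolute = 1000 * 60 * MX_TURNDOWN_MIN                  # 3,600,000 ms (1 hour)
relative = BLOCK_SIZE * float(MX_TURNDOWN_PCT) / 100.   # 129,600,000 ms (1.5 days)
print(min(absolute, relative))   # 3600000 -> turn down 1 hour after expiry

should_create_next uses max() instead, so the next block is created once remaining time falls below the larger of its two bounds, i.e. at the earlier-firing trigger.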
Example #3
    def turndown_tables(self):
        """Reduce write throughput for this block.
        """
        try:
            self.dp_writer.flush()
        except:
            pass
        self.dp_writer = None
        if self.data_points_table:
            self.data_points_table.update({
                'read': config.get().TP_READ_DATAPOINTS / BLOCKS,
                'write': 1
            })
        if self.index_table:
            self.index_table.update({
                'read': config.get().TP_READ_INDEX_KEY / BLOCKS,
                'write': 1
            })
Example #4
    def create_tables(self):
        """Create tables.
        """
        if self.data_points_table and self.index_table:
            return self.state

        self.item['data_points_name'] = '%s_%s' % (config.table_name('dp'),
                                                   self.tbase)
        self.item['index_name'] = '%s_%s' % (config.table_name('dp_index'),
                                             self.tbase)

        try:
            self.bind()
        except:
            if not self.data_points_table:
                Table.create(self.data_points_name,
                             schema=[
                                 HashKey('domain_metric_tbase_tags'),
                                 RangeKey('toffset', data_type=NUMBER)
                             ],
                             throughput={
                                 'read': config.get().TP_READ_DATAPOINTS / BLOCKS,
                                 'write': config.get().TP_WRITE_DATAPOINTS
                             },
                             connection=self.connection)
            if not self.index_table:
                Table.create(
                    self.index_name,
                    schema=[HashKey('domain_metric'),
                            RangeKey('tbase_tags')],
                    throughput={
                        'read': config.get().TP_READ_INDEX_KEY / BLOCKS,
                        'write': config.get().TP_WRITE_INDEX_KEY
                    },
                    connection=self.connection)

            self.item['state'] = self.bind()

        self.item.save(overwrite=True)
        return self.state
Example #5
class Block(object):
    index_key_lru = LRUCache(config.get().CACHE_WRITE_INDEX_KEY)

    def __init__(self, master, connection, n):
        self.master = master
        self.connection = connection
        self.item = next(self.master.query(n__eq=n, consistent=True))
        self.dp_writer = self.data_points_table = self.index_table = None
        # noinspection PyBroadException
        try:
            self.bind()
        except:
            pass  # TODO log

    def bind(self):
        """Bind to existing tables.
        """
        if self.data_points_name and self.index_name:
            data_points_table = Table(self.data_points_name,
                                      connection=self.connection)
            s1 = data_points_table.describe()['Table']['TableStatus']
            self.data_points_table = data_points_table
            self.dp_writer = TimedBatchTable(
                self.data_points_table.batch_write())

            index_table = Table(self.index_name, connection=self.connection)
            s2 = index_table.describe()['Table']['TableStatus']
            self.index_table = index_table

            # the block is only in a defined state if both tables agree
            self.item['state'] = s1 if s1 == s2 else 'UNDEFINED'

        return self.state

    def create_tables(self):
        """Create tables.
        """
        if self.data_points_table and self.index_table:
            return self.state

        self.item['data_points_name'] = '%s_%s' % (config.table_name('dp'),
                                                   self.tbase)
        self.item['index_name'] = '%s_%s' % (config.table_name('dp_index'),
                                             self.tbase)

        try:
            self.bind()
        except:
            if not self.data_points_table:
                Table.create(self.data_points_name,
                             schema=[
                                 HashKey('domain_metric_tbase_tags'),
                                 RangeKey('toffset', data_type=NUMBER)
                             ],
                             throughput={
                                 'read': config.get().TP_READ_DATAPOINTS / BLOCKS,
                                 'write': config.get().TP_WRITE_DATAPOINTS
                             },
                             connection=self.connection)
            if not self.index_table:
                Table.create(
                    self.index_name,
                    schema=[HashKey('domain_metric'),
                            RangeKey('tbase_tags')],
                    throughput={
                        'read': config.get().TP_READ_INDEX_KEY / BLOCKS,
                        'write': config.get().TP_WRITE_INDEX_KEY
                    },
                    connection=self.connection)

            self.item['state'] = self.bind()

        self.item.save(overwrite=True)
        return self.state

    def replace(self, new_timestamp):
        """Replace this block with new block.
        """
        if block_pos(new_timestamp) != self.n:
            raise ValueError(
                'time %s (pos=%s) is not valid for block (pos=%s)' %
                (new_timestamp, block_pos(new_timestamp), self.n))
        if base_time(new_timestamp) == self.tbase:
            return self
        self.delete_tables(new_timestamp)
        return self

    def delete_tables(self, new_timestamp=None):
        """Delete the tables for this block.
        """
        if not new_timestamp:
            new_timestamp = self.tbase

        if self.data_points_table:
            # noinspection PyBroadException
            try:
                self.data_points_table.delete()
            except:
                pass
            self.data_points_table = None
            self.dp_writer = None
        if self.index_table:
            try:
                self.index_table.delete()
            except:
                pass
            self.index_table = None

        try:
            self.item.delete()
        except:
            pass

        self.item = Item(self.master, data=dict(self.item.items()))
        self.item['state'] = 'INITIAL'
        self.item['tbase'] = base_time(new_timestamp)
        self.item.save(overwrite=True)

        return self.state

    def turndown_tables(self):
        """Reduce write throughput for this block.
        """
        try:
            self.dp_writer.flush()
        except:
            pass
        self.dp_writer = None
        if self.data_points_table:
            self.data_points_table.update({
                'read': config.get().TP_READ_DATAPOINTS / BLOCKS,
                'write': 1
            })
        if self.index_table:
            self.index_table.update({
                'read': config.get().TP_READ_INDEX_KEY / BLOCKS,
                'write': 1
            })

    @property
    def n(self):
        return self.item['n']

    @property
    def tbase(self):
        return self.item['tbase']

    @property
    def data_points_name(self):
        return self.item['data_points_name']

    @property
    def index_name(self):
        return self.item['index_name']

    @property
    def state(self):
        state = self.item['state']
        if state == 'INITIAL':
            return state
        s1 = self._calc_state(self.data_points_table.describe())
        s2 = self._calc_state(self.index_table.describe())
        if s1 != s2:
            return 'UNDEFINED'
        return s1

    def store_datapoint(self, timestamp, metric, tags, value, domain):
        """Store index key and datapoint value in tables.
        """
        # TODO: exception
        if not self.dp_writer:
            return

        key = util.hdata_points_key(domain, metric, timestamp, tags)
        self._store_index(key, timestamp, metric, tags, domain)
        return self.dp_writer.put_item(
            data={
                'domain_metric_tbase_tags': key,
                'toffset': util.offset_time(timestamp),
                'value': value
            })

    def query_index(self, domain, metric, start_time, end_time):
        """Query index for keys.
        """
        if not self.index_table:
            return []

        key = util.index_hash_key(domain, metric)
        time_range = [str(t) for t in (util.base_time(start_time),
                                       util.base_time(end_time) + 1)]
        return [
            IndexKey(k)
            for k in self.index_table.query(consistent=False,
                                            domain_metric__eq=key,
                                            tbase_tags__between=time_range)
        ]

    def query_datapoints(self,
                         index_key,
                         start_time,
                         end_time,
                         attributes=('value',)):
        """Query datapoints.
        """
        if not self.data_points_table:
            return []

        key = index_key.to_data_points_key()
        time_range = util.offset_range(index_key, start_time, end_time)
        attributes_ = ['toffset']
        attributes_.extend(attributes)
        return [
            value for value in self.data_points_table.query(
                consistent=False,
                reverse=True,
                attributes=attributes_,
                domain_metric_tbase_tags__eq=key,
                toffset__between=time_range)
        ]

    # noinspection PyMethodMayBeStatic
    def _calc_state(self, desc):
        desc = desc['Table']
        state = desc['TableStatus']
        if state == 'ACTIVE' and desc['ProvisionedThroughput'][
                'WriteCapacityUnits'] == 1:
            state = 'TURNED_DOWN'
        return state

    # noinspection PyMethodMayBeStatic
    def _store_cache(self, key, cache, table, data):
        if not cache.get(key):
            table.put_item(data=data(), overwrite=True)
            cache.put(key, 1)

    def _store_index(self, key, timestamp, metric, tags, domain):
        """Store an index key if not yet stored.
        """
        self._store_cache(
            key, Block.index_key_lru, self.index_table, lambda: {
                'domain_metric': util.index_hash_key(domain, metric),
                'tbase_tags': util.index_range_key(timestamp, tags)
            })

    def __str__(self):
        return str((self.n, self.state, self.tbase, self.data_points_name,
                    self.index_name))

    def __repr__(self):
        return str(self)
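
A hedged driver for the Block lifecycle, assuming a live DynamoDB connection and a pre-populated master table (the region and table name here are illustrative, not fixed by the module):

from boto.dynamodb2 import connect_to_region
from boto.dynamodb2.table import Table

connection = connect_to_region('us-east-1')
master = Table('amdw1_dp_master', connection=connection)  # hypothetical name

block = Block(master, connection, n=0)   # load block 0 from the master table
block.create_tables()                    # create dp/index tables if not bound
block.store_datapoint(1400000000000, 'cpu.load',
                      {'host': 'web1'}, 0.42, 'example.com')
block.turndown_tables()                  # write throughput -> 1 on rotation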
Example #6
from amondawa import config, util
from amondawa.util import IndexKey
from amondawa.writer import TimedBatchTable

from boto.dynamodb2.fields import HashKey, RangeKey
from boto.dynamodb2.items import Item
from boto.dynamodb2.table import Table
from boto.dynamodb2.types import *
from repoze.lru import LRUCache
from threading import Thread

import time
import traceback

# how many blocks history is stored in (e.g. 12, plus one bumper block)
BLOCKS = int(config.get().STORE_HISTORY_BLOCKS) + 1  # +1 bumper
# size of each block (e.g. 30 days)
BLOCK_SIZE = int(config.get().STORE_HISTORY /
                 config.get().STORE_HISTORY_BLOCKS)
# queryable history, excluding the bumper block (e.g. 360 days)
AVAILABLE_HISTORY = (BLOCKS - 1) * BLOCK_SIZE  # -1 bumper
# total span covered, bumper included (e.g. 390 days)
HISTORY = BLOCKS * BLOCK_SIZE


def base_time(timestamp):
    return timestamp - timestamp % BLOCK_SIZE


def block_pos(timestamp):
    return int((util.base_time(timestamp) % HISTORY) / BLOCK_SIZE)
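
Worked through with assumed config values (STORE_HISTORY taken to be in milliseconds, consistent with base_time operating on millisecond timestamps):

DAY = 24 * 60 * 60 * 1000                 # one day in milliseconds
STORE_HISTORY = 360 * DAY                 # assumed: 360 days of history
STORE_HISTORY_BLOCKS = 12                 # assumed: in 12 blocks

BLOCKS = STORE_HISTORY_BLOCKS + 1                     # 13 (12 + 1 bumper)
BLOCK_SIZE = STORE_HISTORY // STORE_HISTORY_BLOCKS    # 30 days
HISTORY = BLOCKS * BLOCK_SIZE                         # 390 days

ts = 100 * DAY + 12345                     # a timestamp on day 100
tbase = ts - ts % BLOCK_SIZE               # block start: day 90
pos = int((tbase % HISTORY) / BLOCK_SIZE)  # ring-buffer slot: 3
print(tbase // DAY, pos)                   # 90 3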
Example #7
"""
Classes for querying datapoints.
"""

from amondawa import util, config
from amondawa.mtime import timeit
from concurrent.futures import ThreadPoolExecutor
from pandas.tseries import frequencies as freq
import numpy as np
import pandas as pd

config = config.get()

# TODO: shutdown gracefully
thread_pool = ThreadPoolExecutor(max_workers=config.MT_WRITERS)

# time intervals in milliseconds (months and years approximated as 30 and 365 days)
FREQ_MILLIS = {
    'milliseconds': 1,
    'seconds': 1000,
    'minutes': 1000 * 60,
    'hours': 1000 * 60 * 60,
    'days': 1000 * 60 * 60 * 24,
    'weeks': 1000 * 60 * 60 * 24 * 7,
    'months': 1000 * 60 * 60 * 24 * 30,
    'years': 1000 * 60 * 60 * 24 * 365
}
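
FREQ_MILLIS maps a unit name to its length in milliseconds. A sketch of how a relative interval such as "5 minutes ago" converts to an absolute millisecond timestamp (relative_start is an illustrative helper, not part of the module):

import time

def relative_start(value, unit):
    """Millisecond timestamp `value` `unit`s before now, e.g. (5, 'minutes')."""
    now = int(time.time() * 1000)
    return now - value * FREQ_MILLIS[unit]

start = relative_start(5, 'minutes')   # 300,000 ms before now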
Example #8
"""
Utility classes - string manipulation, key <-> string conversion etc.
"""

from boto.dynamodb.types import get_dynamodb_type, is_str, is_num
from decimal import Decimal
from flask import json
from amondawa import config
import hashlib
import time

MAGIC = '0xCAFEBABE'  # prefix marking values stored as JSON strings

COLUMN_HEIGHT = config.get().STORE_COLUMN_HEIGHT


def to_dynamo_compat_type(value):
    try:
        if is_num(value):
            value = Decimal(str(value))
        elif type(value) in (list, tuple):
            value = set(value)
        elif type(value) is dict:
            value = MAGIC + json.dumps(value)
        dtype = get_dynamodb_type(value)
        if dtype == 'NS':
            value = set([Decimal(str(v)) for v in value])
    except TypeError:
        value = MAGIC + json.dumps(value)
    return value
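
A few sample conversions, showing what the helper returns; dicts (and anything else DynamoDB can't store natively) come back as MAGIC-prefixed JSON strings:

to_dynamo_compat_type(1.5)         # Decimal('1.5')
to_dynamo_compat_type([1, 2, 2])   # {Decimal('1'), Decimal('2')} (an 'NS' set)
to_dynamo_compat_type({'a': 1})    # '0xCAFEBABE{"a": 1}'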
Example #9
class Schema(object):
    # table_names is a dictionary to find this instance's tables e.g.:
    # {
    #   'metric_names': 'amdw1_metric_names',
    #   'tag_names': 'amdw1_tag_names',
    #   'tag_values': 'amdw1_tag_values',
    #   'credentials': 'amdw1_credentials'
    # }
    table_names = config.table_names(['metric_names', 'tag_names', 'tag_values', 'credentials'])
    # core tables (e.g. credentials) are never deleted by the delete() operation below
    core_tables = config.table_names(['credentials', 'config']).values()

    metric_names_tp = {'read': 1, 'write': 1}
    tag_names_tp = {'read': 1, 'write': 1}
    tag_values_tp = {'read': 1, 'write': 1}

    datapoints_lru = LRUCache(config.get().CACHE_DATAPOINTS)
    index_key_lru = LRUCache(config.get().CACHE_QUERY_INDEX_KEY)

    @staticmethod
    # TODO: handle concurrent table operations limit (tables operations > 10)
    def delete(connection):
        """Destructive delete of schema and data.
        """
        for table in Schema.table_names.values():
            try:
                # never delete core tables (credentials, config);
                # table_names.values() are the table-name strings themselves
                if table not in Schema.core_tables:
                    Table(table, connection=connection).delete()
            except:
                pass

        DatapointsSchema.delete(connection)

    @staticmethod
    def bind(connection):
        """Bind to existing dynamodb tables.
        """
        return dict((table, Table(table_name, connection=connection))
                    for table, table_name in Schema.table_names.items())

    @staticmethod
    def create(connection):
        """Create dynamodb tables.
        """
        Table.create(config.table_name('metric_names'),
                     schema=[HashKey('domain'), RangeKey('name')],
                     throughput=Schema.metric_names_tp, connection=connection)
        Table.create(config.table_name('tag_names'),
                     schema=[HashKey('domain'), RangeKey('name')],
                     throughput=Schema.tag_names_tp, connection=connection)
        Table.create(config.table_name('tag_values'),
                     schema=[HashKey('domain'), RangeKey('value')],
                     throughput=Schema.tag_values_tp, connection=connection)

        DatapointsSchema.create(connection)

    Credential = collections.namedtuple('Credential', 'access_key_id permissions secret_access_key state')

    def __init__(self, connection, start_mx=False):
        """Initilize data structures.
        """
        self.connection = connection
        # TODO these should be LRU cache - this will become too large
        self.metric_name_cache = set()
        self.tag_name_cache = set()
        self.tag_value_cache = set()

        # use table names as var names
        vars(self).update(Schema.bind(connection))

        self.blocks = DatapointsSchema(connection)
        if start_mx:
            self.blocks.start_maintenance()

        @atexit.register
        def close():
            try:
                self.connection.close()
            except:
                pass  # connection may already be closed on abrupt exit

    def close(self):
        """Close connection and flush pending operations.
        """
        self.connection.close()

    def get_credentials(self):
        """Get credentials.
        """
        return dict([(item['access_key_id'], Schema.Credential(item['access_key_id'], item['permissions'],
                                                               item['secret_access_key'], item['state'])) for item in
                     self.credentials.scan()])

    def get_metric_names(self, domain):
        """Get all metric names.
        """
        return [item['name'] for item in self.metric_names.query(consistent=False,
                                                                 attributes=['name'], domain__eq=domain)]

    def get_tag_names(self, domain):
        """Get tag names.
        """
        return [item['name'] for item in self.tag_names.query(consistent=False,
                                                              attributes=['name'], domain__eq=domain)]

    def get_tag_values(self, domain):
        """Get all tag values.
        """
        return [item['value'] for item in self.tag_values.query(consistent=False,
                                                                attributes=['value'], domain__eq=domain)]

    def store_datapoint(self, timestamp, metric, tags, value, domain):
        """Store a single datapoint, adding to ancillary tables if required.  This call
           will buffer write operations into the provided writer before sending to
           dynamodb.
        """
        self._store_tags(domain, tags)
        self._store_metric(domain, metric)

        self.blocks.store_datapoint(timestamp, metric, tags, value, domain)

    def query_index(self, domain, metric, start_time, end_time):
        """Query index for keys.
        """
        return self.blocks.query_index(domain, metric, start_time, end_time)

    def query_datapoints(self, index_key, start_time, end_time, attributes=('value',)):
        """Query datapoints.
        """
        return self.blocks.query_datapoints(index_key, start_time, end_time, attributes)

    def _store_cache(self, key, cache, table, data):
        if key not in cache:
            table.put_item(data=data(), overwrite=True)
            cache.add(key)

    def _store_tag_name(self, domain, name):
        """Store tag name if not yet stored.
        """
        self._store_cache('|'.join([domain, name]), self.tag_name_cache,
                          self.tag_names, lambda: {'domain': domain, 'name': name})

    def _store_tag_value(self, domain, value):
        """Store tag value if not yet stored.
        """
        self._store_cache('|'.join([domain, value]), self.tag_value_cache,
                          self.tag_values, lambda: {'domain': domain, 'value': value})

    def _store_metric(self, domain, metric):
        """Store metric name if not yet stored.
        """
        self._store_cache('|'.join([domain, metric]), self.metric_name_cache,
                          self.metric_names, lambda: {'domain': domain, 'name': metric})

    def _store_tags(self, domain, tags):
        """Store tags if not yet stored.
        """
        for name, value in tags.items():
            self._store_tag_name(domain, name)
            self._store_tag_value(domain, value)
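
An end-to-end sketch of the Schema API above, assuming live DynamoDB credentials (region, domain, metric, and values here are illustrative):

from boto.dynamodb2 import connect_to_region

connection = connect_to_region('us-east-1')
Schema.create(connection)                   # one-time table creation
schema = Schema(connection, start_mx=True)  # bind tables, start maintenance

schema.store_datapoint(1400000000000, 'cpu.load',
                       {'host': 'web1'}, 0.42, 'example.com')
for key in schema.query_index('example.com', 'cpu.load',
                              1400000000000, 1400003600000):
    print(schema.query_datapoints(key, 1400000000000, 1400003600000))
schema.close()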