def __init__(self, conf):
    url = conf.database.connection

    # NOTE(jd) Use our own connection pooling on top of the Pymongo one.
    # We need that otherwise we overflow the MongoDB instance with new
    # connections, since we instantiate a Pymongo client each time
    # someone requires a new storage connection.
    self.conn = self.CONNECTION_POOL.connect(url)

    # Require MongoDB 2.4 to use $setOnInsert
    if self.conn.server_info()['versionArray'] < [2, 4]:
        raise storage.StorageBadVersion("Need at least MongoDB 2.4")

    connection_options = pymongo.uri_parser.parse_uri(url)
    self.db = getattr(self.conn, connection_options['database'])
    if connection_options.get('username'):
        self.db.authenticate(connection_options['username'],
                             connection_options['password'])

    self.CAPABILITIES = utils.update_nested(self.DEFAULT_CAPABILITIES,
                                            AVAILABLE_CAPABILITIES)

    # NOTE(jd) Upgrading is just about creating indexes, so let's do this
    # on connection to be sure at least the TTL is correctly updated if
    # needed.
    self.upgrade()
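# NOTE: a minimal sketch of the client-side pooling idea described in
# the comment above (hypothetical; the real CONNECTION_POOL lives in
# the driver's utils module and may differ):
#
#     class ConnectionPool(object):
#         def __init__(self):
#             self._pool = {}
#
#         def connect(self, url):
#             # Reuse one pymongo client per URL instead of creating a
#             # new client (and its sockets) for every storage
#             # connection request.
#             client = self._pool.get(url)
#             if client is None:
#                 client = pymongo.MongoClient(url)
#                 self._pool[url] = client
#             return client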
def get_capabilities(self):
    """Return a dictionary representing the capabilities of this driver."""
    available = {
        'meters': {'query': {'simple': True,
                             'metadata': True}},
        'resources': {'query': {'simple': True,
                                'metadata': True}},
        'samples': {'query': {'simple': True,
                              'metadata': True,
                              'complex': True}},
        'statistics': {'groupby': True,
                       'query': {'simple': True,
                                 'metadata': True},
                       'aggregation': {'standard': True,
                                       'selectable': {
                                           'max': True,
                                           'min': True,
                                           'sum': True,
                                           'avg': True,
                                           'count': True,
                                           'stddev': True,
                                           'cardinality': True}}},
        'alarms': {'query': {'simple': True,
                             'complex': True},
                   'history': {'query': {'simple': True,
                                         'complex': True}}},
    }
    return utils.update_nested(self.DEFAULT_CAPABILITIES, available)
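# The capability dicts above are merged with utils.update_nested(),
# which overlays one nested dict onto a copy of another. A rough
# sketch of the assumed semantics (hypothetical helper, for
# illustration only):
#
#     def update_nested(original, updates):
#         result = copy.deepcopy(original)
#         for key, value in updates.items():
#             if isinstance(value, dict) and isinstance(
#                     result.get(key), dict):
#                 result[key] = update_nested(result[key], value)
#             else:
#                 result[key] = value
#         return result
#
#     # update_nested({'a': {'b': False}}, {'a': {'c': True}})
#     # -> {'a': {'b': False, 'c': True}}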
def __init__(self, conf):
    url = conf.database.connection

    # Even though we are connecting to DB2, we use pymongo, so the
    # "db2:" scheme (which only distinguishes this driver from the
    # MongoDB one) has to be replaced; otherwise pymongo raises an
    # exception on the unknown scheme.
    url = url.replace("db2:", "mongodb:", 1)
    self.conn = self.CONNECTION_POOL.connect(url)

    # Require MongoDB 2.2 to use aggregate(). Since MongoDB is used as
    # the backend in tests, the check below makes sure the tests won't
    # try aggregate() on an older MongoDB. For DB2, "versionArray" is
    # not part of server_info, so no exception is raised when a real
    # DB2 instance is used as the backend.
    server_info = self.conn.server_info()
    if server_info.get("sysInfo"):
        self._using_mongodb = True
    else:
        self._using_mongodb = False

    if self._using_mongodb and server_info.get("versionArray") < [2, 2]:
        raise storage.StorageBadVersion("Need at least MongoDB 2.2")

    connection_options = pymongo.uri_parser.parse_uri(url)
    self.db = getattr(self.conn, connection_options["database"])
    if connection_options.get("username"):
        self.db.authenticate(connection_options["username"],
                             connection_options["password"])

    self.CAPABILITIES = utils.update_nested(self.DEFAULT_CAPABILITIES,
                                            AVAILABLE_CAPABILITIES)
    self.upgrade()
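# Illustrative example of the scheme rewrite above (made-up values):
# a configured connection string
#
#     db2://user:password@db2-host:27017/ceilodb2
#
# is handed to pymongo as
#
#     mongodb://user:password@db2-host:27017/ceilodb2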
def get_capabilities(self):
    """Return a dictionary representing the capabilities of this driver."""
    available = {
        'meters': {'query': {'simple': True,
                             'metadata': True}},
        'resources': {'query': {'simple': True,
                                'metadata': True}},
        'samples': {'query': {'simple': True,
                              'metadata': True}},
        'statistics': {'query': {'simple': True,
                                 'metadata': True},
                       'aggregation': {'standard': True}},
    }
    return utils.update_nested(self.DEFAULT_CAPABILITIES, available)
def __init__(self, conf): """Hbase Connection Initialization.""" opts = self._parse_connection_url(conf.database.connection) if opts['host'] == '__test__': url = os.environ.get('CEILOMETER_TEST_HBASE_URL') if url: # Reparse URL, but from the env variable now opts = self._parse_connection_url(url) self.conn_pool = self._get_connection_pool(opts) else: # This is a in-memory usage for unit tests if Connection._memory_instance is None: LOG.debug(_('Creating a new in-memory HBase ' 'Connection object')) Connection._memory_instance = MConnectionPool() self.conn_pool = Connection._memory_instance else: self.conn_pool = self._get_connection_pool(opts) self.CAPABILITIES = utils.update_nested(self.DEFAULT_CAPABILITIES, AVAILABLE_CAPABILITIES)
class Connection(base.Connection):
    """Put the data into an HBase database

    Collections:

    - meter (actually describes a sample):

      - row-key: consists of reversed timestamp, meter and a message
        signature for purposes of uniqueness
      - Column Families:

        f: contains the following qualifiers:

          - counter_name: <name of counter>
          - counter_type: <type of counter>
          - counter_unit: <unit of counter>
          - counter_volume: <volume of counter>
          - message: <raw incoming data>
          - message_id: <id of message>
          - message_signature: <signature of message>
          - resource_metadata: raw metadata for corresponding resource
            of the meter
          - project_id: <id of project>
          - resource_id: <id of resource>
          - user_id: <id of user>
          - recorded_at: <datetime when sample has been recorded (utc.now)>
          - flattened metadata with prefix r_metadata. e.g.::

             f:r_metadata.display_name or f:r_metadata.tag

          - rts: <reversed timestamp of entry>
          - timestamp: <meter's timestamp (came from message)>
          - source for meter with prefix 's'

    - resource:

      - row_key: uuid of resource
      - Column Families:

        f: contains the following qualifiers:

          - resource_metadata: raw metadata for corresponding resource
          - project_id: <id of project>
          - resource_id: <id of resource>
          - user_id: <id of user>
          - flattened metadata with prefix r_metadata. e.g.::

             f:r_metadata.display_name or f:r_metadata.tag

          - sources for all corresponding meters with prefix 's'
          - all meters for this resource in format:

            .. code-block:: python

              "%s+%s+%s!%s!%s" % (rts, source, counter_name, counter_type,
                                  counter_unit)

    - events:

      - row_key: timestamp of event's generation + uuid of event
        in format: "%s+%s" % (ts, Event.message_id)
      - Column Families:

        f: contains the following qualifiers:

          - event_type: description of event's type
          - timestamp: time stamp of event generation
          - all traits for this event in format:

            .. code-block:: python

              "%s+%s" % (trait_name, trait_type)
    """

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )
    _memory_instance = None

    RESOURCE_TABLE = "resource"
    METER_TABLE = "meter"
    EVENT_TABLE = "event"

    def __init__(self, url):
        """Hbase Connection Initialization."""
        opts = self._parse_connection_url(url)

        if opts['host'] == '__test__':
            url = os.environ.get('CEILOMETER_TEST_HBASE_URL')
            if url:
                # Reparse URL, but from the env variable now
                opts = self._parse_connection_url(url)
                self.conn_pool = self._get_connection_pool(opts)
            else:
                # This is an in-memory usage for unit tests
                if Connection._memory_instance is None:
                    LOG.debug(_('Creating a new in-memory HBase '
                                'Connection object'))
                    Connection._memory_instance = (
                        hbase_inmemory.MConnectionPool())
                self.conn_pool = Connection._memory_instance
        else:
            self.conn_pool = self._get_connection_pool(opts)

    def upgrade(self):
        with self.conn_pool.connection() as conn:
            conn.create_table(self.RESOURCE_TABLE,
                              {'f': dict(max_versions=1)})
            conn.create_table(self.METER_TABLE,
                              {'f': dict(max_versions=1)})
            conn.create_table(self.EVENT_TABLE,
                              {'f': dict(max_versions=1)})

    def clear(self):
        LOG.debug(_('Dropping HBase schema...'))
        with self.conn_pool.connection() as conn:
            for table in [self.RESOURCE_TABLE,
                          self.METER_TABLE,
                          self.EVENT_TABLE]:
                try:
                    conn.disable_table(table)
                except Exception:
                    LOG.debug(_('Cannot disable table but ignoring error'))
                try:
                    conn.delete_table(table)
                except Exception:
                    LOG.debug(_('Cannot delete table but ignoring error'))

    @staticmethod
    def _get_connection_pool(conf):
        """Return a connection pool to the database.
        .. note::

          The tests use a subclass to override this and return an
          in-memory connection pool.
        """
        LOG.debug(_('connecting to HBase on %(host)s:%(port)s') % (
            {'host': conf['host'], 'port': conf['port']}))
        return happybase.ConnectionPool(size=100,
                                        host=conf['host'],
                                        port=conf['port'],
                                        table_prefix=conf['table_prefix'])

    @staticmethod
    def _parse_connection_url(url):
        """Parse connection parameters from a database url.

        .. note::

          HBase Thrift does not support authentication and there is no
          database name, so we are not looking for these in the url.
        """
        opts = {}
        result = network_utils.urlsplit(url)
        opts['table_prefix'] = urlparse.parse_qs(
            result.query).get('table_prefix', [None])[0]
        opts['dbtype'] = result.scheme
        if ':' in result.netloc:
            opts['host'], port = result.netloc.split(':')
        else:
            opts['host'] = result.netloc
            port = 9090
        opts['port'] = port and int(port) or 9090
        return opts

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        with self.conn_pool.connection() as conn:
            resource_table = conn.table(self.RESOURCE_TABLE)
            meter_table = conn.table(self.METER_TABLE)

            resource_metadata = data.get('resource_metadata', {})
            # Determine the name of the new meter
            rts = hbase_utils.timestamp(data['timestamp'])
            new_meter = hbase_utils.format_meter_reference(
                data['counter_name'], data['counter_type'],
                data['counter_unit'], rts, data['source'])

            # TODO(nprivalova): try not to store resource_id
            resource = hbase_utils.serialize_entry(**{
                'source': data['source'],
                'meter': {new_meter: data['timestamp']},
                'resource_metadata': resource_metadata,
                'resource_id': data['resource_id'],
                'project_id': data['project_id'],
                'user_id': data['user_id']})
            # We put the entry into HBase with the sample's own timestamp
            # (in milliseconds). This is needed when samples arrive
            # out-of-order: using the sample timestamp as the HBase cell
            # timestamp keeps the newest data automatically 'on top', so
            # the metadata stays up to date (metadata from the newest
            # sample is considered current).
            ts = int(time.mktime(data['timestamp'].timetuple()) * 1000)
            resource_table.put(data['resource_id'], resource, ts)

            # Rowkey consists of reversed timestamp, meter and a
            # message signature for purposes of uniqueness
            row = "%s_%d_%s" % (data['counter_name'], rts,
                                data['message_signature'])
            record = hbase_utils.serialize_entry(
                data, **{'source': data['source'], 'rts': rts,
                         'message': data,
                         'recorded_at': timeutils.utcnow()})
            meter_table.put(row, record)

    def get_resources(self, user=None, project=None, source=None,
                      start_timestamp=None, start_timestamp_op=None,
                      end_timestamp=None, end_timestamp_op=None,
                      metaquery=None, resource=None, pagination=None):
        """Return an iterable of models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like ge, gt.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
""" if pagination: raise NotImplementedError('Pagination not implemented') q = hbase_utils.make_query(metaquery=metaquery, user_id=user, project_id=project, resource_id=resource, source=source) q = hbase_utils.make_meter_query_for_resource(start_timestamp, start_timestamp_op, end_timestamp, end_timestamp_op, source, q) with self.conn_pool.connection() as conn: resource_table = conn.table(self.RESOURCE_TABLE) LOG.debug(_("Query Resource table: %s") % q) for resource_id, data in resource_table.scan(filter=q): f_res, sources, meters, md = hbase_utils.deserialize_entry( data) # Unfortunately happybase doesn't keep ordered result from # HBase. So that's why it's needed to find min and max # manually first_ts = min(meters, key=operator.itemgetter(1))[1] last_ts = max(meters, key=operator.itemgetter(1))[1] source = meters[0][0].split('+')[1] # If we use QualifierFilter then HBase returnes only # qualifiers filtered by. It will not return the whole entry. # That's why if we need to ask additional qualifiers manually. if 'project_id' not in f_res and 'user_id' not in f_res: row = resource_table.row(resource_id, columns=[ 'f:project_id', 'f:user_id', 'f:resource_metadata' ]) f_res, _s, _m, md = hbase_utils.deserialize_entry(row) yield models.Resource(resource_id=resource_id, first_sample_timestamp=first_ts, last_sample_timestamp=last_ts, project_id=f_res['project_id'], source=source, user_id=f_res['user_id'], metadata=md) def get_meters(self, user=None, project=None, resource=None, source=None, metaquery=None, pagination=None): """Return an iterable of models.Meter instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param resource: Optional resource filter. :param source: Optional source filter. :param metaquery: Optional dict with metadata to match on. :param pagination: Optional pagination query. """ metaquery = metaquery or {} if pagination: raise NotImplementedError(_('Pagination not implemented')) with self.conn_pool.connection() as conn: resource_table = conn.table(self.RESOURCE_TABLE) q = hbase_utils.make_query(metaquery=metaquery, user_id=user, project_id=project, resource_id=resource, source=source) LOG.debug(_("Query Resource table: %s") % q) gen = resource_table.scan(filter=q) # We need result set to be sure that user doesn't receive several # same meters. Please see bug # https://bugs.launchpad.net/ceilometer/+bug/1301371 result = set() for ignored, data in gen: flatten_result, s, meters, md = hbase_utils.deserialize_entry( data) for m in meters: _m_rts, m_source, m_raw = m[0].split("+") name, type, unit = m_raw.split('!') meter_dict = { 'name': name, 'type': type, 'unit': unit, 'resource_id': flatten_result['resource_id'], 'project_id': flatten_result['project_id'], 'user_id': flatten_result['user_id'] } frozen_meter = frozenset(meter_dict.items()) if frozen_meter in result: continue result.add(frozen_meter) meter_dict.update( {'source': m_source if m_source else None}) yield models.Meter(**meter_dict) def get_samples(self, sample_filter, limit=None): """Return an iterable of models.Sample instances. :param sample_filter: Filter. :param limit: Maximum number of results to return. 
""" if limit == 0: return with self.conn_pool.connection() as conn: meter_table = conn.table(self.METER_TABLE) q, start, stop, columns = ( hbase_utils.make_sample_query_from_filter(sample_filter, require_meter=False)) LOG.debug(_("Query Meter Table: %s") % q) gen = meter_table.scan(filter=q, row_start=start, row_stop=stop, limit=limit) for ignored, meter in gen: d_meter = hbase_utils.deserialize_entry(meter)[0] d_meter['message']['recorded_at'] = d_meter['recorded_at'] yield models.Sample(**d_meter['message']) @staticmethod def _update_meter_stats(stat, meter): """Do the stats calculation on a requested time bucket in stats dict :param stats: dict where aggregated stats are kept :param index: time bucket index in stats :param meter: meter record as returned from HBase :param start_time: query start time :param period: length of the time bucket """ vol = meter['counter_volume'] ts = meter['timestamp'] stat.unit = meter['counter_unit'] stat.min = min(vol, stat.min or vol) stat.max = max(vol, stat.max) stat.sum = vol + (stat.sum or 0) stat.count += 1 stat.avg = (stat.sum / float(stat.count)) stat.duration_start = min(ts, stat.duration_start or ts) stat.duration_end = max(ts, stat.duration_end or ts) stat.duration = (timeutils.delta_seconds(stat.duration_start, stat.duration_end)) def get_meter_statistics(self, sample_filter, period=None, groupby=None, aggregate=None): """Return an iterable of models.Statistics instances. Items are containing meter statistics described by the query parameters. The filter must have a meter value set. .. note:: Due to HBase limitations the aggregations are implemented in the driver itself, therefore this method will be quite slow because of all the Thrift traffic it is going to create. """ if groupby: raise NotImplementedError("Group by not implemented.") if aggregate: raise NotImplementedError('Selectable aggregates not implemented') with self.conn_pool.connection() as conn: meter_table = conn.table(self.METER_TABLE) q, start, stop, columns = ( hbase_utils.make_sample_query_from_filter(sample_filter)) # These fields are used in statistics' calculating columns.extend( ['f:timestamp', 'f:counter_volume', 'f:counter_unit']) meters = map( hbase_utils.deserialize_entry, list(meter for (ignored, meter) in meter_table.scan( filter=q, row_start=start, row_stop=stop, columns=columns))) if sample_filter.start: start_time = sample_filter.start elif meters: start_time = meters[-1][0]['timestamp'] else: start_time = None if sample_filter.end: end_time = sample_filter.end elif meters: end_time = meters[0][0]['timestamp'] else: end_time = None results = [] if not period: period = 0 period_start = start_time period_end = end_time # As our HBase meters are stored as newest-first, we need to iterate # in the reverse order for meter in meters[::-1]: ts = meter[0]['timestamp'] if period: offset = int( timeutils.delta_seconds(start_time, ts) / period) * period period_start = start_time + datetime.timedelta(0, offset) if not results or not results[-1].period_start == period_start: if period: period_end = period_start + datetime.timedelta(0, period) results.append( models.Statistics(unit='', count=0, min=0, max=0, avg=0, sum=0, period=period, period_start=period_start, period_end=period_end, duration=None, duration_start=None, duration_end=None, groupby=None)) self._update_meter_stats(results[-1], meter[0]) return results def record_events(self, event_models): """Write the events to Hbase. :param event_models: a list of models.Event objects. 
        :return problem_events: a list of events that could not be saved
                                in a (reason, event) tuple. From the
                                reasons that are enumerated in
                                storage.models.Event only the
                                UNKNOWN_PROBLEM is applicable here.
        """
        problem_events = []
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            for event_model in event_models:
                # Row key consists of timestamp and message_id from
                # models.Event, for the purpose of storing events sorted
                # by timestamp in the database.
                ts = event_model.generated
                row = "%d_%s" % (hbase_utils.timestamp(ts, reverse=False),
                                 event_model.message_id)
                event_type = event_model.event_type
                traits = {}
                if event_model.traits:
                    for trait in event_model.traits:
                        key = "%s+%d" % (trait.name, trait.dtype)
                        traits[key] = trait.value
                record = hbase_utils.serialize_entry(
                    traits, event_type=event_type, timestamp=ts)
                try:
                    events_table.put(row, record)
                except Exception as ex:
                    LOG.debug(_("Failed to record event: %s") % ex)
                    problem_events.append((models.Event.UNKNOWN_PROBLEM,
                                           event_model))
        return problem_events

    def get_events(self, event_filter):
        """Return an iter of models.Event objects.

        :param event_filter: storage.EventFilter object, consists of
                             filters for events that are stored in
                             database.
        """
        q, start, stop = hbase_utils.make_events_query_from_filter(
            event_filter)
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)

            gen = events_table.scan(filter=q, row_start=start,
                                    row_stop=stop)
            for event_id, data in gen:
                traits = []
                events_dict = hbase_utils.deserialize_entry(data)[0]
                for key, value in events_dict.items():
                    if (not key.startswith('event_type')
                            and not key.startswith('timestamp')):
                        trait_name, trait_dtype = key.rsplit('+', 1)
                        traits.append(models.Trait(name=trait_name,
                                                   dtype=int(trait_dtype),
                                                   value=value))
                ts, mess = event_id.split('_', 1)

                yield models.Event(
                    message_id=mess,
                    event_type=events_dict['event_type'],
                    generated=events_dict['timestamp'],
                    traits=sorted(traits,
                                  key=operator.attrgetter('dtype'))
                )

    def get_event_types(self):
        """Return all event types as an iterable of strings."""
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            gen = events_table.scan()

            event_types = set()
            for event_id, data in gen:
                events_dict = hbase_utils.deserialize_entry(data)[0]
                for key, value in events_dict.items():
                    if key.startswith('event_type'):
                        if value not in event_types:
                            event_types.add(value)
                            yield value
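    # Event rowkeys are "%d_%s" % (forward_timestamp, message_id), so a
    # plain scan of the event table yields events oldest-first. Sketch
    # with made-up values:
    #
    #     "1388534400000_9b23b4e3" -> event_id.split('_', 1)
    #     -> ('1388534400000', '9b23b4e3')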
    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the
        trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event
        """
        q = hbase_utils.make_query(event_type=event_type)
        trait_names = set()
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            gen = events_table.scan(filter=q)
            for event_id, data in gen:
                events_dict = hbase_utils.deserialize_entry(data)[0]
                for key, value in events_dict.items():
                    if (not key.startswith('event_type')
                            and not key.startswith('timestamp')):
                        trait_name, trait_type = key.rsplit('+', 1)
                        if trait_name not in trait_names:
                            # Here we ensure that only unique trait types
                            # are returned: e.g. if the same trait type is
                            # found in different events with an equal
                            # event_type, only one trait type is returned.
                            # It is assumed that a given trait name can
                            # have only one trait type.
                            trait_names.add(trait_name)
                            data_type = models.Trait.type_names[
                                int(trait_type)]
                            yield {'name': trait_name,
                                   'data_type': data_type}

    def get_traits(self, event_type, trait_type=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait
        type.

        :param event_type: the type of the Event to filter by
        :param trait_type: the name of the Trait to filter by
        """
        q = hbase_utils.make_query(event_type=event_type,
                                   trait_type=trait_type)
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            gen = events_table.scan(filter=q)
            for event_id, data in gen:
                events_dict = hbase_utils.deserialize_entry(data)[0]
                for key, value in events_dict.items():
                    if (not key.startswith('event_type')
                            and not key.startswith('timestamp')):
                        trait_name, trait_type = key.rsplit('+', 1)
                        yield models.Trait(name=trait_name,
                                           dtype=int(trait_type),
                                           value=value)
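# Trait qualifiers are packed as "<name>+<dtype>" and unpacked with
# rsplit('+', 1), so trait names that themselves contain '+' still
# decode correctly. Round-trip sketch (made-up values):
#
#     key = "%s+%d" % ('instance_type', 1)   # -> 'instance_type+1'
#     name, dtype = key.rsplit('+', 1)       # -> ('instance_type', '1')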
class Connection(base.Connection):
    """Base Connection class for MongoDB and DB2 drivers."""

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       COMMON_AVAILABLE_CAPABILITIES)

    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def get_meters(self, user=None, project=None, resource=None,
                   source=None, metaquery=None, limit=None):
        """Return an iterable of models.Meter instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional resource filter.
        :param source: Optional source filter.
        :param metaquery: Optional dict with metadata to match on.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return

        metaquery = pymongo_utils.improve_keys(metaquery,
                                               metaquery=True) or {}
        q = {}
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if resource is not None:
            q['_id'] = resource
        if source is not None:
            q['source'] = source
        q.update(metaquery)

        count = 0
        for r in self.db.resource.find(q):
            for r_meter in r['meter']:
                if limit and count >= limit:
                    return
                else:
                    count += 1
                yield models.Meter(
                    name=r_meter['counter_name'],
                    type=r_meter['counter_type'],
                    # Return an empty string if 'counter_unit' is missing,
                    # for backward compatibility.
                    unit=r_meter.get('counter_unit', ''),
                    resource_id=r['_id'],
                    project_id=r['project_id'],
                    source=r['source'],
                    user_id=r['user_id'],
                )

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of models.Sample instances.

        :param sample_filter: Filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return []

        q = pymongo_utils.make_query_from_filter(sample_filter,
                                                 require_meter=False)

        return self._retrieve_samples(q,
                                      [("timestamp", pymongo.DESCENDING)],
                                      limit)

    def query_samples(self, filter_expr=None, orderby=None, limit=None):
        if limit == 0:
            return []
        query_filter = {}
        orderby_filter = [("timestamp", pymongo.DESCENDING)]
        transformer = pymongo_utils.QueryTransformer()
        if orderby is not None:
            orderby_filter = transformer.transform_orderby(orderby)
        if filter_expr is not None:
            query_filter = transformer.transform_filter(filter_expr)

        return self._retrieve_samples(query_filter, orderby_filter, limit)

    def _retrieve_samples(self, query, orderby, limit):
        if limit is not None:
            samples = self.db.meter.find(query,
                                         limit=limit,
                                         sort=orderby)
        else:
            samples = self.db.meter.find(query,
                                         sort=orderby)

        for s in samples:
            # Remove the ObjectId generated by the database when
            # the sample was inserted. It is an implementation
            # detail that should not leak outside of the driver.
            del s['_id']
            # Backward compatibility for samples without units
            s['counter_unit'] = s.get('counter_unit', '')
            # Tolerate absence of recorded_at in older datapoints
            s['recorded_at'] = s.get('recorded_at')
            # Check samples for metadata and "unquote" keys that were
            # initially quoted because they started with '$'.
            if s.get('resource_metadata'):
                s['resource_metadata'] = pymongo_utils.unquote_keys(
                    s.get('resource_metadata'))
            yield models.Sample(**s)
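# Illustrative filter assembled by get_meters() for a call such as
# get_meters(user='u-1', source='openstack') (made-up IDs):
#
#     {'user_id': 'u-1', 'source': 'openstack'}
#
# pymongo matches this against each resource document, and the nested
# 'meter' array of every matching document is expanded into
# models.Meter instances.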
class Connection(base.Connection):
    """HBase connection."""

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    _memory_instance = None

    PROJECT_TABLE = "project"
    USER_TABLE = "user"
    RESOURCE_TABLE = "resource"
    METER_TABLE = "meter"
    ALARM_TABLE = "alarm"
    ALARM_HISTORY_TABLE = "alarm_h"

    def __init__(self, conf):
        """Hbase Connection Initialization."""
        opts = self._parse_connection_url(conf.database.connection)

        if opts['host'] == '__test__':
            url = os.environ.get('CEILOMETER_TEST_HBASE_URL')
            if url:
                # Reparse URL, but from the env variable now
                opts = self._parse_connection_url(url)
                self.conn_pool = self._get_connection_pool(opts)
            else:
                # This is an in-memory usage for unit tests
                if Connection._memory_instance is None:
                    LOG.debug(_('Creating a new in-memory HBase '
                                'Connection object'))
                    Connection._memory_instance = MConnectionPool()
                self.conn_pool = Connection._memory_instance
        else:
            self.conn_pool = self._get_connection_pool(opts)

    def upgrade(self):
        with self.conn_pool.connection() as conn:
            conn.create_table(self.PROJECT_TABLE, {'f': dict()})
            conn.create_table(self.USER_TABLE, {'f': dict()})
            conn.create_table(self.RESOURCE_TABLE, {'f': dict()})
            conn.create_table(self.METER_TABLE, {'f': dict()})
            conn.create_table(self.ALARM_TABLE, {'f': dict()})
            conn.create_table(self.ALARM_HISTORY_TABLE, {'f': dict()})

    def clear(self):
        LOG.debug(_('Dropping HBase schema...'))
        with self.conn_pool.connection() as conn:
            for table in [self.PROJECT_TABLE,
                          self.USER_TABLE,
                          self.RESOURCE_TABLE,
                          self.METER_TABLE,
                          self.ALARM_TABLE,
                          self.ALARM_HISTORY_TABLE]:
                try:
                    conn.disable_table(table)
                except Exception:
                    LOG.debug(_('Cannot disable table but ignoring error'))
                try:
                    conn.delete_table(table)
                except Exception:
                    LOG.debug(_('Cannot delete table but ignoring error'))

    @staticmethod
    def _get_connection_pool(conf):
        """Return a connection pool to the database.

        .. note::

          The tests use a subclass to override this and return an
          in-memory connection pool.
        """
        LOG.debug(_('connecting to HBase on %(host)s:%(port)s') % (
            {'host': conf['host'], 'port': conf['port']}))
        return happybase.ConnectionPool(size=100,
                                        host=conf['host'],
                                        port=conf['port'],
                                        table_prefix=conf['table_prefix'])

    @staticmethod
    def _parse_connection_url(url):
        """Parse connection parameters from a database url.

        .. note::

          HBase Thrift does not support authentication and there is no
          database name, so we are not looking for these in the url.
        """
        opts = {}
        result = network_utils.urlsplit(url)
        opts['table_prefix'] = urlparse.parse_qs(
            result.query).get('table_prefix', [None])[0]
        opts['dbtype'] = result.scheme
        if ':' in result.netloc:
            opts['host'], port = result.netloc.split(':')
        else:
            opts['host'] = result.netloc
            port = 9090
        opts['port'] = port and int(port) or 9090
        return opts

    def update_alarm(self, alarm):
        """Create or update an alarm.

        :param alarm: The alarm to create or update. It is an Alarm
                      object, so we need to call as_dict().
        """
        _id = alarm.alarm_id
        alarm_to_store = serialize_entry(alarm.as_dict())
        with self.conn_pool.connection() as conn:
            alarm_table = conn.table(self.ALARM_TABLE)
            alarm_table.put(_id, alarm_to_store)
            stored_alarm = deserialize_entry(alarm_table.row(_id))[0]
        return models.Alarm(**stored_alarm)

    create_alarm = update_alarm

    def delete_alarm(self, alarm_id):
        with self.conn_pool.connection() as conn:
            alarm_table = conn.table(self.ALARM_TABLE)
            alarm_table.delete(alarm_id)

    def get_alarms(self, name=None, user=None,
                   project=None, enabled=None, alarm_id=None,
                   pagination=None):
        if pagination:
            raise NotImplementedError('Pagination not implemented')

        q = make_query(alarm_id=alarm_id, name=name, enabled=enabled,
                       user_id=user, project_id=project)

        with self.conn_pool.connection() as conn:
            alarm_table = conn.table(self.ALARM_TABLE)
            gen = alarm_table.scan(filter=q)
            for ignored, data in gen:
                stored_alarm = deserialize_entry(data)[0]
                yield models.Alarm(**stored_alarm)

    def get_alarm_changes(self, alarm_id, on_behalf_of,
                          user=None, project=None, type=None,
                          start_timestamp=None, start_timestamp_op=None,
                          end_timestamp=None, end_timestamp_op=None):
        q = make_query(alarm_id=alarm_id, on_behalf_of=on_behalf_of,
                       type=type, user_id=user, project_id=project)
        start_row, end_row = make_timestamp_query(
            _make_general_rowkey_scan,
            start=start_timestamp, start_op=start_timestamp_op,
            end=end_timestamp, end_op=end_timestamp_op,
            bounds_only=True, some_id=alarm_id)
        with self.conn_pool.connection() as conn:
            alarm_history_table = conn.table(self.ALARM_HISTORY_TABLE)
            gen = alarm_history_table.scan(filter=q, row_start=start_row,
                                           row_stop=end_row)
            for ignored, data in gen:
                stored_entry = deserialize_entry(data)[0]
                yield models.AlarmChange(**stored_entry)

    def record_alarm_change(self, alarm_change):
        """Record an alarm change event."""
        alarm_change_dict = serialize_entry(alarm_change)
        ts = alarm_change.get('timestamp') or datetime.datetime.now()
        rts = reverse_timestamp(ts)
        with self.conn_pool.connection() as conn:
            alarm_history_table = conn.table(self.ALARM_HISTORY_TABLE)
            alarm_history_table.put(
                alarm_change.get('alarm_id') + "_" + str(rts),
                alarm_change_dict)
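    # Alarm history rowkeys are "<alarm_id>_<reverse_timestamp>".
    # reverse_timestamp() is assumed here to compute something like
    # (max_timestamp - ts), so that later changes yield
    # lexicographically smaller keys and a plain scan returns them
    # newest-first (a sketch of the intent; the actual helper lives in
    # the HBase utils module).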
    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        with self.conn_pool.connection() as conn:
            project_table = conn.table(self.PROJECT_TABLE)
            user_table = conn.table(self.USER_TABLE)
            resource_table = conn.table(self.RESOURCE_TABLE)
            meter_table = conn.table(self.METER_TABLE)

            # Make sure we know about the user and project
            if data['user_id']:
                self._update_sources(user_table, data['user_id'],
                                     data['source'])
            self._update_sources(project_table, data['project_id'],
                                 data['source'])

            # Get metadata from user's data
            resource_metadata = data.get('resource_metadata', {})
            # Determine the name of the new meter
            new_meter = _format_meter_reference(
                data['counter_name'], data['counter_type'],
                data['counter_unit'])
            flatten_result, sources, meters, metadata = \
                deserialize_entry(resource_table.row(data['resource_id']))

            # Update if the resource has new information
            if (data['source'] not in sources) or (
                    new_meter not in meters) or (
                    metadata != resource_metadata):
                resource_table.put(data['resource_id'],
                                   serialize_entry(**{
                                       'sources': [data['source']],
                                       'meters': [new_meter],
                                       'metadata': resource_metadata,
                                       'resource_id': data['resource_id'],
                                       'project_id': data['project_id'],
                                       'user_id': data['user_id']}))

            # Rowkey consists of reversed timestamp, meter and an md5 of
            # user+resource+project for purposes of uniqueness
            m = hashlib.md5()
            m.update("%s%s%s" % (data['user_id'], data['resource_id'],
                                 data['project_id']))

            # We use reverse timestamps in rowkeys as they are sorted
            # alphabetically.
            rts = reverse_timestamp(data['timestamp'])
            row = "%s_%d_%s" % (data['counter_name'], rts, m.hexdigest())
            record = serialize_entry(
                data, **{'metadata': resource_metadata,
                         'rts': rts,
                         'message': data,
                         'recorded_at': timeutils.utcnow()})
            meter_table.put(row, record)

    def _update_sources(self, table, id, source):
        user, sources, _, _ = deserialize_entry(table.row(id))
        if source not in sources:
            sources.append(source)
            table.put(id, serialize_entry(user, **{'sources': sources}))

    def get_users(self, source=None):
        """Return an iterable of user id strings.

        :param source: Optional source filter.
        """
        with self.conn_pool.connection() as conn:
            user_table = conn.table(self.USER_TABLE)
            LOG.debug(_("source: %s") % source)
            scan_args = {}
            if source:
                scan_args['columns'] = ['f:s_%s' % source]
            return sorted(key for key, ignored
                          in user_table.scan(**scan_args))

    def get_projects(self, source=None):
        """Return an iterable of project id strings.

        :param source: Optional source filter.
        """
        with self.conn_pool.connection() as conn:
            project_table = conn.table(self.PROJECT_TABLE)
            LOG.debug(_("source: %s") % source)
            scan_args = {}
            if source:
                scan_args['columns'] = ['f:s_%s' % source]
            return (key for key, ignored
                    in project_table.scan(**scan_args))

    def get_resources(self, user=None, project=None, source=None,
                      start_timestamp=None, start_timestamp_op=None,
                      end_timestamp=None, end_timestamp_op=None,
                      metaquery={}, resource=None, pagination=None):
        """Return an iterable of models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like ge, gt.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise NotImplementedError('Pagination not implemented')

        sample_filter = storage.SampleFilter(
            user=user, project=project,
            start=start_timestamp, start_timestamp_op=start_timestamp_op,
            end=end_timestamp, end_timestamp_op=end_timestamp_op,
            resource=resource, source=source, metaquery=metaquery)
        q, start_row, stop_row = make_sample_query_from_filter(
            sample_filter, require_meter=False)
        with self.conn_pool.connection() as conn:
            meter_table = conn.table(self.METER_TABLE)
            LOG.debug(_("Query Meter table: %s") % q)
            meters = meter_table.scan(filter=q, row_start=start_row,
                                      row_stop=stop_row)
            d_meters = []
            for i, m in meters:
                d_meters.append(deserialize_entry(m))

            # We have to sort on resource_id before we can group by it.
            # According to the itertools documentation a new group is
            # generated when the value of the key function changes
            # (it breaks there).
            meters = sorted(d_meters, key=_resource_id_from_record_tuple)

            for resource_id, r_meters in itertools.groupby(
                    meters, key=_resource_id_from_record_tuple):
                # We need the deserialized entry (data[0]) and metadata
                # (data[3])
                meter_rows = [(data[0], data[3]) for data in sorted(
                    r_meters, key=_timestamp_from_record_tuple)]
                latest_data = meter_rows[-1]
                min_ts = meter_rows[0][0]['timestamp']
                max_ts = latest_data[0]['timestamp']
                yield models.Resource(
                    resource_id=resource_id,
                    first_sample_timestamp=min_ts,
                    last_sample_timestamp=max_ts,
                    project_id=latest_data[0]['project_id'],
                    source=latest_data[0]['source'],
                    user_id=latest_data[0]['user_id'],
                    metadata=latest_data[1],
                )

    def get_meters(self, user=None, project=None, resource=None,
                   source=None, metaquery={}, pagination=None):
        """Return an iterable of models.Meter instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional resource filter.
        :param source: Optional source filter.
        :param metaquery: Optional dict with metadata to match on.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise NotImplementedError(_('Pagination not implemented'))

        with self.conn_pool.connection() as conn:
            resource_table = conn.table(self.RESOURCE_TABLE)
            q = make_query(metaquery=metaquery, user_id=user,
                           project_id=project, resource_id=resource,
                           source=source)
            LOG.debug(_("Query Resource table: %s") % q)

            gen = resource_table.scan(filter=q)
            for ignored, data in gen:
                flatten_result, s, m, md = deserialize_entry(data)
                if not m:
                    continue
                # A resource entry here may have only one "meter" and one
                # "source". That's why only the first element of each
                # list is used in this method.
                name, type, unit = m[0].split("!")
                yield models.Meter(
                    name=name,
                    type=type,
                    unit=unit,
                    resource_id=flatten_result['resource_id'],
                    project_id=flatten_result['project_id'],
                    source=s[0] if s else None,
                    user_id=flatten_result['user_id'],
                )
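    # Meter references in the resource rows are serialized as
    # "<name>!<type>!<unit>", e.g. "cpu_util!gauge!%" (illustrative
    # value), and unpacked above with m[0].split("!").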
""" with self.conn_pool.connection() as conn: meter_table = conn.table(self.METER_TABLE) q, start, stop = make_sample_query_from_filter(sample_filter, require_meter=False) LOG.debug(_("Query Meter Table: %s") % q) gen = meter_table.scan(filter=q, row_start=start, row_stop=stop) for ignored, meter in gen: if limit is not None: if limit == 0: break else: limit -= 1 d_meter = deserialize_entry(meter)[0] d_meter['message']['recorded_at'] = d_meter['recorded_at'] yield models.Sample(**d_meter['message']) @staticmethod def _update_meter_stats(stat, meter): """Do the stats calculation on a requested time bucket in stats dict :param stats: dict where aggregated stats are kept :param index: time bucket index in stats :param meter: meter record as returned from HBase :param start_time: query start time :param period: length of the time bucket """ vol = meter['counter_volume'] ts = meter['timestamp'] stat.unit = meter['counter_unit'] stat.min = min(vol, stat.min or vol) stat.max = max(vol, stat.max) stat.sum = vol + (stat.sum or 0) stat.count += 1 stat.avg = (stat.sum / float(stat.count)) stat.duration_start = min(ts, stat.duration_start or ts) stat.duration_end = max(ts, stat.duration_end or ts) stat.duration = \ timeutils.delta_seconds(stat.duration_start, stat.duration_end) def get_meter_statistics(self, sample_filter, period=None, groupby=None, aggregate=None): """Return an iterable of models.Statistics instances containing meter statistics described by the query parameters. The filter must have a meter value set. .. note:: Due to HBase limitations the aggregations are implemented in the driver itself, therefore this method will be quite slow because of all the Thrift traffic it is going to create. """ if groupby: raise NotImplementedError("Group by not implemented.") if aggregate: raise NotImplementedError('Selectable aggregates not implemented') with self.conn_pool.connection() as conn: meter_table = conn.table(self.METER_TABLE) q, start, stop = make_sample_query_from_filter(sample_filter) meters = map( deserialize_entry, list(meter for (ignored, meter) in meter_table.scan( filter=q, row_start=start, row_stop=stop))) if sample_filter.start: start_time = sample_filter.start elif meters: start_time = meters[-1][0]['timestamp'] else: start_time = None if sample_filter.end: end_time = sample_filter.end elif meters: end_time = meters[0][0]['timestamp'] else: end_time = None results = [] if not period: period = 0 period_start = start_time period_end = end_time # As our HBase meters are stored as newest-first, we need to iterate # in the reverse order for meter in meters[::-1]: ts = meter[0]['timestamp'] if period: offset = int( timeutils.delta_seconds(start_time, ts) / period) * period period_start = start_time + datetime.timedelta(0, offset) if not results or not results[-1].period_start == \ period_start: if period: period_end = period_start + datetime.timedelta(0, period) results.append( models.Statistics(unit='', count=0, min=0, max=0, avg=0, sum=0, period=period, period_start=period_start, period_end=period_end, duration=None, duration_start=None, duration_end=None, groupby=None)) self._update_meter_stats(results[-1], meter[0]) return results
class Connection(pymongo_base.Connection):
    """Put the data into a MongoDB database

    Collections::

        - meter
          - the raw incoming data
        - resource
          - the metadata for resources
          - { _id: uuid of resource,
              metadata: metadata dictionaries
              user_id: uuid
              project_id: uuid
              meter: [ array of {counter_name: string,
                                 counter_type: string,
                                 counter_unit: string} ]
            }
    """

    CAPABILITIES = utils.update_nested(pymongo_base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)

    CONNECTION_POOL = pymongo_utils.ConnectionPool()

    REDUCE_GROUP_CLEAN = bson.code.Code("""
    function ( curr, result ) {
        if (result.resources.indexOf(curr.resource_id) < 0)
            result.resources.push(curr.resource_id);
    }
    """)

    STANDARD_AGGREGATES = dict(
        emit_initial=dict(
            sum='',
            count='',
            avg='',
            min='',
            max=''
        ),
        emit_body=dict(
            sum='sum: this.counter_volume,',
            count='count: NumberInt(1),',
            avg='acount: NumberInt(1), asum: this.counter_volume,',
            min='min: this.counter_volume,',
            max='max: this.counter_volume,'
        ),
        reduce_initial=dict(
            sum='',
            count='',
            avg='',
            min='',
            max=''
        ),
        reduce_body=dict(
            sum='sum: values[0].sum,',
            count='count: values[0].count,',
            avg='acount: values[0].acount, asum: values[0].asum,',
            min='min: values[0].min,',
            max='max: values[0].max,'
        ),
        reduce_computation=dict(
            sum='res.sum += values[i].sum;',
            count='res.count = NumberInt(res.count + values[i].count);',
            avg=('res.acount = NumberInt(res.acount + values[i].acount);'
                 'res.asum += values[i].asum;'),
            min='if ( values[i].min < res.min ) {res.min = values[i].min;}',
            max='if ( values[i].max > res.max ) {res.max = values[i].max;}'
        ),
        finalize=dict(
            sum='',
            count='',
            avg='value.avg = value.asum / value.acount;',
            min='',
            max=''
        ),
    )

    UNPARAMETERIZED_AGGREGATES = dict(
        emit_initial=dict(
            stddev=''
        ),
        emit_body=dict(
            stddev='sdsum: this.counter_volume,'
                   'sdcount: 1,'
                   'weighted_distances: 0,'
                   'stddev: 0,'
        ),
        reduce_initial=dict(
            stddev=''
        ),
        reduce_body=dict(
            stddev='sdsum: values[0].sdsum,'
                   'sdcount: values[0].sdcount,'
                   'weighted_distances: values[0].weighted_distances,'
                   'stddev: values[0].stddev,'
        ),
        reduce_computation=dict(
            stddev=(
                'var deviance = (res.sdsum / res.sdcount) - values[i].sdsum;'
                'var weight = res.sdcount / ++res.sdcount;'
                'res.weighted_distances += (Math.pow(deviance, 2) * weight);'
                'res.sdsum += values[i].sdsum;'
            )
        ),
        finalize=dict(
            stddev=(
                'value.stddev = Math.sqrt(value.weighted_distances /'
                ' value.sdcount);'
            )
        ),
    )

    PARAMETERIZED_AGGREGATES = dict(
        validate=dict(
            cardinality=lambda p: p in ['resource_id', 'user_id',
                                        'project_id', 'source']
        ),
        emit_initial=dict(
            cardinality=(
                'aggregate["cardinality/%(aggregate_param)s"] = 1;'
                'var distinct_%(aggregate_param)s = {};'
                'distinct_%(aggregate_param)s[this["%(aggregate_param)s"]]'
                ' = true;'
            )
        ),
        emit_body=dict(
            cardinality=(
                'distinct_%(aggregate_param)s : '
                'distinct_%(aggregate_param)s,'
                '%(aggregate_param)s : this["%(aggregate_param)s"],'
            )
        ),
        reduce_initial=dict(
            cardinality=''
        ),
        reduce_body=dict(
            cardinality=(
                'aggregate : values[0].aggregate,'
                'distinct_%(aggregate_param)s:'
                ' values[0].distinct_%(aggregate_param)s,'
                '%(aggregate_param)s : values[0]["%(aggregate_param)s"],'
            )
        ),
        reduce_computation=dict(
            cardinality=(
                'if (!(values[i]["%(aggregate_param)s"] in'
                ' res.distinct_%(aggregate_param)s)) {'
                ' res.distinct_%(aggregate_param)s[values[i]'
                ' ["%(aggregate_param)s"]] = true;'
                ' res.aggregate["cardinality/%(aggregate_param)s"] += 1;}'
            )
        ),
        finalize=dict(
            cardinality=''
        ),
    )

    EMIT_STATS_COMMON = """
        var aggregate = {};
        %(aggregate_initial_placeholder)s
        emit(%(key_val)s, { unit: this.counter_unit,
                            aggregate : aggregate,
                            %(aggregate_body_placeholder)s
                            groupby : %(groupby_val)s,
                            duration_start : this.timestamp,
                            duration_end : this.timestamp,
                            period_start : %(period_start_val)s,
                            period_end : %(period_end_val)s} )
    """

    MAP_STATS_PERIOD_VAR = """
        var period = %(period)d * 1000;
        var period_first = %(period_first)d * 1000;
        var period_start = period_first
                           + (Math.floor(new Date(this.timestamp.getTime()
                                                  - period_first) / period)
                              * period);
    """

    MAP_STATS_GROUPBY_VAR = """
        var groupby_fields = %(groupby_fields)s;
        var groupby = {};
        var groupby_key = {};

        for ( var i=0; i<groupby_fields.length; i++ ) {
            groupby[groupby_fields[i]] = this[groupby_fields[i]]
            groupby_key[groupby_fields[i]] = this[groupby_fields[i]]
        }
    """

    PARAMS_MAP_STATS = {'key_val': '\'statistics\'',
                        'groupby_val': 'null',
                        'period_start_val': 'this.timestamp',
                        'period_end_val': 'this.timestamp',
                        'aggregate_initial_placeholder':
                            '%(aggregate_initial_val)s',
                        'aggregate_body_placeholder':
                            '%(aggregate_body_val)s'}

    MAP_STATS = bson.code.Code("function () {" +
                               EMIT_STATS_COMMON % PARAMS_MAP_STATS +
                               "}")

    PARAMS_MAP_STATS_PERIOD = {
        'key_val': 'period_start',
        'groupby_val': 'null',
        'period_start_val': 'new Date(period_start)',
        'period_end_val': 'new Date(period_start + period)',
        'aggregate_initial_placeholder': '%(aggregate_initial_val)s',
        'aggregate_body_placeholder': '%(aggregate_body_val)s'}

    MAP_STATS_PERIOD = bson.code.Code(
        "function () {" +
        MAP_STATS_PERIOD_VAR +
        EMIT_STATS_COMMON % PARAMS_MAP_STATS_PERIOD +
        "}")

    PARAMS_MAP_STATS_GROUPBY = {'key_val': 'groupby_key',
                                'groupby_val': 'groupby',
                                'period_start_val': 'this.timestamp',
                                'period_end_val': 'this.timestamp',
                                'aggregate_initial_placeholder':
                                    '%(aggregate_initial_val)s',
                                'aggregate_body_placeholder':
                                    '%(aggregate_body_val)s'}

    MAP_STATS_GROUPBY = bson.code.Code(
        "function () {" +
        MAP_STATS_GROUPBY_VAR +
        EMIT_STATS_COMMON % PARAMS_MAP_STATS_GROUPBY +
        "}")

    PARAMS_MAP_STATS_PERIOD_GROUPBY = {
        'key_val': 'groupby_key',
        'groupby_val': 'groupby',
        'period_start_val': 'new Date(period_start)',
        'period_end_val': 'new Date(period_start + period)',
        'aggregate_initial_placeholder': '%(aggregate_initial_val)s',
        'aggregate_body_placeholder': '%(aggregate_body_val)s'}

    MAP_STATS_PERIOD_GROUPBY = bson.code.Code(
        "function () {" +
        MAP_STATS_PERIOD_VAR +
        MAP_STATS_GROUPBY_VAR +
        " groupby_key['period_start'] = period_start\n" +
        EMIT_STATS_COMMON % PARAMS_MAP_STATS_PERIOD_GROUPBY +
        "}")

    REDUCE_STATS = bson.code.Code("""
    function (key, values) {
        %(aggregate_initial_val)s
        var res = { unit: values[0].unit,
                    aggregate: values[0].aggregate,
                    %(aggregate_body_val)s
                    groupby: values[0].groupby,
                    period_start: values[0].period_start,
                    period_end: values[0].period_end,
                    duration_start: values[0].duration_start,
                    duration_end: values[0].duration_end };
        for ( var i=1; i<values.length; i++ ) {
            %(aggregate_computation_val)s
            if ( values[i].duration_start < res.duration_start )
                res.duration_start = values[i].duration_start;
            if ( values[i].duration_end > res.duration_end )
                res.duration_end = values[i].duration_end;
        }
        return res;
    }
    """)

    FINALIZE_STATS = bson.code.Code("""
    function (key, value) {
        %(aggregate_val)s
        value.duration = (value.duration_end - value.duration_start)
                         / 1000;
        value.period = NumberInt((value.period_end - value.period_start)
                                 / 1000);
        return value;
    }""")

    SORT_OPERATION_MAPPING = {'desc': (pymongo.DESCENDING, '$lt'),
                              'asc': (pymongo.ASCENDING, '$gt')}

    MAP_RESOURCES = bson.code.Code("""
    function () {
        emit(this.resource_id,
             {user_id: this.user_id,
              project_id: this.project_id,
              source: this.source,
              first_timestamp: this.timestamp,
              last_timestamp: this.timestamp,
              metadata: this.resource_metadata})
    }""")
    REDUCE_RESOURCES = bson.code.Code("""
    function (key, values) {
        var merge = {user_id: values[0].user_id,
                     project_id: values[0].project_id,
                     source: values[0].source,
                     first_timestamp: values[0].first_timestamp,
                     last_timestamp: values[0].last_timestamp,
                     metadata: values[0].metadata}
        values.forEach(function(value) {
            if (merge.first_timestamp - value.first_timestamp > 0) {
                merge.first_timestamp = value.first_timestamp;
                merge.user_id = value.user_id;
                merge.project_id = value.project_id;
                merge.source = value.source;
            } else if (merge.last_timestamp - value.last_timestamp <= 0) {
                merge.last_timestamp = value.last_timestamp;
                merge.metadata = value.metadata;
            }
        });
        return merge;
    }""")

    _GENESIS = datetime.datetime(year=datetime.MINYEAR, month=1, day=1)
    _APOCALYPSE = datetime.datetime(year=datetime.MAXYEAR, month=12,
                                    day=31, hour=23, minute=59,
                                    second=59)

    def __init__(self, url):

        # NOTE(jd) Use our own connection pooling on top of the Pymongo
        # one. We need that otherwise we overflow the MongoDB instance
        # with new connections, since we instantiate a Pymongo client
        # each time someone requires a new storage connection.
        self.conn = self.CONNECTION_POOL.connect(url)

        # Require MongoDB 2.4 to use $setOnInsert
        if self.conn.server_info()['versionArray'] < [2, 4]:
            raise storage.StorageBadVersion("Need at least MongoDB 2.4")

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options['database'])
        if connection_options.get('username'):
            self.db.authenticate(connection_options['username'],
                                 connection_options['password'])

        # NOTE(jd) Upgrading is just about creating indexes, so let's do
        # this on connection to be sure at least the TTL is correctly
        # updated if needed.
        self.upgrade()
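    # server_info()['versionArray'] is a list of ints such as
    # [2, 4, 9, 0] (example value), so the list comparison above acts
    # as a lexicographic "at least MongoDB 2.4" check.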
    def upgrade(self):
        # Establish indexes
        #
        # We need variations for user_id vs. project_id because of the
        # way the indexes are stored in b-trees. The user_id and
        # project_id values are usually mutually exclusive in the
        # queries, so the database won't take advantage of an index
        # including both.
        name_qualifier = dict(user_id='', project_id='project_')
        background = dict(user_id=False, project_id=True)
        for primary in ['user_id', 'project_id']:
            name = 'resource_%sidx' % name_qualifier[primary]
            self.db.resource.ensure_index([
                (primary, pymongo.ASCENDING),
                ('source', pymongo.ASCENDING),
            ], name=name, background=background[primary])

            name = 'meter_%sidx' % name_qualifier[primary]
            self.db.meter.ensure_index([
                ('resource_id', pymongo.ASCENDING),
                (primary, pymongo.ASCENDING),
                ('counter_name', pymongo.ASCENDING),
                ('timestamp', pymongo.ASCENDING),
                ('source', pymongo.ASCENDING),
            ], name=name, background=background[primary])

        self.db.resource.ensure_index([('last_sample_timestamp',
                                        pymongo.DESCENDING)],
                                      name='last_sample_timestamp_idx',
                                      sparse=True)
        self.db.meter.ensure_index([('timestamp', pymongo.DESCENDING)],
                                   name='timestamp_idx')

        # Remove API v1 related tables
        self.db.user.drop()
        self.db.project.drop()

        indexes = self.db.meter.index_information()

        ttl = cfg.CONF.database.time_to_live

        if ttl <= 0:
            if 'meter_ttl' in indexes:
                self.db.meter.drop_index('meter_ttl')
            return

        if 'meter_ttl' in indexes:
            # NOTE(sileht): manually check expireAfterSeconds because
            # ensure_index doesn't update index options if the index
            # already exists
            if ttl == indexes['meter_ttl'].get('expireAfterSeconds', -1):
                return

            self.db.meter.drop_index('meter_ttl')

        self.db.meter.create_index(
            [('timestamp', pymongo.ASCENDING)],
            expireAfterSeconds=ttl,
            name='meter_ttl'
        )

    def clear(self):
        self.conn.drop_database(self.db)
        # Connection will be reopened automatically if needed
        self.conn.close()

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        # Record the updated resource metadata - we use $setOnInsert to
        # unconditionally insert sample timestamps and resource metadata
        # (in the update case, this must be conditional on the sample not
        # being out-of-order)
        resource = self.db.resource.find_and_modify(
            {'_id': data['resource_id']},
            {'$set': {'project_id': data['project_id'],
                      'user_id': data['user_id'],
                      'source': data['source'],
                      },
             '$setOnInsert': {'metadata': data['resource_metadata'],
                              'first_sample_timestamp': data['timestamp'],
                              'last_sample_timestamp': data['timestamp'],
                              },
             '$addToSet': {'meter': {'counter_name': data['counter_name'],
                                     'counter_type': data['counter_type'],
                                     'counter_unit': data['counter_unit'],
                                     },
                           },
             },
            upsert=True,
            new=True,
        )

        # only update last sample timestamp if actually later (the usual
        # in-order case)
        last_sample_timestamp = resource.get('last_sample_timestamp')
        if (last_sample_timestamp is None or
                last_sample_timestamp <= data['timestamp']):
            self.db.resource.update(
                {'_id': data['resource_id']},
                {'$set': {'metadata': data['resource_metadata'],
                          'last_sample_timestamp': data['timestamp']}}
            )

        # only update first sample timestamp if actually earlier (the
        # unusual out-of-order case)
        # NOTE: a null first sample timestamp is not updated as this
        # indicates a pre-existing resource document dating from before
        # we started recording these timestamps in the resource
        # collection
        first_sample_timestamp = resource.get('first_sample_timestamp')
        if (first_sample_timestamp is not None and
                first_sample_timestamp > data['timestamp']):
            self.db.resource.update(
                {'_id': data['resource_id']},
                {'$set': {'first_sample_timestamp': data['timestamp']}}
            )

        # Record the raw data for the meter. Use a copy so we do not
        # modify a data structure owned by our caller (the driver adds
        # a new key '_id').
        record = copy.copy(data)
        record['recorded_at'] = timeutils.utcnow()
        self.db.meter.insert(record)

    def clear_expired_metering_data(self, ttl):
        """Clear expired data from the backend storage system.

        Clearing occurs according to the time-to-live.

        :param ttl: Number of seconds to keep records for.
        """
        results = self.db.meter.group(
            key={},
            condition={},
            reduce=self.REDUCE_GROUP_CLEAN,
            initial={
                'resources': [],
            }
        )[0]

        self.db.resource.remove({'_id': {'$nin': results['resources']}})

    @staticmethod
    def _get_marker(db_collection, marker_pairs):
        """Return the marker document according to the attribute-value
        pairs.

        :param db_collection: Database collection to query.
        :param marker_pairs: Attribute-value pairs filter.
        """
        if db_collection is None:
            return
        if not marker_pairs:
            return
        ret = db_collection.find(marker_pairs, limit=2)

        if ret.count() == 0:
            raise base.NoResultFound
        elif ret.count() > 1:
            raise base.MultipleResultsFound
        else:
            return ret[0]

    @classmethod
    def _recurse_sort_keys(cls, sort_keys, marker, flag):
        _first = sort_keys[0]
        value = marker[_first]
        if len(sort_keys) == 1:
            return {_first: {flag: value}}
        else:
            criteria_equ = {_first: {'eq': value}}
            criteria_cmp = cls._recurse_sort_keys(sort_keys[1:],
                                                  marker, flag)
        return dict(criteria_equ, **criteria_cmp)

    @classmethod
    def _build_paginate_query(cls, marker, sort_keys=None, sort_dir='desc'):
        """Returns a query with sorting / pagination.

        Pagination works by requiring sort_key and sort_dir.
        We use the last item in the previous page as the 'marker' for
        pagination. So we return values that follow the passed marker
        in the order.

        :param marker: the last item of the previous page; we return the
                       next results after this item.
        :param sort_keys: array of attributes by which results should be
                          sorted.
        :param sort_dir: direction in which results should be sorted
                         (asc, desc).
        :return: sort parameters, query to use
        """
        all_sort = []
        sort_keys = sort_keys or []
        all_sort, _op = cls._build_sort_instructions(sort_keys, sort_dir)

        if marker is not None:
            sort_criteria_list = []

            for i in range(len(sort_keys)):
                # NOTE(fengqian): Generate the query criteria recursively.
                # With sort_keys=[k1, k2, k3], marker values=[v1, v2, v3]
                # and _op='$lt' (the 'desc' operator), the query criteria
                # should be
                # {'k1': {'eq': 'v1'}, 'k2': {'eq': 'v2'},
                #  'k3': {'$lt': 'v3'}},
                # {'k1': {'eq': 'v1'}, 'k2': {'$lt': 'v2'}},
                # {'k1': {'$lt': 'v1'}},
                # joined with an 'OR' operation. Each recursion generates
                # one of these three items.
                sort_criteria_list.append(cls._recurse_sort_keys(
                                          sort_keys[:(len(sort_keys) - i)],
                                          marker, _op))

            metaquery = {"$or": sort_criteria_list}
        else:
            metaquery = {}

        return all_sort, metaquery

    @classmethod
    def _build_sort_instructions(cls, sort_keys=None, sort_dir='desc'):
        """Returns a sort_instruction and paging operator.

        Sort instructions are used in the query to determine what
        attributes to sort on and what direction to use.

        :param sort_keys: array of attributes by which results should be
                          sorted.
        :param sort_dir: direction in which results should be sorted
                         (asc, desc).
        :return: sort instructions and paging operator
        """
        sort_keys = sort_keys or []
        sort_instructions = []
        _sort_dir, operation = cls.SORT_OPERATION_MAPPING.get(
            sort_dir, cls.SORT_OPERATION_MAPPING['desc'])

        for _sort_key in sort_keys:
            _instruction = (_sort_key, _sort_dir)
            sort_instructions.append(_instruction)

        return sort_instructions, operation

    @classmethod
    def paginate_query(cls, q, db_collection, limit=None, marker=None,
                       sort_keys=None, sort_dir='desc'):
        """Returns a query result with sorting / pagination.

        Pagination works by requiring sort_key and sort_dir.
        We use the last item in the previous page as the 'marker' for
        pagination. So we return values that follow the passed marker
        in the order.

        :param q: the query dict passed in.
        :param db_collection: Database collection to query.
        :param limit: maximum number of items to return.
        :param marker: the last item of the previous page; we return the
                       next results after this item.
        :param sort_keys: array of attributes by which results should be
                          sorted.
        :param sort_dir: direction in which results should be sorted
                         (asc, desc).
        :return: The query with sorting/pagination added.
        """
        sort_keys = sort_keys or []
        all_sort, query = cls._build_paginate_query(marker,
                                                    sort_keys,
                                                    sort_dir)
        q.update(query)

        # NOTE(fengqian): MongoDB's collection.find() cannot handle a
        # limit of None (it raises TypeError), so we treat None as 0,
        # which means "no limit".
        if limit is None:
            limit = 0
        return db_collection.find(q, limit=limit, sort=all_sort)
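    # Illustrative $or filter produced by _build_paginate_query() for
    # sort_keys=['k1', 'k2'], marker={'k1': 'v1', 'k2': 'v2'} and
    # sort_dir='desc' (made-up keys and values):
    #
    #     {'$or': [{'k1': {'eq': 'v1'}, 'k2': {'$lt': 'v2'}},
    #              {'k1': {'$lt': 'v1'}}]}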
        ts_range = pymongo_utils.make_timestamp_range(start_timestamp,
                                                      end_timestamp,
                                                      start_timestamp_op,
                                                      end_timestamp_op)
        if ts_range:
            query['timestamp'] = ts_range

        sort_keys = base._handle_sort_key('resource')
        sort_instructions = self._build_sort_instructions(sort_keys)[0]

        # use a unique collection name for the results collection,
        # as result post-sorting (as opposed to reduce pre-sorting)
        # is not possible on an inline M-R
        out = 'resource_list_%s' % uuid.uuid4()
        self.db.meter.map_reduce(self.MAP_RESOURCES,
                                 self.REDUCE_RESOURCES,
                                 out=out,
                                 sort={'resource_id': 1},
                                 query=query)

        try:
            for r in self.db[out].find(sort=sort_instructions):
                resource = r['value']
                yield models.Resource(
                    resource_id=r['_id'],
                    user_id=resource['user_id'],
                    project_id=resource['project_id'],
                    first_sample_timestamp=resource['first_timestamp'],
                    last_sample_timestamp=resource['last_timestamp'],
                    source=resource['source'],
                    metadata=resource['metadata'])
        finally:
            self.db[out].drop()

    def _get_floating_resources(self, query, metaquery, resource):
        """Return an iterable of models.Resource instances

        Items are unconstrained by timestamp.

        :param query: project/user/source query
        :param metaquery: dict with metadata to match on.
        :param resource: resource filter.
        """
        if resource is not None:
            query['_id'] = resource

        query.update(dict((k, v)
                          for (k, v) in six.iteritems(metaquery)))

        keys = base._handle_sort_key('resource')
        sort_keys = ['last_sample_timestamp' if i == 'timestamp' else i
                     for i in keys]
        sort_instructions = self._build_sort_instructions(sort_keys)[0]

        for r in self.db.resource.find(query, sort=sort_instructions):
            yield models.Resource(
                resource_id=r['_id'],
                user_id=r['user_id'],
                project_id=r['project_id'],
                first_sample_timestamp=r.get('first_sample_timestamp',
                                             self._GENESIS),
                last_sample_timestamp=r.get('last_sample_timestamp',
                                            self._APOCALYPSE),
                source=r['source'],
                metadata=r['metadata'])

    def get_resources(self, user=None, project=None, source=None,
                      start_timestamp=None, start_timestamp_op=None,
                      end_timestamp=None, end_timestamp_op=None,
                      metaquery=None, resource=None, pagination=None):
        """Return an iterable of models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like gt, ge.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
""" if pagination: raise ceilometer.NotImplementedError('Pagination not implemented') metaquery = metaquery or {} query = {} if user is not None: query['user_id'] = user if project is not None: query['project_id'] = project if source is not None: query['source'] = source if start_timestamp or end_timestamp: return self._get_time_constrained_resources(query, start_timestamp, start_timestamp_op, end_timestamp, end_timestamp_op, metaquery, resource) else: return self._get_floating_resources(query, metaquery, resource) def _aggregate_param(self, fragment_key, aggregate): fragment_map = self.STANDARD_AGGREGATES[fragment_key] if not aggregate: return ''.join([f for f in fragment_map.values()]) fragments = '' for a in aggregate: if a.func in self.STANDARD_AGGREGATES[fragment_key]: fragment_map = self.STANDARD_AGGREGATES[fragment_key] fragments += fragment_map[a.func] elif a.func in self.UNPARAMETERIZED_AGGREGATES[fragment_key]: fragment_map = self.UNPARAMETERIZED_AGGREGATES[fragment_key] fragments += fragment_map[a.func] elif a.func in self.PARAMETERIZED_AGGREGATES[fragment_key]: fragment_map = self.PARAMETERIZED_AGGREGATES[fragment_key] v = self.PARAMETERIZED_AGGREGATES['validate'].get(a.func) if not (v and v(a.param)): raise storage.StorageBadAggregate('Bad aggregate: %s.%s' % (a.func, a.param)) params = dict(aggregate_param=a.param) fragments += (fragment_map[a.func] % params) else: raise ceilometer.NotImplementedError( 'Selectable aggregate function %s' ' is not supported' % a.func) return fragments def get_meter_statistics(self, sample_filter, period=None, groupby=None, aggregate=None): """Return an iterable of models.Statistics instance. Items are containing meter statistics described by the query parameters. The filter must have a meter value set. """ if (groupby and set(groupby) - set(['user_id', 'project_id', 'resource_id', 'source'])): raise ceilometer.NotImplementedError( "Unable to group by these fields") q = pymongo_utils.make_query_from_filter(sample_filter) if period: if sample_filter.start: period_start = sample_filter.start else: period_start = self.db.meter.find( limit=1, sort=[('timestamp', pymongo.ASCENDING)])[0]['timestamp'] period_start = int(calendar.timegm(period_start.utctimetuple())) map_params = {'period': period, 'period_first': period_start, 'groupby_fields': json.dumps(groupby)} if groupby: map_fragment = self.MAP_STATS_PERIOD_GROUPBY else: map_fragment = self.MAP_STATS_PERIOD else: if groupby: map_params = {'groupby_fields': json.dumps(groupby)} map_fragment = self.MAP_STATS_GROUPBY else: map_params = dict() map_fragment = self.MAP_STATS sub = self._aggregate_param map_params['aggregate_initial_val'] = sub('emit_initial', aggregate) map_params['aggregate_body_val'] = sub('emit_body', aggregate) map_stats = map_fragment % map_params reduce_params = dict( aggregate_initial_val=sub('reduce_initial', aggregate), aggregate_body_val=sub('reduce_body', aggregate), aggregate_computation_val=sub('reduce_computation', aggregate) ) reduce_stats = self.REDUCE_STATS % reduce_params finalize_params = dict(aggregate_val=sub('finalize', aggregate)) finalize_stats = self.FINALIZE_STATS % finalize_params results = self.db.meter.map_reduce( map_stats, reduce_stats, {'inline': 1}, finalize=finalize_stats, query=q, ) # FIXME(terriyu) Fix get_meter_statistics() so we don't use sorted() # to return the results return sorted( (self._stats_result_to_model(r['value'], groupby, aggregate) for r in results['results']), key=operator.attrgetter('period_start')) @staticmethod def 
_stats_result_aggregates(result, aggregate): stats_args = {} for attr in ['count', 'min', 'max', 'sum', 'avg']: if attr in result: stats_args[attr] = result[attr] if aggregate: stats_args['aggregate'] = {} for a in aggregate: ak = '%s%s' % (a.func, '/%s' % a.param if a.param else '') if ak in result: stats_args['aggregate'][ak] = result[ak] elif 'aggregate' in result: stats_args['aggregate'][ak] = result['aggregate'].get(ak) return stats_args @staticmethod def _stats_result_to_model(result, groupby, aggregate): stats_args = Connection._stats_result_aggregates(result, aggregate) stats_args['unit'] = result['unit'] stats_args['duration'] = result['duration'] stats_args['duration_start'] = result['duration_start'] stats_args['duration_end'] = result['duration_end'] stats_args['period'] = result['period'] stats_args['period_start'] = result['period_start'] stats_args['period_end'] = result['period_end'] stats_args['groupby'] = (dict( (g, result['groupby'][g]) for g in groupby) if groupby else None) return models.Statistics(**stats_args)
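# NOTE: _stats_result_aggregates above keys selectable aggregates as the
# function name plus an optional '/param' suffix (e.g.
# 'cardinality/resource_id'). A minimal standalone sketch of that key
# scheme; the Aggregate namedtuple is a hypothetical stand-in for the
# real aggregate descriptors passed in by the API layer.
import collections

Aggregate = collections.namedtuple('Aggregate', ['func', 'param'])


def aggregate_keys(aggregates):
    # Mirror the key construction used in _stats_result_aggregates.
    return ['%s%s' % (a.func, '/%s' % a.param if a.param else '')
            for a in aggregates]


print(aggregate_keys([Aggregate('avg', None),
                      Aggregate('cardinality', 'resource_id')]))
# -> ['avg', 'cardinality/resource_id']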
class Connection(hbase_base.Connection, base.Connection): """Put the alarm data into a HBase database Collections: - alarm: - row_key: uuid of alarm - Column Families: f: contains the raw incoming alarm data - alarm_h: - row_key: uuid of alarm + ":" + reversed timestamp - Column Families: f: raw incoming alarm_history data. Timestamp becomes now() if not determined """ CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES, AVAILABLE_CAPABILITIES) STORAGE_CAPABILITIES = utils.update_nested( base.Connection.STORAGE_CAPABILITIES, AVAILABLE_STORAGE_CAPABILITIES, ) _memory_instance = None ALARM_TABLE = "alarm" ALARM_HISTORY_TABLE = "alarm_h" def __init__(self, url): super(Connection, self).__init__(url) def upgrade(self): tables = [self.ALARM_HISTORY_TABLE, self.ALARM_TABLE] column_families = {'f': dict()} with self.conn_pool.connection() as conn: hbase_utils.create_tables(conn, tables, column_families) hbase_migration.migrate_tables(conn, tables) def clear(self): LOG.debug(_('Dropping HBase schema...')) with self.conn_pool.connection() as conn: for table in [self.ALARM_TABLE, self.ALARM_HISTORY_TABLE]: try: conn.disable_table(table) except Exception: LOG.debug(_('Cannot disable table but ignoring error')) try: conn.delete_table(table) except Exception: LOG.debug(_('Cannot delete table but ignoring error')) def update_alarm(self, alarm): """Create an alarm. :param alarm: The alarm to create. It is Alarm object, so we need to call as_dict() """ _id = alarm.alarm_id alarm_to_store = hbase_utils.serialize_entry(alarm.as_dict()) with self.conn_pool.connection() as conn: alarm_table = conn.table(self.ALARM_TABLE) alarm_table.put(_id, alarm_to_store) stored_alarm = hbase_utils.deserialize_entry( alarm_table.row(_id))[0] return models.Alarm(**stored_alarm) create_alarm = update_alarm def delete_alarm(self, alarm_id): with self.conn_pool.connection() as conn: alarm_table = conn.table(self.ALARM_TABLE) alarm_table.delete(alarm_id) def get_alarms(self, name=None, user=None, state=None, meter=None, project=None, enabled=None, alarm_id=None, pagination=None, alarm_type=None): if pagination: raise ceilometer.NotImplementedError('Pagination not implemented') if meter: raise ceilometer.NotImplementedError( 'Filter by meter not implemented') q = hbase_utils.make_query(alarm_id=alarm_id, name=name, enabled=enabled, user_id=user, project_id=project, state=state, type=alarm_type) with self.conn_pool.connection() as conn: alarm_table = conn.table(self.ALARM_TABLE) gen = alarm_table.scan(filter=q) alarms = [hbase_utils.deserialize_entry(data)[0] for ignored, data in gen] for alarm in sorted( alarms, key=operator.itemgetter('timestamp'), reverse=True): yield models.Alarm(**alarm) def get_alarm_changes(self, alarm_id, on_behalf_of, user=None, project=None, alarm_type=None, start_timestamp=None, start_timestamp_op=None, end_timestamp=None, end_timestamp_op=None): q = hbase_utils.make_query(alarm_id=alarm_id, on_behalf_of=on_behalf_of, type=alarm_type, user_id=user, project_id=project) start_row, end_row = hbase_utils.make_timestamp_query( hbase_utils.make_general_rowkey_scan, start=start_timestamp, start_op=start_timestamp_op, end=end_timestamp, end_op=end_timestamp_op, bounds_only=True, some_id=alarm_id) with self.conn_pool.connection() as conn: alarm_history_table = conn.table(self.ALARM_HISTORY_TABLE) gen = alarm_history_table.scan(filter=q, row_start=start_row, row_stop=end_row) for ignored, data in gen: stored_entry = hbase_utils.deserialize_entry(data)[0] yield models.AlarmChange(**stored_entry) def 
record_alarm_change(self, alarm_change): """Record alarm change event.""" alarm_change_dict = hbase_utils.serialize_entry(alarm_change) ts = alarm_change.get('timestamp') or datetime.datetime.now() rts = hbase_utils.timestamp(ts) with self.conn_pool.connection() as conn: alarm_history_table = conn.table(self.ALARM_HISTORY_TABLE) alarm_history_table.put( hbase_utils.prepare_key(alarm_change.get('alarm_id'), rts), alarm_change_dict)
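# NOTE: the alarm history row key concatenates the alarm uuid with a
# reversed timestamp, so that a plain lexicographic HBase scan returns
# the newest changes first. A rough standalone sketch of the reversal
# idea; the constant below is illustrative and not the exact value used
# by hbase_utils.timestamp.
import calendar
import datetime

_MAX_TS = 0x7fffffffffffffff  # illustrative upper bound


def reversed_timestamp(dt):
    # Larger (more recent) epoch values map to smaller keys,
    # so newer rows sort first.
    return _MAX_TS - calendar.timegm(dt.utctimetuple())


now = datetime.datetime.utcnow()
earlier = now - datetime.timedelta(hours=1)
assert reversed_timestamp(now) < reversed_timestamp(earlier)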
class Connection(base.Connection): """Put the data into a HBase database Collections: - alarm: - row_key: uuid of alarm - Column Families: f: contains the raw incoming alarm data - alarm_h: - row_key: uuid of alarm + "_" + reversed timestamp - Column Families: f: raw incoming alarm_history data. Timestamp becomes now() if not determined """ CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES, AVAILABLE_CAPABILITIES) STORAGE_CAPABILITIES = utils.update_nested( base.Connection.STORAGE_CAPABILITIES, AVAILABLE_STORAGE_CAPABILITIES, ) _memory_instance = None ALARM_TABLE = "alarm" ALARM_HISTORY_TABLE = "alarm_h" def __init__(self, url): """Hbase Connection Initialization.""" opts = self._parse_connection_url(url) if opts['host'] == '__test__': url = os.environ.get('CEILOMETER_TEST_HBASE_URL') if url: # Reparse URL, but from the env variable now opts = self._parse_connection_url(url) self.conn_pool = self._get_connection_pool(opts) else: # This is a in-memory usage for unit tests if Connection._memory_instance is None: LOG.debug( _('Creating a new in-memory HBase ' 'Connection object')) Connection._memory_instance = ( hbase_inmemory.MConnectionPool()) self.conn_pool = Connection._memory_instance else: self.conn_pool = self._get_connection_pool(opts) def upgrade(self): with self.conn_pool.connection() as conn: conn.create_table(self.ALARM_TABLE, {'f': dict()}) conn.create_table(self.ALARM_HISTORY_TABLE, {'f': dict()}) def clear(self): LOG.debug(_('Dropping HBase schema...')) with self.conn_pool.connection() as conn: for table in [self.ALARM_TABLE, self.ALARM_HISTORY_TABLE]: try: conn.disable_table(table) except Exception: LOG.debug(_('Cannot disable table but ignoring error')) try: conn.delete_table(table) except Exception: LOG.debug(_('Cannot delete table but ignoring error')) @staticmethod def _get_connection_pool(conf): """Return a connection pool to the database. .. note:: The tests use a subclass to override this and return an in-memory connection pool. """ LOG.debug( _('connecting to HBase on %(host)s:%(port)s') % ({ 'host': conf['host'], 'port': conf['port'] })) return happybase.ConnectionPool(size=100, host=conf['host'], port=conf['port'], table_prefix=conf['table_prefix']) @staticmethod def _parse_connection_url(url): """Parse connection parameters from a database url. .. note:: HBase Thrift does not support authentication and there is no database name, so we are not looking for these in the url. """ opts = {} result = netutils.urlsplit(url) opts['table_prefix'] = urlparse.parse_qs(result.query).get( 'table_prefix', [None])[0] opts['dbtype'] = result.scheme if ':' in result.netloc: opts['host'], port = result.netloc.split(':') else: opts['host'] = result.netloc port = 9090 opts['port'] = port and int(port) or 9090 return opts def update_alarm(self, alarm): """Create an alarm. :param alarm: The alarm to create. 
It is Alarm object, so we need to call as_dict() """ _id = alarm.alarm_id alarm_to_store = hbase_utils.serialize_entry(alarm.as_dict()) with self.conn_pool.connection() as conn: alarm_table = conn.table(self.ALARM_TABLE) alarm_table.put(_id, alarm_to_store) stored_alarm = hbase_utils.deserialize_entry( alarm_table.row(_id))[0] return models.Alarm(**stored_alarm) create_alarm = update_alarm def delete_alarm(self, alarm_id): with self.conn_pool.connection() as conn: alarm_table = conn.table(self.ALARM_TABLE) alarm_table.delete(alarm_id) def get_alarms(self, name=None, user=None, state=None, meter=None, project=None, enabled=None, alarm_id=None, pagination=None): if pagination: raise ceilometer.NotImplementedError('Pagination not implemented') if meter: raise ceilometer.NotImplementedError( 'Filter by meter not implemented') q = hbase_utils.make_query(alarm_id=alarm_id, name=name, enabled=enabled, user_id=user, project_id=project, state=state) with self.conn_pool.connection() as conn: alarm_table = conn.table(self.ALARM_TABLE) gen = alarm_table.scan(filter=q) for ignored, data in gen: stored_alarm = hbase_utils.deserialize_entry(data)[0] yield models.Alarm(**stored_alarm) def get_alarm_changes(self, alarm_id, on_behalf_of, user=None, project=None, type=None, start_timestamp=None, start_timestamp_op=None, end_timestamp=None, end_timestamp_op=None): q = hbase_utils.make_query(alarm_id=alarm_id, on_behalf_of=on_behalf_of, type=type, user_id=user, project_id=project) start_row, end_row = hbase_utils.make_timestamp_query( hbase_utils.make_general_rowkey_scan, start=start_timestamp, start_op=start_timestamp_op, end=end_timestamp, end_op=end_timestamp_op, bounds_only=True, some_id=alarm_id) with self.conn_pool.connection() as conn: alarm_history_table = conn.table(self.ALARM_HISTORY_TABLE) gen = alarm_history_table.scan(filter=q, row_start=start_row, row_stop=end_row) for ignored, data in gen: stored_entry = hbase_utils.deserialize_entry(data)[0] yield models.AlarmChange(**stored_entry) def record_alarm_change(self, alarm_change): """Record alarm change event.""" alarm_change_dict = hbase_utils.serialize_entry(alarm_change) ts = alarm_change.get('timestamp') or datetime.datetime.now() rts = hbase_utils.timestamp(ts) with self.conn_pool.connection() as conn: alarm_history_table = conn.table(self.ALARM_HISTORY_TABLE) alarm_history_table.put( alarm_change.get('alarm_id') + "_" + str(rts), alarm_change_dict)
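# NOTE: _parse_connection_url above accepts URLs such as
# hbase://host:9090?table_prefix=ceilometer, defaulting the port to 9090
# and ignoring authentication and database name. A condensed sketch of
# that parsing using only six/stdlib; the URL shown is purely
# illustrative.
from six.moves.urllib import parse as urlparse


def parse_hbase_url(url):
    result = urlparse.urlsplit(url)
    opts = {'dbtype': result.scheme,
            'table_prefix': urlparse.parse_qs(result.query).get(
                'table_prefix', [None])[0]}
    if ':' in result.netloc:
        opts['host'], port = result.netloc.split(':')
    else:
        opts['host'], port = result.netloc, 9090
    opts['port'] = int(port) if port else 9090
    return opts


print(parse_hbase_url('hbase://hbase-thrift:9090?table_prefix=ceilometer'))
# -> {'dbtype': 'hbase', 'table_prefix': 'ceilometer',
#     'host': 'hbase-thrift', 'port': 9090}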
class Connection(hbase_base.Connection, base.Connection):
    """Put the event data into an HBase database

    Collections:

    - events:

      - row_key: timestamp of event's generation + uuid of event
        in format: "%s:%s" % (ts, Event.message_id)
      - Column Families:

        f: contains the following qualifiers:

          - event_type: description of event's type
          - timestamp: time stamp of event generation
          - all traits for this event in format:

            .. code-block:: python

              "%s:%s" % (trait_name, trait_type)
    """

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )
    _memory_instance = None

    EVENT_TABLE = "event"

    def __init__(self, url):
        super(Connection, self).__init__(url)

    def upgrade(self):
        tables = [self.EVENT_TABLE]
        column_families = {'f': dict(max_versions=1)}
        with self.conn_pool.connection() as conn:
            hbase_utils.create_tables(conn, tables, column_families)

    def clear(self):
        LOG.debug(_('Dropping HBase schema...'))
        with self.conn_pool.connection() as conn:
            for table in [self.EVENT_TABLE]:
                try:
                    conn.disable_table(table)
                except Exception:
                    LOG.debug(_('Cannot disable table but ignoring error'))
                try:
                    conn.delete_table(table)
                except Exception:
                    LOG.debug(_('Cannot delete table but ignoring error'))

    def record_events(self, event_models):
        """Write the events to HBase.

        :param event_models: a list of models.Event objects.
        :return problem_events: a list of (reason, event) tuples for events
          that could not be saved. Of the reasons enumerated in
          storage.models.Event, only UNKNOWN_PROBLEM is applicable here.
        """
        problem_events = []
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            for event_model in event_models:
                # The row key consists of the timestamp and the message_id
                # from models.Event, so that stored events are sorted by
                # timestamp in the database.
                ts = event_model.generated
                row = hbase_utils.prepare_key(
                    hbase_utils.timestamp(ts, reverse=False),
                    event_model.message_id)
                event_type = event_model.event_type
                traits = {}
                if event_model.traits:
                    for trait in event_model.traits:
                        key = hbase_utils.prepare_key(trait.name, trait.dtype)
                        traits[key] = trait.value
                record = hbase_utils.serialize_entry(traits,
                                                     event_type=event_type,
                                                     timestamp=ts,
                                                     raw=event_model.raw)
                try:
                    events_table.put(row, record)
                except Exception as ex:
                    LOG.debug(_("Failed to record event: %s") % ex)
                    problem_events.append((models.Event.UNKNOWN_PROBLEM,
                                           event_model))
        return problem_events

    def get_events(self, event_filter):
        """Return an iterable of models.Event objects.

        :param event_filter: storage.EventFilter object, which consists of
          filters for events stored in the database.
""" q, start, stop = hbase_utils.make_events_query_from_filter( event_filter) with self.conn_pool.connection() as conn: events_table = conn.table(self.EVENT_TABLE) gen = events_table.scan(filter=q, row_start=start, row_stop=stop) for event_id, data in gen: traits = [] events_dict = hbase_utils.deserialize_entry(data)[0] for key, value in events_dict.items(): if isinstance(key, tuple): trait_name, trait_dtype = key traits.append( models.Trait(name=trait_name, dtype=int(trait_dtype), value=value)) ts, mess = event_id.split(':') yield models.Event(message_id=hbase_utils.unquote(mess), event_type=events_dict['event_type'], generated=events_dict['timestamp'], traits=sorted(traits, key=operator.attrgetter('dtype')), raw=events_dict['raw']) def get_event_types(self): """Return all event types as an iterable of strings.""" with self.conn_pool.connection() as conn: events_table = conn.table(self.EVENT_TABLE) gen = events_table.scan() event_types = set() for event_id, data in gen: events_dict = hbase_utils.deserialize_entry(data)[0] for key, value in events_dict.items(): if not isinstance(key, tuple) and key.startswith('event_type'): if value not in event_types: event_types.add(value) yield value def get_trait_types(self, event_type): """Return a dictionary containing the name and data type of the trait. Only trait types for the provided event_type are returned. :param event_type: the type of the Event """ q = hbase_utils.make_query(event_type=event_type) trait_names = set() with self.conn_pool.connection() as conn: events_table = conn.table(self.EVENT_TABLE) gen = events_table.scan(filter=q) for event_id, data in gen: events_dict = hbase_utils.deserialize_entry(data)[0] for key, value in events_dict.items(): if isinstance(key, tuple): trait_name, trait_type = key if trait_name not in trait_names: # Here we check that our method return only unique # trait types, for ex. if it is found the same trait # types in different events with equal event_type, # method will return only one trait type. It is # proposed that certain trait name could have only one # trait type. trait_names.add(trait_name) data_type = models.Trait.type_names[int(trait_type)] yield {'name': trait_name, 'data_type': data_type} def get_traits(self, event_type, trait_type=None): """Return all trait instances associated with an event_type. If trait_type is specified, only return instances of that trait type. :param event_type: the type of the Event to filter by :param trait_type: the name of the Trait to filter by """ q = hbase_utils.make_query(event_type=event_type, trait_type=trait_type) with self.conn_pool.connection() as conn: events_table = conn.table(self.EVENT_TABLE) gen = events_table.scan(filter=q) for event_id, data in gen: events_dict = hbase_utils.deserialize_entry(data)[0] for key, value in events_dict.items(): if isinstance(key, tuple): trait_name, trait_type = key yield models.Trait(name=trait_name, dtype=int(trait_type), value=value)
class Connection(base.Connection): """Put the data into a SQLAlchemy database. Tables:: - meter - meter definition - { id: meter id name: meter name type: meter type unit: meter unit } - resource - resource definition - { internal_id: resource id resource_id: resource uuid user_id: user uuid project_id: project uuid source_id: source id resource_metadata: metadata dictionary metadata_hash: metadata dictionary hash } - sample - the raw incoming data - { id: sample id meter_id: meter id (->meter.id) resource_id: resource id (->resource.internal_id) volume: sample volume timestamp: datetime recorded_at: datetime message_signature: message signature message_id: message uuid } """ CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES, AVAILABLE_CAPABILITIES) STORAGE_CAPABILITIES = utils.update_nested( base.Connection.STORAGE_CAPABILITIES, AVAILABLE_STORAGE_CAPABILITIES, ) def __init__(self, url): # Set max_retries to 0, since oslo.db in certain cases may attempt # to retry making the db connection retried max_retries ^ 2 times # in failure case and db reconnection has already been implemented # in storage.__init__.get_connection_from_config function cfg.CONF.set_override('max_retries', 0, group='database') self._engine_facade = db_session.EngineFacade( url, **dict(cfg.CONF.database.items())) def upgrade(self): # NOTE(gordc): to minimise memory, only import migration when needed from oslo.db.sqlalchemy import migration path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'sqlalchemy', 'migrate_repo') migration.db_sync(self._engine_facade.get_engine(), path) def clear(self): engine = self._engine_facade.get_engine() for table in reversed(models.Base.metadata.sorted_tables): engine.execute(table.delete()) self._engine_facade._session_maker.close_all() engine.dispose() @staticmethod def _create_meter(conn, name, type, unit): # TODO(gordc): implement lru_cache to improve performance try: meter = models.Meter.__table__ trans = conn.begin_nested() if conn.dialect.name == 'sqlite': trans = conn.begin() with trans: meter_row = conn.execute( sa.select([meter.c.id]).where( sa.and_(meter.c.name == name, meter.c.type == type, meter.c.unit == unit))).first() meter_id = meter_row[0] if meter_row else None if meter_id is None: result = conn.execute(meter.insert(), name=name, type=type, unit=unit) meter_id = result.inserted_primary_key[0] except dbexc.DBDuplicateEntry: # retry function to pick up duplicate committed object meter_id = Connection._create_meter(conn, name, type, unit) return meter_id @staticmethod def _create_resource(conn, res_id, user_id, project_id, source_id, rmeta): # TODO(gordc): implement lru_cache to improve performance try: res = models.Resource.__table__ m_hash = hashlib.md5(jsonutils.dumps(rmeta, sort_keys=True)).hexdigest() trans = conn.begin_nested() if conn.dialect.name == 'sqlite': trans = conn.begin() with trans: res_row = conn.execute( sa.select([res.c.internal_id]).where( sa.and_(res.c.resource_id == res_id, res.c.user_id == user_id, res.c.project_id == project_id, res.c.source_id == source_id, res.c.metadata_hash == m_hash))).first() internal_id = res_row[0] if res_row else None if internal_id is None: result = conn.execute(res.insert(), resource_id=res_id, user_id=user_id, project_id=project_id, source_id=source_id, resource_metadata=rmeta, metadata_hash=m_hash) internal_id = result.inserted_primary_key[0] if rmeta and isinstance(rmeta, dict): meta_map = {} for key, v in utils.dict_to_keyval(rmeta): try: _model = sql_utils.META_TYPE_MAP[type(v)] if 
meta_map.get(_model) is None: meta_map[_model] = [] meta_map[_model].append({ 'id': internal_id, 'meta_key': key, 'value': v }) except KeyError: LOG.warn( _("Unknown metadata type. Key (%s) " "will not be queryable."), key) for _model in meta_map.keys(): conn.execute(_model.__table__.insert(), meta_map[_model]) except dbexc.DBDuplicateEntry: # retry function to pick up duplicate committed object internal_id = Connection._create_resource(conn, res_id, user_id, project_id, source_id, rmeta) return internal_id def record_metering_data(self, data): """Write the data to the backend storage system. :param data: a dictionary such as returned by ceilometer.meter.meter_message_from_counter """ engine = self._engine_facade.get_engine() with engine.begin() as conn: # Record the raw data for the sample. m_id = self._create_meter(conn, data['counter_name'], data['counter_type'], data['counter_unit']) res_id = self._create_resource(conn, data['resource_id'], data['user_id'], data['project_id'], data['source'], data['resource_metadata']) sample = models.Sample.__table__ conn.execute(sample.insert(), meter_id=m_id, resource_id=res_id, timestamp=data['timestamp'], volume=data['counter_volume'], message_signature=data['message_signature'], message_id=data['message_id']) def clear_expired_metering_data(self, ttl): """Clear expired data from the backend storage system. Clearing occurs according to the time-to-live. :param ttl: Number of seconds to keep records for. """ session = self._engine_facade.get_session() with session.begin(): end = timeutils.utcnow() - datetime.timedelta(seconds=ttl) sample_q = (session.query( models.Sample).filter(models.Sample.timestamp < end)) sample_subq = sample_q.subquery() for table in [ models.MetaText, models.MetaBigInt, models.MetaFloat, models.MetaBool ]: (session.query(table).join( sample_subq, sample_subq.c.id == table.id).delete()) rows = sample_q.delete() # remove Meter definitions with no matching samples (session.query( models.Meter).filter(~models.Meter.samples.any()).delete( synchronize_session='fetch')) (session.query( models.Resource).filter(~models.Resource.samples.any()).delete( synchronize_session='fetch')) LOG.info(_("%d samples removed from database"), rows) def get_resources(self, user=None, project=None, source=None, start_timestamp=None, start_timestamp_op=None, end_timestamp=None, end_timestamp_op=None, metaquery=None, resource=None, pagination=None): """Return an iterable of api_models.Resource instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param source: Optional source filter. :param start_timestamp: Optional modified timestamp start range. :param start_timestamp_op: Optional start time operator, like gt, ge. :param end_timestamp: Optional modified timestamp end range. :param end_timestamp_op: Optional end time operator, like lt, le. :param metaquery: Optional dict with metadata to match on. :param resource: Optional resource filter. :param pagination: Optional pagination query. 
""" if pagination: raise ceilometer.NotImplementedError('Pagination not implemented') s_filter = storage.SampleFilter(user=user, project=project, source=source, start_timestamp=start_timestamp, start_timestamp_op=start_timestamp_op, end_timestamp=end_timestamp, end_timestamp_op=end_timestamp_op, metaquery=metaquery, resource=resource) session = self._engine_facade.get_session() # get list of resource_ids res_q = session.query(distinct(models.Resource.resource_id)).join( models.Sample, models.Sample.resource_id == models.Resource.internal_id) res_q = make_query_from_filter(session, res_q, s_filter, require_meter=False) for res_id in res_q.all(): # get latest Sample max_q = (session.query(models.Sample).join( models.Resource, models.Resource.internal_id == models.Sample. resource_id).filter(models.Resource.resource_id == res_id[0])) max_q = make_query_from_filter(session, max_q, s_filter, require_meter=False) max_q = max_q.order_by(models.Sample.timestamp.desc(), models.Sample.id.desc()).limit(1) # get the min timestamp value. min_q = (session.query(models.Sample.timestamp).join( models.Resource, models.Resource.internal_id == models.Sample. resource_id).filter(models.Resource.resource_id == res_id[0])) min_q = make_query_from_filter(session, min_q, s_filter, require_meter=False) min_q = min_q.order_by(models.Sample.timestamp.asc()).limit(1) sample = max_q.first() if sample: yield api_models.Resource( resource_id=sample.resource.resource_id, project_id=sample.resource.project_id, first_sample_timestamp=min_q.first().timestamp, last_sample_timestamp=sample.timestamp, source=sample.resource.source_id, user_id=sample.resource.user_id, metadata=sample.resource.resource_metadata) def get_meters(self, user=None, project=None, resource=None, source=None, metaquery=None, pagination=None): """Return an iterable of api_models.Meter instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param resource: Optional ID of the resource. :param source: Optional source filter. :param metaquery: Optional dict with metadata to match on. :param pagination: Optional pagination query. """ if pagination: raise ceilometer.NotImplementedError('Pagination not implemented') s_filter = storage.SampleFilter(user=user, project=project, source=source, metaquery=metaquery, resource=resource) # NOTE(gordc): get latest sample of each meter/resource. we do not # filter here as we want to filter only on latest record. session = self._engine_facade.get_session() subq = session.query(func.max(models.Sample.id).label('id')).join( models.Resource, models.Resource.internal_id == models.Sample.resource_id).group_by( models.Sample.meter_id, models.Resource.resource_id) if resource: subq = subq.filter(models.Resource.resource_id == resource) subq = subq.subquery() # get meter details for samples. 
query_sample = (session.query( models.Sample.meter_id, models.Meter.name, models.Meter.type, models.Meter.unit, models.Resource.resource_id, models.Resource.project_id, models.Resource.source_id, models.Resource.user_id).join( subq, subq.c.id == models.Sample.id).join( models.Meter, models.Meter.id == models.Sample.meter_id).join( models.Resource, models.Resource.internal_id == models.Sample.resource_id)) query_sample = make_query_from_filter(session, query_sample, s_filter, require_meter=False) for row in query_sample.all(): yield api_models.Meter(name=row.name, type=row.type, unit=row.unit, resource_id=row.resource_id, project_id=row.project_id, source=row.source_id, user_id=row.user_id) def _retrieve_samples(self, query): samples = query.all() for s in samples: # Remove the id generated by the database when # the sample was inserted. It is an implementation # detail that should not leak outside of the driver. yield api_models.Sample( source=s.source_id, counter_name=s.counter_name, counter_type=s.counter_type, counter_unit=s.counter_unit, counter_volume=s.counter_volume, user_id=s.user_id, project_id=s.project_id, resource_id=s.resource_id, timestamp=s.timestamp, recorded_at=s.recorded_at, resource_metadata=s.resource_metadata, message_id=s.message_id, message_signature=s.message_signature, ) def get_samples(self, sample_filter, limit=None): """Return an iterable of api_models.Samples. :param sample_filter: Filter. :param limit: Maximum number of results to return. """ if limit == 0: return [] session = self._engine_facade.get_session() query = session.query( models.Sample.timestamp, models.Sample.recorded_at, models.Sample.message_id, models.Sample.message_signature, models.Sample.volume.label('counter_volume'), models.Meter.name.label('counter_name'), models.Meter.type.label('counter_type'), models.Meter.unit.label('counter_unit'), models.Resource.source_id, models.Resource.user_id, models.Resource.project_id, models.Resource.resource_metadata, models.Resource.resource_id).join( models.Meter, models.Meter.id == models.Sample.meter_id).join( models.Resource, models.Resource.internal_id == models.Sample.resource_id).order_by( models.Sample.timestamp.desc()) query = make_query_from_filter(session, query, sample_filter, require_meter=False) if limit: query = query.limit(limit) return self._retrieve_samples(query) def query_samples(self, filter_expr=None, orderby=None, limit=None): if limit == 0: return [] session = self._engine_facade.get_session() query = session.query(models.FullSample) transformer = sql_utils.QueryTransformer(models.FullSample, query) if filter_expr is not None: transformer.apply_filter(filter_expr) transformer.apply_options(orderby, limit) return self._retrieve_samples(transformer.get_query()) @staticmethod def _get_aggregate_functions(aggregate): if not aggregate: return [f for f in STANDARD_AGGREGATES.values()] functions = [] for a in aggregate: if a.func in STANDARD_AGGREGATES: functions.append(STANDARD_AGGREGATES[a.func]) elif a.func in UNPARAMETERIZED_AGGREGATES: functions.append(UNPARAMETERIZED_AGGREGATES[a.func]) elif a.func in PARAMETERIZED_AGGREGATES['compute']: validate = PARAMETERIZED_AGGREGATES['validate'].get(a.func) if not (validate and validate(a.param)): raise storage.StorageBadAggregate('Bad aggregate: %s.%s' % (a.func, a.param)) compute = PARAMETERIZED_AGGREGATES['compute'][a.func] functions.append(compute(a.param)) else: raise ceilometer.NotImplementedError( 'Selectable aggregate function %s' ' is not supported' % a.func) return functions def 
_make_stats_query(self, sample_filter, groupby, aggregate): select = [ func.min(models.Sample.timestamp).label('tsmin'), func.max(models.Sample.timestamp).label('tsmax'), models.Meter.unit ] select.extend(self._get_aggregate_functions(aggregate)) session = self._engine_facade.get_session() if groupby: group_attributes = [] for g in groupby: if g != 'resource_metadata.instance_type': group_attributes.append(getattr(models.Resource, g)) else: group_attributes.append( getattr( models.MetaText, 'value').label('resource_metadata.instance_type')) select.extend(group_attributes) query = (session.query(*select).join( models.Meter, models.Meter.id == models.Sample.meter_id).join( models.Resource, models.Resource.internal_id == models.Sample.resource_id).group_by(models.Meter.unit)) if groupby: for g in groupby: if g == 'resource_metadata.instance_type': query = query.join( models.MetaText, models.Resource.internal_id == models.MetaText.id) query = query.filter( models.MetaText.meta_key == 'instance_type') query = query.group_by(*group_attributes) return make_query_from_filter(session, query, sample_filter) @staticmethod def _stats_result_aggregates(result, aggregate): stats_args = {} if isinstance(result.count, (int, long)): stats_args['count'] = result.count for attr in ['min', 'max', 'sum', 'avg']: if hasattr(result, attr): stats_args[attr] = getattr(result, attr) if aggregate: stats_args['aggregate'] = {} for a in aggregate: key = '%s%s' % (a.func, '/%s' % a.param if a.param else '') stats_args['aggregate'][key] = getattr(result, key) return stats_args @staticmethod def _stats_result_to_model(result, period, period_start, period_end, groupby, aggregate): stats_args = Connection._stats_result_aggregates(result, aggregate) stats_args['unit'] = result.unit duration = (timeutils.delta_seconds(result.tsmin, result.tsmax) if result.tsmin is not None and result.tsmax is not None else None) stats_args['duration'] = duration stats_args['duration_start'] = result.tsmin stats_args['duration_end'] = result.tsmax stats_args['period'] = period stats_args['period_start'] = period_start stats_args['period_end'] = period_end stats_args['groupby'] = (dict( (g, getattr(result, g)) for g in groupby) if groupby else None) return api_models.Statistics(**stats_args) def get_meter_statistics(self, sample_filter, period=None, groupby=None, aggregate=None): """Return an iterable of api_models.Statistics instances. Items are containing meter statistics described by the query parameters. The filter must have a meter value set. """ if groupby: for group in groupby: if group not in [ 'user_id', 'project_id', 'resource_id', 'resource_metadata.instance_type' ]: raise ceilometer.NotImplementedError('Unable to group by ' 'these fields') if not period: for res in self._make_stats_query(sample_filter, groupby, aggregate): if res.count: yield self._stats_result_to_model(res, 0, res.tsmin, res.tsmax, groupby, aggregate) return if not (sample_filter.start_timestamp and sample_filter.end_timestamp): res = self._make_stats_query(sample_filter, None, aggregate).first() if not res: # NOTE(liusheng):The 'res' may be NoneType, because no # sample has found with sample filter(s). return query = self._make_stats_query(sample_filter, groupby, aggregate) # HACK(jd) This is an awful method to compute stats by period, but # since we're trying to be SQL agnostic we have to write portable # code, so here it is, admire! We're going to do one request to get # stats by period. 
We would like to use GROUP BY, but there's no # portable way to manipulate timestamp in SQL, so we can't. for period_start, period_end in base.iter_period( sample_filter.start_timestamp or res.tsmin, sample_filter.end_timestamp or res.tsmax, period): q = query.filter(models.Sample.timestamp >= period_start) q = q.filter(models.Sample.timestamp < period_end) for r in q.all(): if r.count: yield self._stats_result_to_model( result=r, period=int( timeutils.delta_seconds(period_start, period_end)), period_start=period_start, period_end=period_end, groupby=groupby, aggregate=aggregate)
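# NOTE: base.iter_period (not shown here) only has to produce the
# consecutive [start, end) windows consumed by the loop above. A minimal
# equivalent under that assumption, taking datetime bounds and a period
# in seconds.
import datetime


def iter_period_sketch(start, end, period):
    # Yield consecutive [window_start, window_end) pairs covering
    # [start, end) in steps of `period` seconds.
    step = datetime.timedelta(seconds=period)
    while start < end:
        yield start, start + step
        start += step


begin = datetime.datetime(2014, 1, 1)
for ps, pe in iter_period_sketch(begin,
                                 begin + datetime.timedelta(hours=1),
                                 1200):
    print(ps, pe)  # three 20-minute windows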
def __init__(self, conf, AVAILABLE_CAPABILITIES): super(Connection, self).__init__( conf, utils.update_nested(COMMON_AVAILABLE_CAPABILITIES, AVAILABLE_CAPABILITIES))
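# NOTE: utils.update_nested deep-merges one capability dictionary into
# another; this is how COMMON_AVAILABLE_CAPABILITIES and a driver's own
# AVAILABLE_CAPABILITIES are combined above. A rough pure-Python
# equivalent, for illustration only.
import copy


def update_nested_sketch(original, updates):
    # Overlay `updates` on a copy of `original`: nested dicts are merged
    # recursively, scalar values are replaced.
    merged = copy.deepcopy(original)
    for key, value in updates.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = update_nested_sketch(merged[key], value)
        else:
            merged[key] = value
    return merged


common = {'statistics': {'query': {'simple': True, 'complex': False}}}
extra = {'statistics': {'query': {'complex': True}}}
print(update_nested_sketch(common, extra))
# -> {'statistics': {'query': {'simple': True, 'complex': True}}}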
class Connection(pymongo_base.Connection):
    """The db2 storage for Ceilometer

    Collections::

        - meter
          - the raw incoming data
        - resource
          - the metadata for resources
          - { _id: uuid of resource,
              metadata: metadata dictionaries
              user_id: uuid
              project_id: uuid
              meter: [ array of {counter_name: string, counter_type: string,
                                 counter_unit: string} ]
            }
    """

    CAPABILITIES = utils.update_nested(pymongo_base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)

    CONNECTION_POOL = pymongo_utils.ConnectionPool()

    GROUP = {'_id': '$counter_name',
             'unit': {'$min': '$counter_unit'},
             'min': {'$min': '$counter_volume'},
             'max': {'$max': '$counter_volume'},
             'sum': {'$sum': '$counter_volume'},
             'count': {'$sum': 1},
             'duration_start': {'$min': '$timestamp'},
             'duration_end': {'$max': '$timestamp'},
             }

    PROJECT = {'_id': 0, 'unit': 1,
               'min': 1, 'max': 1, 'sum': 1, 'count': 1,
               'avg': {'$divide': ['$sum', '$count']},
               'duration_start': 1,
               'duration_end': 1,
               }

    SORT_OPERATION_MAP = {'desc': pymongo.DESCENDING,
                          'asc': pymongo.ASCENDING}

    SECONDS_IN_A_DAY = 86400

    def __init__(self, url):
        # Since we are using pymongo, even though we are connecting to DB2,
        # we still have to replace the scheme that distinguishes the db2
        # driver from the mongodb driver, so that pymongo will not raise an
        # exception about the scheme.
        url = url.replace('db2:', 'mongodb:', 1)
        self.conn = self.CONNECTION_POOL.connect(url)

        # Require MongoDB 2.2 to use aggregate(). Since we use mongodb as
        # the backend for tests, the following code makes sure the tests
        # won't try aggregate() on an older mongodb. For db2, versionArray
        # is not part of server_info, so no exception is raised when a real
        # db2 is used as the backend.
        server_info = self.conn.server_info()
        if server_info.get('sysInfo'):
            self._using_mongodb = True
        else:
            self._using_mongodb = False
        if self._using_mongodb and server_info.get('versionArray') < [2, 2]:
            raise storage.StorageBadVersion("Need at least MongoDB 2.2")

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options['database'])
        if connection_options.get('username'):
            self.db.authenticate(connection_options['username'],
                                 connection_options['password'])

        self.upgrade()

    @classmethod
    def _build_sort_instructions(cls, sort_keys=None, sort_dir='desc'):
        """Returns a sort_instruction.

        Sort instructions are used in the query to determine what attributes
        to sort on and what direction to use.

        :param sort_keys: array of attributes by which results should be
                          sorted.
        :param sort_dir: direction in which results should be sorted
                         (asc, desc).
        :return: sort parameters
        """
        sort_keys = sort_keys or []
        sort_instructions = []
        _sort_dir = cls.SORT_OPERATION_MAP.get(sort_dir,
                                               cls.SORT_OPERATION_MAP['desc'])

        for _sort_key in sort_keys:
            _instruction = (_sort_key, _sort_dir)
            sort_instructions.append(_instruction)

        return sort_instructions

    def upgrade(self, version=None):
        # Establish indexes
        #
        # We need variations for user_id vs. project_id because of the
        # way the indexes are stored in b-trees. The user_id and
        # project_id values are usually mutually exclusive in the
        # queries, so the database won't take advantage of an index
        # including both.
        if self.db.resource.index_information() == {}:
            resource_id = str(bson.objectid.ObjectId())
            self.db.resource.insert({'_id': resource_id,
                                     'no_key': resource_id})
            meter_id = str(bson.objectid.ObjectId())
            timestamp = timeutils.utcnow()
            self.db.meter.insert({'_id': meter_id,
                                  'no_key': meter_id,
                                  'timestamp': timestamp})

            self.db.resource.ensure_index([('user_id', pymongo.ASCENDING),
                                           ('project_id', pymongo.ASCENDING),
                                           ('source', pymongo.ASCENDING)],
                                          name='resource_idx')
            self.db.meter.ensure_index([('resource_id', pymongo.ASCENDING),
                                        ('user_id', pymongo.ASCENDING),
                                        ('project_id', pymongo.ASCENDING),
                                        ('counter_name', pymongo.ASCENDING),
                                        ('timestamp', pymongo.ASCENDING),
                                        ('source', pymongo.ASCENDING)],
                                       name='meter_idx')
            self.db.meter.ensure_index([('timestamp', pymongo.DESCENDING)],
                                       name='timestamp_idx')

            self.db.resource.remove({'_id': resource_id})
            self.db.meter.remove({'_id': meter_id})

        # remove API v1 related tables
        self.db.user.drop()
        self.db.project.drop()

    def clear(self):
        # db2 does not support drop_database, so remove all collections
        for col in ['resource', 'meter']:
            self.db[col].drop()
        # The drop_database command does nothing on a db2 database since it
        # has not been implemented there. However, calling it is important
        # for the removal of all the empty dbs created during the test runs,
        # since the test run is against mongodb on Jenkins.
        self.conn.drop_database(self.db.name)
        self.conn.close()

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        # Record the updated resource metadata
        self.db.resource.update(
            {'_id': data['resource_id']},
            {'$set': {'project_id': data['project_id'],
                      'user_id': data['user_id'] or 'null',
                      'metadata': data['resource_metadata'],
                      'source': data['source'],
                      },
             '$addToSet': {'meter': {'counter_name': data['counter_name'],
                                     'counter_type': data['counter_type'],
                                     'counter_unit': data['counter_unit'],
                                     },
                           },
             },
            upsert=True,
        )

        # Record the raw data for the meter. Use a copy so we do not
        # modify a data structure owned by our caller (the driver adds
        # a new key '_id').
        record = copy.copy(data)
        record['recorded_at'] = timeutils.utcnow()
        # Make sure the record has an _id field, which db2 won't add
        # automatically.
        if record.get('_id') is None:
            record['_id'] = str(bson.objectid.ObjectId())
        self.db.meter.insert(record)

    def get_resources(self, user=None, project=None, source=None,
                      start_timestamp=None, start_timestamp_op=None,
                      end_timestamp=None, end_timestamp_op=None,
                      metaquery=None, resource=None, pagination=None):
        """Return an iterable of models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like gt, ge.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
""" if pagination: raise ceilometer.NotImplementedError('Pagination not implemented') metaquery = metaquery or {} q = {} if user is not None: q['user_id'] = user if project is not None: q['project_id'] = project if source is not None: q['source'] = source if resource is not None: q['resource_id'] = resource # Add resource_ prefix so it matches the field in the db q.update( dict(('resource_' + k, v) for (k, v) in six.iteritems(metaquery))) if start_timestamp or end_timestamp: # Look for resources matching the above criteria and with # samples in the time range we care about, then change the # resource query to return just those resources by id. ts_range = pymongo_utils.make_timestamp_range( start_timestamp, end_timestamp, start_timestamp_op, end_timestamp_op) if ts_range: q['timestamp'] = ts_range sort_keys = base._handle_sort_key('resource', 'timestamp') sort_keys.insert(0, 'resource_id') sort_instructions = self._build_sort_instructions(sort_keys=sort_keys, sort_dir='desc') resource = lambda x: x['resource_id'] meters = self.db.meter.find(q, sort=sort_instructions) for resource_id, r_meters in itertools.groupby(meters, key=resource): # Because we have to know first/last timestamp, and we need a full # list of references to the resource's meters, we need a tuple # here. r_meters = tuple(r_meters) latest_meter = r_meters[0] last_ts = latest_meter['timestamp'] first_ts = r_meters[-1]['timestamp'] yield models.Resource(resource_id=latest_meter['resource_id'], project_id=latest_meter['project_id'], first_sample_timestamp=first_ts, last_sample_timestamp=last_ts, source=latest_meter['source'], user_id=latest_meter['user_id'], metadata=latest_meter['resource_metadata']) def get_meter_statistics(self, sample_filter, period=None, groupby=None, aggregate=None): """Return an iterable of models.Statistics instance. Items are containing meter statistics described by the query parameters. The filter must have a meter value set. 
""" if (groupby and set(groupby) - set(['user_id', 'project_id', 'resource_id', 'source'])): raise ceilometer.NotImplementedError( "Unable to group by these fields") if aggregate: raise ceilometer.NotImplementedError( 'Selectable aggregates not implemented') q = pymongo_utils.make_query_from_filter(sample_filter) if period: if sample_filter.start: period_start = sample_filter.start else: period_start = self.db.meter.find(limit=1, sort=[('timestamp', pymongo.ASCENDING) ])[0]['timestamp'] if groupby: sort_keys = ['counter_name'] + groupby + ['timestamp'] else: sort_keys = ['counter_name', 'timestamp'] sort_instructions = self._build_sort_instructions(sort_keys=sort_keys, sort_dir='asc') meters = self.db.meter.find(q, sort=sort_instructions) def _group_key(meter): # the method to define a key for groupby call key = {} for y in sort_keys: if y == 'timestamp' and period: key[y] = ( timeutils.delta_seconds(period_start, meter[y]) // period) elif y != 'timestamp': key[y] = meter[y] return key def _to_offset(periods): return { 'days': (periods * period) // self.SECONDS_IN_A_DAY, 'seconds': (periods * period) % self.SECONDS_IN_A_DAY } for key, grouped_meters in itertools.groupby(meters, key=_group_key): stat = models.Statistics(unit=None, min=sys.maxint, max=-sys.maxint, avg=0, sum=0, count=0, period=0, period_start=0, period_end=0, duration=0, duration_start=0, duration_end=0, groupby=None) for meter in grouped_meters: stat.unit = meter.get('counter_unit', '') m_volume = meter.get('counter_volume') if stat.min > m_volume: stat.min = m_volume if stat.max < m_volume: stat.max = m_volume stat.sum += m_volume stat.count += 1 if stat.duration_start == 0: stat.duration_start = meter['timestamp'] stat.duration_end = meter['timestamp'] if groupby and not stat.groupby: stat.groupby = {} for group_key in groupby: stat.groupby[group_key] = meter[group_key] stat.duration = timeutils.delta_seconds(stat.duration_start, stat.duration_end) stat.avg = stat.sum / stat.count if period: stat.period = period periods = key.get('timestamp') stat.period_start = ( period_start + datetime.timedelta(**(_to_offset(periods)))) stat.period_end = ( period_start + datetime.timedelta(**(_to_offset(periods + 1)))) else: stat.period_start = stat.duration_start stat.period_end = stat.duration_end yield stat
class Connection(base.Connection):
    """Put the data into a SQLAlchemy database.

    Tables::

        - meter
          - meter definition
          - { id: meter def id
              name: meter name
              type: meter type
              unit: meter unit
              }
        - sample
          - the raw incoming data
          - { id: sample id
              meter_id: meter id            (->meter.id)
              user_id: user uuid
              project_id: project uuid
              resource_id: resource uuid
              source_id: source id
              resource_metadata: metadata dictionaries
              volume: sample volume
              timestamp: datetime
              message_signature: message signature
              message_id: message uuid
              }
    """
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def __init__(self, url):
        # Set max_retries to 0, since oslo.db may otherwise attempt to
        # retry the db connection up to max_retries ^ 2 times in the
        # failure case, and db reconnection has already been implemented
        # in storage.__init__.get_connection_from_config function
        options = dict(cfg.CONF.database.items())
        options['max_retries'] = 0
        # oslo.db doesn't support options defined by Ceilometer
        for opt in storage.OPTS:
            options.pop(opt.name, None)
        self._engine_facade = db_session.EngineFacade(url, **options)

    def upgrade(self):
        # NOTE(gordc): to minimise memory, only import migration when needed
        from oslo_db.sqlalchemy import migration
        path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                            '..', '..', 'storage', 'sqlalchemy',
                            'migrate_repo')
        migration.db_sync(self._engine_facade.get_engine(), path)

    def clear(self):
        engine = self._engine_facade.get_engine()
        for table in reversed(models.Base.metadata.sorted_tables):
            engine.execute(table.delete())
        engine.dispose()

    def _retrieve_data(self, filter_expr, orderby, limit, table):
        if limit == 0:
            return []

        session = self._engine_facade.get_session()
        engine = self._engine_facade.get_engine()
        query = session.query(table)
        transformer = sql_utils.QueryTransformer(table, query,
                                                 dialect=engine.dialect.name)
        if filter_expr is not None:
            transformer.apply_filter(filter_expr)

        transformer.apply_options(orderby, limit)

        retrieve = {models.Alarm: self._retrieve_alarms,
                    models.AlarmChange: self._retrieve_alarm_history}
        return retrieve[table](transformer.get_query())

    @staticmethod
    def _row_to_alarm_model(row):
        return alarm_api_models.Alarm(
            alarm_id=row.alarm_id,
            enabled=row.enabled,
            type=row.type,
            name=row.name,
            description=row.description,
            timestamp=row.timestamp,
            user_id=row.user_id,
            project_id=row.project_id,
            state=row.state,
            state_timestamp=row.state_timestamp,
            ok_actions=row.ok_actions,
            alarm_actions=row.alarm_actions,
            insufficient_data_actions=(row.insufficient_data_actions),
            rule=row.rule,
            time_constraints=row.time_constraints,
            repeat_actions=row.repeat_actions,
            severity=row.severity)

    def _retrieve_alarms(self, query):
        return (self._row_to_alarm_model(x) for x in query.all())

    def get_alarms(self, name=None, user=None, state=None, meter=None,
                   project=None, enabled=None, alarm_id=None,
                   alarm_type=None, severity=None):
        """Yields a list of alarms that match the filters.

        :param name: Optional name for alarm.
        :param user: Optional ID for user that owns the resource.
        :param state: Optional string for alarm state.
        :param meter: Optional string for alarms associated with meter.
        :param project: Optional ID for project that owns the resource.
        :param enabled: Optional boolean to list disabled alarms.
        :param alarm_id: Optional alarm_id to return one alarm.
        :param alarm_type: Optional alarm type.
        :param severity: Optional alarm severity
        """
        session = self._engine_facade.get_session()
        query = session.query(models.Alarm)
        if name is not None:
            query = query.filter(models.Alarm.name == name)
        if enabled is not None:
            query = query.filter(models.Alarm.enabled == enabled)
        if user is not None:
            query = query.filter(models.Alarm.user_id == user)
        if project is not None:
            query = query.filter(models.Alarm.project_id == project)
        if alarm_id is not None:
            query = query.filter(models.Alarm.alarm_id == alarm_id)
        if state is not None:
            query = query.filter(models.Alarm.state == state)
        if alarm_type is not None:
            query = query.filter(models.Alarm.type == alarm_type)
        if severity is not None:
            query = query.filter(models.Alarm.severity == severity)
        query = query.order_by(desc(models.Alarm.timestamp))
        alarms = self._retrieve_alarms(query)

        # TODO(cmart): improve this by using sqlalchemy.func factory
        if meter is not None:
            alarms = filter(lambda row:
                            row.rule.get('meter_name', None) == meter,
                            alarms)

        return alarms

    def create_alarm(self, alarm):
        """Create an alarm.

        :param alarm: The alarm to create.
        """
        session = self._engine_facade.get_session()
        with session.begin():
            alarm_row = models.Alarm(alarm_id=alarm.alarm_id)
            alarm_row.update(alarm.as_dict())
            session.add(alarm_row)

        return self._row_to_alarm_model(alarm_row)

    def update_alarm(self, alarm):
        """Update an alarm.

        :param alarm: the new Alarm to update
        """
        session = self._engine_facade.get_session()
        with session.begin():
            alarm_row = session.merge(models.Alarm(alarm_id=alarm.alarm_id))
            alarm_row.update(alarm.as_dict())

        return self._row_to_alarm_model(alarm_row)

    def delete_alarm(self, alarm_id):
        """Delete an alarm and its history data.

        :param alarm_id: ID of the alarm to delete
        """
        session = self._engine_facade.get_session()
        with session.begin():
            session.query(models.Alarm).filter(
                models.Alarm.alarm_id == alarm_id).delete()
            # FIXME(liusheng): we should use delete cascade
            session.query(models.AlarmChange).filter(
                models.AlarmChange.alarm_id == alarm_id).delete()

    @staticmethod
    def _row_to_alarm_change_model(row):
        return alarm_api_models.AlarmChange(event_id=row.event_id,
                                            alarm_id=row.alarm_id,
                                            type=row.type,
                                            detail=row.detail,
                                            user_id=row.user_id,
                                            project_id=row.project_id,
                                            on_behalf_of=row.on_behalf_of,
                                            timestamp=row.timestamp)

    def query_alarms(self, filter_expr=None, orderby=None, limit=None):
        """Yields a list of alarms that match the filter."""
        return self._retrieve_data(filter_expr, orderby, limit, models.Alarm)

    def _retrieve_alarm_history(self, query):
        return (self._row_to_alarm_change_model(x) for x in query.all())

    def query_alarm_history(self, filter_expr=None, orderby=None, limit=None):
        """Return an iterable of model.AlarmChange objects."""
        return self._retrieve_data(filter_expr, orderby, limit,
                                   models.AlarmChange)

    def get_alarm_changes(self, alarm_id, on_behalf_of,
                          user=None, project=None, alarm_type=None,
                          severity=None, start_timestamp=None,
                          start_timestamp_op=None, end_timestamp=None,
                          end_timestamp_op=None):
        """Yields a list of AlarmChanges describing alarm history

        Changes are always sorted in reverse order of occurrence, given
        the importance of currency.

        Segregation for non-administrative users is done on the basis
        of the on_behalf_of parameter. This allows such users to have
        visibility on both the changes initiated by themselves directly
        (generally creation, rule changes, or deletion) and also on those
        changes initiated on their behalf by the alarming service (state
        transitions after alarm thresholds are crossed).
:param alarm_id: ID of alarm to return changes for :param on_behalf_of: ID of tenant to scope changes query (None for administrative user, indicating all projects) :param user: Optional ID of user to return changes for :param project: Optional ID of project to return changes for :param alarm_type: Optional change type :param severity: Optional alarm severity :param start_timestamp: Optional modified timestamp start range :param start_timestamp_op: Optional timestamp start range operation :param end_timestamp: Optional modified timestamp end range :param end_timestamp_op: Optional timestamp end range operation """ session = self._engine_facade.get_session() query = session.query(models.AlarmChange) query = query.filter(models.AlarmChange.alarm_id == alarm_id) if on_behalf_of is not None: query = query.filter( models.AlarmChange.on_behalf_of == on_behalf_of) if user is not None: query = query.filter(models.AlarmChange.user_id == user) if project is not None: query = query.filter(models.AlarmChange.project_id == project) if alarm_type is not None: query = query.filter(models.AlarmChange.type == alarm_type) if severity is not None: query = query.filter(models.AlarmChange.severity == severity) if start_timestamp: if start_timestamp_op == 'gt': query = query.filter( models.AlarmChange.timestamp > start_timestamp) else: query = query.filter( models.AlarmChange.timestamp >= start_timestamp) if end_timestamp: if end_timestamp_op == 'le': query = query.filter( models.AlarmChange.timestamp <= end_timestamp) else: query = query.filter( models.AlarmChange.timestamp < end_timestamp) query = query.order_by(desc(models.AlarmChange.timestamp)) return self._retrieve_alarm_history(query) def record_alarm_change(self, alarm_change): """Record alarm change event.""" session = self._engine_facade.get_session() with session.begin(): alarm_change_row = models.AlarmChange( event_id=alarm_change['event_id']) alarm_change_row.update(alarm_change) session.add(alarm_change_row) def clear_expired_alarm_history_data(self, alarm_history_ttl): """Clear expired alarm history data from the backend storage system. Clearing occurs according to the time-to-live. :param alarm_history_ttl: Number of seconds to keep alarm history records for. """ session = self._engine_facade.get_session() with session.begin(): valid_start = (timeutils.utcnow() - datetime.timedelta(seconds=alarm_history_ttl)) deleted_rows = (session.query(models.AlarmChange).filter( models.AlarmChange.timestamp < valid_start).delete()) LOG.info(_LI("%d alarm histories are removed from database"), deleted_rows)
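The TTL purge in clear_expired_alarm_history_data above reduces to simple datetime arithmetic. The following sketch (not part of the driver) isolates that cutoff computation using only the standard library; models.AlarmChange and session are assumed from the surrounding driver code:

import datetime

def history_cutoff(alarm_history_ttl, now=None):
    # Records with a timestamp strictly older than the returned value
    # are the ones deleted by clear_expired_alarm_history_data().
    now = now or datetime.datetime.utcnow()
    return now - datetime.timedelta(seconds=alarm_history_ttl)

# For a 7-day retention policy:
# cutoff = history_cutoff(7 * 24 * 3600)
# session.query(models.AlarmChange).filter(
#     models.AlarmChange.timestamp < cutoff).delete()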
class Connection(base.Connection): """Base Alarm Connection class for MongoDB and DB2 drivers.""" CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES, COMMON_AVAILABLE_CAPABILITIES) STORAGE_CAPABILITIES = utils.update_nested( base.Connection.STORAGE_CAPABILITIES, AVAILABLE_STORAGE_CAPABILITIES, ) def update_alarm(self, alarm): """Update alarm.""" data = alarm.as_dict() self.db.alarm.update( {'alarm_id': alarm.alarm_id}, {'$set': data}, upsert=True) stored_alarm = self.db.alarm.find({'alarm_id': alarm.alarm_id})[0] del stored_alarm['_id'] self._ensure_encapsulated_rule_format(stored_alarm) self._ensure_time_constraints(stored_alarm) return models.Alarm(**stored_alarm) create_alarm = update_alarm def delete_alarm(self, alarm_id): """Delete an alarm.""" self.db.alarm.remove({'alarm_id': alarm_id}) def record_alarm_change(self, alarm_change): """Record alarm change event.""" self.db.alarm_history.insert(alarm_change.copy()) def get_alarms(self, name=None, user=None, state=None, meter=None, project=None, enabled=None, alarm_id=None, pagination=None): """Yields a list of alarms that match the given filters :param name: The Alarm name. :param user: Optional ID for user that owns the resource. :param state: Optional string for alarm state. :param meter: Optional string for alarms associated with meter. :param project: Optional ID for project that owns the resource. :param enabled: Optional boolean to list disabled alarms. :param alarm_id: Optional alarm_id to return one alarm. :param pagination: Optional pagination query. """ if pagination: raise NotImplementedError('Pagination not implemented') q = {} if user is not None: q['user_id'] = user if project is not None: q['project_id'] = project if name is not None: q['name'] = name if enabled is not None: q['enabled'] = enabled if alarm_id is not None: q['alarm_id'] = alarm_id if state is not None: q['state'] = state if meter is not None: q['rule.meter_name'] = meter return self._retrieve_alarms(q, [], None) def get_alarm_changes(self, alarm_id, on_behalf_of, user=None, project=None, type=None, start_timestamp=None, start_timestamp_op=None, end_timestamp=None, end_timestamp_op=None): """Yields list of AlarmChanges describing alarm history Changes are always sorted in reverse order of occurrence, given the importance of currency. Segregation for non-administrative users is done on the basis of the on_behalf_of parameter. This allows such users to have visibility on both the changes initiated by themselves directly (generally creation, rule changes, or deletion) and also on those changes initiated on their behalf by the alarming service (state transitions after alarm thresholds are crossed).
:param alarm_id: ID of alarm to return changes for :param on_behalf_of: ID of tenant to scope changes query (None for administrative user, indicating all projects) :param user: Optional ID of user to return changes for :param project: Optional ID of project to return changes for :param type: Optional change type :param start_timestamp: Optional modified timestamp start range :param start_timestamp_op: Optional timestamp start range operation :param end_timestamp: Optional modified timestamp end range :param end_timestamp_op: Optional timestamp end range operation """ q = dict(alarm_id=alarm_id) if on_behalf_of is not None: q['on_behalf_of'] = on_behalf_of if user is not None: q['user_id'] = user if project is not None: q['project_id'] = project if type is not None: q['type'] = type if start_timestamp or end_timestamp: ts_range = pymongo_utils.make_timestamp_range(start_timestamp, end_timestamp, start_timestamp_op, end_timestamp_op) if ts_range: q['timestamp'] = ts_range return self._retrieve_alarm_changes(q, [("timestamp", pymongo.DESCENDING)], None) def query_alarms(self, filter_expr=None, orderby=None, limit=None): """Return an iterable of model.Alarm objects.""" return self._retrieve_data(filter_expr, orderby, limit, models.Alarm) def query_alarm_history(self, filter_expr=None, orderby=None, limit=None): """Return an iterable of model.AlarmChange objects.""" return self._retrieve_data(filter_expr, orderby, limit, models.AlarmChange) def _retrieve_data(self, filter_expr, orderby, limit, model): if limit == 0: return [] query_filter = {} orderby_filter = [("timestamp", pymongo.DESCENDING)] transformer = pymongo_utils.QueryTransformer() if orderby is not None: orderby_filter = transformer.transform_orderby(orderby) if filter_expr is not None: query_filter = transformer.transform_filter(filter_expr) retrieve = {models.Alarm: self._retrieve_alarms, models.AlarmChange: self._retrieve_alarm_changes} return retrieve[model](query_filter, orderby_filter, limit) def _retrieve_alarms(self, query_filter, orderby, limit): if limit is not None: alarms = self.db.alarm.find(query_filter, limit=limit, sort=orderby) else: alarms = self.db.alarm.find(query_filter, sort=orderby) for alarm in alarms: a = {} a.update(alarm) del a['_id'] self._ensure_encapsulated_rule_format(a) self._ensure_time_constraints(a) yield models.Alarm(**a) def _retrieve_alarm_changes(self, query_filter, orderby, limit): if limit is not None: alarms_history = self.db.alarm_history.find(query_filter, limit=limit, sort=orderby) else: alarms_history = self.db.alarm_history.find( query_filter, sort=orderby) for alarm_history in alarms_history: ah = {} ah.update(alarm_history) del ah['_id'] yield models.AlarmChange(**ah) @classmethod def _ensure_encapsulated_rule_format(cls, alarm): """Ensure the alarm returned by the storage has the correct format.
The previous format looks like: { 'alarm_id': '0ld-4l3rt', 'enabled': True, 'name': 'old-alert', 'description': 'old-alert', 'timestamp': None, 'meter_name': 'cpu', 'user_id': 'me', 'project_id': 'and-da-boys', 'comparison_operator': 'lt', 'threshold': 36, 'statistic': 'count', 'evaluation_periods': 1, 'period': 60, 'state': "insufficient data", 'state_timestamp': None, 'ok_actions': [], 'alarm_actions': ['http://nowhere/alarms'], 'insufficient_data_actions': [], 'repeat_actions': False, 'matching_metadata': {'key': 'value'} # or 'matching_metadata': [{'key': 'key', 'value': 'value'}] } """ if isinstance(alarm.get('rule'), dict): return alarm['type'] = 'threshold' alarm['rule'] = {} alarm['matching_metadata'] = cls._decode_matching_metadata( alarm['matching_metadata']) for field in ['period', 'evaluation_periods', 'threshold', 'statistic', 'comparison_operator', 'meter_name']: if field in alarm: alarm['rule'][field] = alarm[field] del alarm[field] query = [] for key in alarm['matching_metadata']: query.append({'field': key, 'op': 'eq', 'value': alarm['matching_metadata'][key], 'type': 'string'}) del alarm['matching_metadata'] alarm['rule']['query'] = query @staticmethod def _decode_matching_metadata(matching_metadata): if isinstance(matching_metadata, dict): # note(sileht): keep compatibility with alarm # with matching_metadata as a dict return matching_metadata else: new_matching_metadata = {} for elem in matching_metadata: new_matching_metadata[elem['key']] = elem['value'] return new_matching_metadata @staticmethod def _ensure_time_constraints(alarm): """Ensures the alarm has a time constraints field.""" if 'time_constraints' not in alarm: alarm['time_constraints'] = []
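To make the conversion above concrete, here is a sketch (not driver code) of what _ensure_encapsulated_rule_format does to a legacy flat alarm document; the field values are invented for illustration:

legacy = {
    'alarm_id': '0ld-4l3rt',
    'meter_name': 'cpu',
    'comparison_operator': 'lt',
    'threshold': 36,
    'statistic': 'count',
    'evaluation_periods': 1,
    'period': 60,
    'matching_metadata': {'key': 'value'},
}
# After Connection._ensure_encapsulated_rule_format(legacy):
# legacy['type'] == 'threshold'
# legacy['rule'] == {
#     'meter_name': 'cpu', 'comparison_operator': 'lt', 'threshold': 36,
#     'statistic': 'count', 'evaluation_periods': 1, 'period': 60,
#     'query': [{'field': 'key', 'op': 'eq',
#                'value': 'value', 'type': 'string'}],
# }
# and 'matching_metadata' plus the flat fields have been removed.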
class Connection(base.Connection): """Put the data into a SQLAlchemy database. Tables:: - meter - meter definition - { id: meter def id name: meter name type: meter type unit: meter unit } - sample - the raw incoming data - { id: sample id meter_id: meter id (->meter.id) user_id: user uuid project_id: project uuid resource_id: resource uuid source_id: source id resource_metadata: metadata dictionaries volume: sample volume timestamp: datetime message_signature: message signature message_id: message uuid } """ CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES, AVAILABLE_CAPABILITIES) def __init__(self, url): self._engine_facade = sqlalchemy_session.EngineFacade.from_config( url, cfg.CONF # TODO(Alexei_987) Remove access to global CONF object ) def upgrade(self): path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'sqlalchemy', 'migrate_repo') migration.db_sync(self._engine_facade.get_engine(), path) def clear(self): engine = self._engine_facade.get_engine() for table in reversed(models.Base.metadata.sorted_tables): engine.execute(table.delete()) self._engine_facade._session_maker.close_all() engine.dispose() @staticmethod def _create_meter(session, name, type, unit): try: nested = session.connection().dialect.name != 'sqlite' with session.begin(nested=nested, subtransactions=not nested): obj = session.query(models.Meter)\ .filter(models.Meter.name == name)\ .filter(models.Meter.type == type)\ .filter(models.Meter.unit == unit).first() if obj is None: obj = models.Meter(name=name, type=type, unit=unit) session.add(obj) except dbexc.DBDuplicateEntry: # retry function to pick up duplicate committed object obj = Connection._create_meter(session, name, type, unit) return obj def record_metering_data(self, data): """Write the data to the backend storage system. :param data: a dictionary such as returned by ceilometer.meter.meter_message_from_counter """ session = self._engine_facade.get_session() with session.begin(): # Record the raw data for the sample. rmetadata = data['resource_metadata'] meter = self._create_meter(session, data['counter_name'], data['counter_type'], data['counter_unit']) sample = models.Sample(meter_id=meter.id) session.add(sample) sample.resource_id = data['resource_id'] sample.project_id = data['project_id'] sample.user_id = data['user_id'] sample.timestamp = data['timestamp'] sample.resource_metadata = rmetadata sample.volume = data['counter_volume'] sample.message_signature = data['message_signature'] sample.message_id = data['message_id'] sample.source_id = data['source'] session.flush() if rmetadata: if isinstance(rmetadata, dict): for key, v in utils.dict_to_keyval(rmetadata): try: _model = META_TYPE_MAP[type(v)] except KeyError: LOG.warn( _("Unknown metadata type. Key (%s) will " "not be queryable."), key) else: session.add( _model(id=sample.id, meta_key=key, value=v)) def clear_expired_metering_data(self, ttl): """Clear expired data from the backend storage system according to the time-to-live. :param ttl: Number of seconds to keep records for. 
""" session = self._engine_facade.get_session() with session.begin(): end = timeutils.utcnow() - datetime.timedelta(seconds=ttl) sample_query = session.query(models.Sample)\ .filter(models.Sample.timestamp < end) for sample_obj in sample_query.all(): session.delete(sample_obj) def get_resources(self, user=None, project=None, source=None, start_timestamp=None, start_timestamp_op=None, end_timestamp=None, end_timestamp_op=None, metaquery=None, resource=None, pagination=None): """Return an iterable of api_models.Resource instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param source: Optional source filter. :param start_timestamp: Optional modified timestamp start range. :param start_timestamp_op: Optional start time operator, like gt, ge. :param end_timestamp: Optional modified timestamp end range. :param end_timestamp_op: Optional end time operator, like lt, le. :param metaquery: Optional dict with metadata to match on. :param resource: Optional resource filter. :param pagination: Optional pagination query. """ if pagination: raise NotImplementedError('Pagination not implemented') metaquery = metaquery or {} def _apply_filters(query): # TODO(gordc) this should be merged with make_query_from_filter for column, value in [(models.Sample.resource_id, resource), (models.Sample.user_id, user), (models.Sample.project_id, project), (models.Sample.source_id, source)]: if value: query = query.filter(column == value) if metaquery: query = apply_metaquery_filter(session, query, metaquery) if start_timestamp: if start_timestamp_op == 'gt': query = query.filter( models.Sample.timestamp > start_timestamp) else: query = query.filter( models.Sample.timestamp >= start_timestamp) if end_timestamp: if end_timestamp_op == 'le': query = query.filter( models.Sample.timestamp <= end_timestamp) else: query = query.filter( models.Sample.timestamp < end_timestamp) return query session = self._engine_facade.get_session() # get list of resource_ids res_q = session.query(distinct(models.Sample.resource_id)) res_q = _apply_filters(res_q) for res_id in res_q.all(): # get latest Sample max_q = session.query(models.Sample)\ .filter(models.Sample.resource_id == res_id[0]) max_q = _apply_filters(max_q) max_q = max_q.order_by(models.Sample.timestamp.desc(), models.Sample.id.desc()).limit(1) # get the min timestamp value. min_q = session.query(models.Sample.timestamp)\ .filter(models.Sample.resource_id == res_id[0]) min_q = _apply_filters(min_q) min_q = min_q.order_by(models.Sample.timestamp.asc()).limit(1) sample = max_q.first() if sample: yield api_models.Resource( resource_id=sample.resource_id, project_id=sample.project_id, first_sample_timestamp=min_q.first().timestamp, last_sample_timestamp=sample.timestamp, source=sample.source_id, user_id=sample.user_id, metadata=sample.resource_metadata) def get_meters(self, user=None, project=None, resource=None, source=None, metaquery=None, pagination=None): """Return an iterable of api_models.Meter instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param resource: Optional ID of the resource. :param source: Optional source filter. :param metaquery: Optional dict with metadata to match on. :param pagination: Optional pagination query. 
""" if pagination: raise NotImplementedError('Pagination not implemented') metaquery = metaquery or {} def _apply_filters(query): # TODO(gordc) this should be merged with make_query_from_filter for column, value in [(models.Sample.resource_id, resource), (models.Sample.user_id, user), (models.Sample.project_id, project), (models.Sample.source_id, source)]: if value: query = query.filter(column == value) if metaquery: query = apply_metaquery_filter(session, query, metaquery) return query session = self._engine_facade.get_session() # sample_subq is used to reduce sample records # by selecting a record for each (resource_id, meter_id). # max() is used to choice a sample record, so the latest record # is selected for each (resource_id, meter_id). sample_subq = session.query( func.max(models.Sample.id).label('id'))\ .group_by(models.Sample.meter_id, models.Sample.resource_id) sample_subq = sample_subq.subquery() # SELECT sample.* FROM sample INNER JOIN # (SELECT max(sample.id) AS id FROM sample # GROUP BY sample.resource_id, sample.meter_id) AS anon_2 # ON sample.id = anon_2.id query_sample = session.query(models.MeterSample).\ join(sample_subq, models.MeterSample.id == sample_subq.c.id) query_sample = _apply_filters(query_sample) for sample in query_sample.all(): yield api_models.Meter(name=sample.counter_name, type=sample.counter_type, unit=sample.counter_unit, resource_id=sample.resource_id, project_id=sample.project_id, source=sample.source_id, user_id=sample.user_id) def _retrieve_samples(self, query): samples = query.all() for s in samples: # Remove the id generated by the database when # the sample was inserted. It is an implementation # detail that should not leak outside of the driver. yield api_models.Sample( source=s.source_id, counter_name=s.counter_name, counter_type=s.counter_type, counter_unit=s.counter_unit, counter_volume=s.counter_volume, user_id=s.user_id, project_id=s.project_id, resource_id=s.resource_id, timestamp=s.timestamp, recorded_at=s.recorded_at, resource_metadata=s.resource_metadata, message_id=s.message_id, message_signature=s.message_signature, ) def get_samples(self, sample_filter, limit=None): """Return an iterable of api_models.Samples. :param sample_filter: Filter. :param limit: Maximum number of results to return. 
""" if limit == 0: return [] table = models.MeterSample session = self._engine_facade.get_session() query = session.query(table) query = make_query_from_filter(session, query, sample_filter, require_meter=False) transformer = QueryTransformer(table, query) transformer.apply_options(None, limit) return self._retrieve_samples(transformer.get_query()) def _retrieve_data(self, filter_expr, orderby, limit, table): if limit == 0: return [] session = self._engine_facade.get_session() query = session.query(table) transformer = QueryTransformer(table, query) if filter_expr is not None: transformer.apply_filter(filter_expr) transformer.apply_options(orderby, limit) retrieve = { models.MeterSample: self._retrieve_samples, models.Alarm: self._retrieve_alarms, models.AlarmChange: self._retrieve_alarm_history } return retrieve[table](transformer.get_query()) def query_samples(self, filter_expr=None, orderby=None, limit=None): return self._retrieve_data(filter_expr, orderby, limit, models.MeterSample) @staticmethod def _get_aggregate_functions(aggregate): if not aggregate: return [f for f in STANDARD_AGGREGATES.values()] functions = [] for a in aggregate: if a.func in STANDARD_AGGREGATES: functions.append(STANDARD_AGGREGATES[a.func]) elif a.func in UNPARAMETERIZED_AGGREGATES: functions.append(UNPARAMETERIZED_AGGREGATES[a.func]) elif a.func in PARAMETERIZED_AGGREGATES['compute']: validate = PARAMETERIZED_AGGREGATES['validate'].get(a.func) if not (validate and validate(a.param)): raise storage.StorageBadAggregate('Bad aggregate: %s.%s' % (a.func, a.param)) compute = PARAMETERIZED_AGGREGATES['compute'][a.func] functions.append(compute(a.param)) else: raise NotImplementedError('Selectable aggregate function %s' ' is not supported' % a.func) return functions def _make_stats_query(self, sample_filter, groupby, aggregate): select = [ models.Meter.unit, func.min(models.Sample.timestamp).label('tsmin'), func.max(models.Sample.timestamp).label('tsmax'), ] select.extend(self._get_aggregate_functions(aggregate)) session = self._engine_facade.get_session() if groupby: group_attributes = [getattr(models.Sample, g) for g in groupby] select.extend(group_attributes) query = session.query(*select).filter( models.Meter.id == models.Sample.meter_id)\ .group_by(models.Meter.unit) if groupby: query = query.group_by(*group_attributes) return make_query_from_filter(session, query, sample_filter) @staticmethod def _stats_result_aggregates(result, aggregate): stats_args = {} if isinstance(result.count, (int, long)): stats_args['count'] = result.count for attr in ['min', 'max', 'sum', 'avg']: if hasattr(result, attr): stats_args[attr] = getattr(result, attr) if aggregate: stats_args['aggregate'] = {} for a in aggregate: key = '%s%s' % (a.func, '/%s' % a.param if a.param else '') stats_args['aggregate'][key] = getattr(result, key) return stats_args @staticmethod def _stats_result_to_model(result, period, period_start, period_end, groupby, aggregate): stats_args = Connection._stats_result_aggregates(result, aggregate) stats_args['unit'] = result.unit duration = (timeutils.delta_seconds(result.tsmin, result.tsmax) if result.tsmin is not None and result.tsmax is not None else None) stats_args['duration'] = duration stats_args['duration_start'] = result.tsmin stats_args['duration_end'] = result.tsmax stats_args['period'] = period stats_args['period_start'] = period_start stats_args['period_end'] = period_end stats_args['groupby'] = (dict( (g, getattr(result, g)) for g in groupby) if groupby else None) return 
api_models.Statistics(**stats_args) def get_meter_statistics(self, sample_filter, period=None, groupby=None, aggregate=None): """Return an iterable of api_models.Statistics instances containing meter statistics described by the query parameters. The filter must have a meter value set. """ if groupby: for group in groupby: if group not in ['user_id', 'project_id', 'resource_id']: raise NotImplementedError('Unable to group by ' 'these fields') if not period: for res in self._make_stats_query(sample_filter, groupby, aggregate): if res.count: yield self._stats_result_to_model(res, 0, res.tsmin, res.tsmax, groupby, aggregate) return if not sample_filter.start or not sample_filter.end: res = self._make_stats_query(sample_filter, None, aggregate).first() if not res: # NOTE(liusheng): The 'res' may be NoneType, because no # sample was found with the sample filter(s). return query = self._make_stats_query(sample_filter, groupby, aggregate) # HACK(jd) This is an awful method to compute stats by period, but # since we're trying to be SQL agnostic we have to write portable # code, so here it is, admire! We're going to do one request to get # stats by period. We would like to use GROUP BY, but there's no # portable way to manipulate timestamp in SQL, so we can't. for period_start, period_end in base.iter_period( sample_filter.start or res.tsmin, sample_filter.end or res.tsmax, period): q = query.filter(models.Sample.timestamp >= period_start) q = q.filter(models.Sample.timestamp < period_end) for r in q.all(): if r.count: yield self._stats_result_to_model( result=r, period=int( timeutils.delta_seconds(period_start, period_end)), period_start=period_start, period_end=period_end, groupby=groupby, aggregate=aggregate) @staticmethod def _row_to_alarm_model(row): return api_models.Alarm( alarm_id=row.alarm_id, enabled=row.enabled, type=row.type, name=row.name, description=row.description, timestamp=row.timestamp, user_id=row.user_id, project_id=row.project_id, state=row.state, state_timestamp=row.state_timestamp, ok_actions=row.ok_actions, alarm_actions=row.alarm_actions, insufficient_data_actions=row.insufficient_data_actions, rule=row.rule, time_constraints=row.time_constraints, repeat_actions=row.repeat_actions) def _retrieve_alarms(self, query): return (self._row_to_alarm_model(x) for x in query.all()) def get_alarms(self, name=None, user=None, project=None, enabled=None, alarm_id=None, pagination=None): """Yields a list of alarms that match the given filters :param name: Optional name for alarm. :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param enabled: Optional boolean to list disabled alarms. :param alarm_id: Optional alarm_id to return one alarm. :param pagination: Optional pagination query. """ if pagination: raise NotImplementedError('Pagination not implemented') session = self._engine_facade.get_session() query = session.query(models.Alarm) if name is not None: query = query.filter(models.Alarm.name == name) if enabled is not None: query = query.filter(models.Alarm.enabled == enabled) if user is not None: query = query.filter(models.Alarm.user_id == user) if project is not None: query = query.filter(models.Alarm.project_id == project) if alarm_id is not None: query = query.filter(models.Alarm.alarm_id == alarm_id) return self._retrieve_alarms(query) def create_alarm(self, alarm): """Create an alarm. :param alarm: The alarm to create.
""" session = self._engine_facade.get_session() with session.begin(): alarm_row = models.Alarm(alarm_id=alarm.alarm_id) alarm_row.update(alarm.as_dict()) session.add(alarm_row) return self._row_to_alarm_model(alarm_row) def update_alarm(self, alarm): """Update an alarm. :param alarm: the new Alarm to update """ session = self._engine_facade.get_session() with session.begin(): alarm_row = session.merge(models.Alarm(alarm_id=alarm.alarm_id)) alarm_row.update(alarm.as_dict()) return self._row_to_alarm_model(alarm_row) def delete_alarm(self, alarm_id): """Delete an alarm :param alarm_id: ID of the alarm to delete """ session = self._engine_facade.get_session() with session.begin(): session.query(models.Alarm).filter( models.Alarm.alarm_id == alarm_id).delete() @staticmethod def _row_to_alarm_change_model(row): return api_models.AlarmChange(event_id=row.event_id, alarm_id=row.alarm_id, type=row.type, detail=row.detail, user_id=row.user_id, project_id=row.project_id, on_behalf_of=row.on_behalf_of, timestamp=row.timestamp) def query_alarms(self, filter_expr=None, orderby=None, limit=None): """Yields a lists of alarms that match filter """ return self._retrieve_data(filter_expr, orderby, limit, models.Alarm) def _retrieve_alarm_history(self, query): return (self._row_to_alarm_change_model(x) for x in query.all()) def query_alarm_history(self, filter_expr=None, orderby=None, limit=None): """Return an iterable of model.AlarmChange objects. """ return self._retrieve_data(filter_expr, orderby, limit, models.AlarmChange) def get_alarm_changes(self, alarm_id, on_behalf_of, user=None, project=None, type=None, start_timestamp=None, start_timestamp_op=None, end_timestamp=None, end_timestamp_op=None): """Yields list of AlarmChanges describing alarm history Changes are always sorted in reverse order of occurrence, given the importance of currency. Segregation for non-administrative users is done on the basis of the on_behalf_of parameter. This allows such users to have visibility on both the changes initiated by themselves directly (generally creation, rule changes, or deletion) and also on those changes initiated on their behalf by the alarming service (state transitions after alarm thresholds are crossed). 
:param alarm_id: ID of alarm to return changes for :param on_behalf_of: ID of tenant to scope changes query (None for administrative user, indicating all projects) :param user: Optional ID of user to return changes for :param project: Optional ID of project to return changes for :param type: Optional change type :param start_timestamp: Optional modified timestamp start range :param start_timestamp_op: Optional timestamp start range operation :param end_timestamp: Optional modified timestamp end range :param end_timestamp_op: Optional timestamp end range operation """ session = self._engine_facade.get_session() query = session.query(models.AlarmChange) query = query.filter(models.AlarmChange.alarm_id == alarm_id) if on_behalf_of is not None: query = query.filter( models.AlarmChange.on_behalf_of == on_behalf_of) if user is not None: query = query.filter(models.AlarmChange.user_id == user) if project is not None: query = query.filter(models.AlarmChange.project_id == project) if type is not None: query = query.filter(models.AlarmChange.type == type) if start_timestamp: if start_timestamp_op == 'gt': query = query.filter( models.AlarmChange.timestamp > start_timestamp) else: query = query.filter( models.AlarmChange.timestamp >= start_timestamp) if end_timestamp: if end_timestamp_op == 'le': query = query.filter( models.AlarmChange.timestamp <= end_timestamp) else: query = query.filter( models.AlarmChange.timestamp < end_timestamp) query = query.order_by(desc(models.AlarmChange.timestamp)) return self._retrieve_alarm_history(query) def record_alarm_change(self, alarm_change): """Record alarm change event. """ session = self._engine_facade.get_session() with session.begin(): alarm_change_row = models.AlarmChange( event_id=alarm_change['event_id']) alarm_change_row.update(alarm_change) session.add(alarm_change_row) def _get_or_create_trait_type(self, trait_type, data_type, session=None): """Find if this trait already exists in the database, and if it does not, create a new entry in the trait type table. """ if session is None: session = self._engine_facade.get_session() with session.begin(subtransactions=True): tt = session.query(models.TraitType).filter( models.TraitType.desc == trait_type, models.TraitType.data_type == data_type).first() if not tt: tt = models.TraitType(trait_type, data_type) session.add(tt) return tt def _make_trait(self, trait_model, event, session=None): """Make a new Trait from a Trait model. Doesn't flush or add to session. """ trait_type = self._get_or_create_trait_type(trait_model.name, trait_model.dtype, session) value_map = models.Trait._value_map values = { 't_string': None, 't_float': None, 't_int': None, 't_datetime': None } value = trait_model.value values[value_map[trait_model.dtype]] = value return models.Trait(trait_type, event, **values) def _get_or_create_event_type(self, event_type, session=None): """Here, we check to see if an event type with the supplied name already exists. If not, we create it and return the record. This may result in a flush. """ if session is None: session = self._engine_facade.get_session() with session.begin(subtransactions=True): et = session.query(models.EventType).filter( models.EventType.desc == event_type).first() if not et: et = models.EventType(event_type) session.add(et) return et def _record_event(self, session, event_model): """Store a single Event, including related Traits.
""" with session.begin(subtransactions=True): event_type = self._get_or_create_event_type(event_model.event_type, session=session) event = models.Event(event_model.message_id, event_type, event_model.generated) session.add(event) new_traits = [] if event_model.traits: for trait in event_model.traits: t = self._make_trait(trait, event, session=session) session.add(t) new_traits.append(t) # Note: we don't flush here, explicitly (unless a new trait or event # does it). Otherwise, just wait until all the Events are staged. return (event, new_traits) def record_events(self, event_models): """Write the events to SQL database via sqlalchemy. :param event_models: a list of model.Event objects. Returns a list of events that could not be saved in a (reason, event) tuple. Reasons are enumerated in storage.model.Event Flush when they're all added, unless new EventTypes or TraitTypes are added along the way. """ session = self._engine_facade.get_session() events = [] problem_events = [] for event_model in event_models: event = None try: with session.begin(): event = self._record_event(session, event_model) except dbexc.DBDuplicateEntry: problem_events.append( (api_models.Event.DUPLICATE, event_model)) except Exception as e: LOG.exception(_('Failed to record event: %s') % e) problem_events.append( (api_models.Event.UNKNOWN_PROBLEM, event_model)) events.append(event) return problem_events def get_events(self, event_filter): """Return an iterable of model.Event objects. :param event_filter: EventFilter instance """ start = event_filter.start_time end = event_filter.end_time session = self._engine_facade.get_session() LOG.debug(_("Getting events that match filter: %s") % event_filter) with session.begin(): event_query = session.query(models.Event) # Build up the join conditions event_join_conditions = [ models.EventType.id == models.Event.event_type_id ] if event_filter.event_type: event_join_conditions\ .append(models.EventType.desc == event_filter.event_type) event_query = event_query.join(models.EventType, and_(*event_join_conditions)) # Build up the where conditions event_filter_conditions = [] if event_filter.message_id: event_filter_conditions\ .append(models.Event.message_id == event_filter.message_id) if start: event_filter_conditions.append(models.Event.generated >= start) if end: event_filter_conditions.append(models.Event.generated <= end) if event_filter_conditions: event_query = event_query\ .filter(and_(*event_filter_conditions)) event_models_dict = {} if event_filter.traits_filter: for trait_filter in event_filter.traits_filter: # Build a sub query that joins Trait to TraitType # where the trait name matches trait_name = trait_filter.pop('key') conditions = [ models.Trait.trait_type_id == models.TraitType.id, models.TraitType.desc == trait_name ] for key, value in trait_filter.iteritems(): if key == 'string': conditions.append(models.Trait.t_string == value) elif key == 'integer': conditions.append(models.Trait.t_int == value) elif key == 'datetime': conditions.append(models.Trait.t_datetime == value) elif key == 'float': conditions.append(models.Trait.t_float == value) trait_query = session.query(models.Trait.event_id)\ .join(models.TraitType, and_(*conditions)).subquery() event_query = event_query\ .join(trait_query, models.Event.id == trait_query.c.event_id) else: # If there are no trait filters, grab the events from the db query = session.query(models.Event.id, models.Event.generated, models.Event.message_id, models.EventType.desc)\ .join(models.EventType, and_(*event_join_conditions)) 
if event_filter_conditions: query = query.filter(and_(*event_filter_conditions)) for (id, generated, message_id, desc) in query.all(): event_models_dict[id] = api_models.Event( message_id, desc, generated, []) # Build event models for the events event_query = event_query.subquery() query = session.query(models.Trait)\ .join(models.TraitType, models.Trait.trait_type_id == models.TraitType.id)\ .join(event_query, models.Trait.event_id == event_query.c.id) # Now convert the sqlalchemy objects back into Models ... for trait in query.all(): event = event_models_dict.get(trait.event_id) if not event: event = api_models.Event(trait.event.message_id, trait.event.event_type.desc, trait.event.generated, []) event_models_dict[trait.event_id] = event trait_model = api_models.Trait(trait.trait_type.desc, trait.trait_type.data_type, trait.get_value()) event.append_trait(trait_model) event_models = event_models_dict.values() return sorted(event_models, key=operator.attrgetter('generated')) def get_event_types(self): """Return all event types as an iterable of strings. """ session = self._engine_facade.get_session() with session.begin(): query = session.query(models.EventType.desc)\ .order_by(models.EventType.desc) for name in query.all(): # The query returns a tuple with one element. yield name[0] def get_trait_types(self, event_type): """Return dictionaries containing the name and data type of each trait type. Only trait types for the provided event_type are returned. :param event_type: the type of the Event """ session = self._engine_facade.get_session() LOG.debug(_("Get traits for %s") % event_type) with session.begin(): query = (session.query( models.TraitType.desc, models.TraitType.data_type).join( models.Trait, models.Trait.trait_type_id == models.TraitType.id).join( models.Event, models.Event.id == models.Trait.event_id).join( models.EventType, and_( models.EventType.id == models.Event.event_type_id, models.EventType.desc == event_type)).group_by( models.TraitType.desc, models.TraitType.data_type).distinct()) for desc, type in query.all(): yield {'name': desc, 'data_type': type} def get_traits(self, event_type, trait_type=None): """Return all trait instances associated with an event_type. If trait_type is specified, only return instances of that trait type. :param event_type: the type of the Event to filter by :param trait_type: the name of the Trait to filter by """ session = self._engine_facade.get_session() with session.begin(): trait_type_filters = [ models.TraitType.id == models.Trait.trait_type_id ] if trait_type: trait_type_filters.append(models.TraitType.desc == trait_type) query = (session.query(models.Trait).join( models.TraitType, and_(*trait_type_filters)).join( models.Event, models.Event.id == models.Trait.event_id).join( models.EventType, and_(models.EventType.id == models.Event.event_type_id, models.EventType.desc == event_type))) for trait in query.all(): type = trait.trait_type yield api_models.Trait(name=type.desc, dtype=type.data_type, value=trait.get_value())
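Each trait filter in get_events above carries the trait name under 'key' plus exactly one typed value, which selects one of the typed columns on the Trait model. The sketch below (an illustrative helper, not part of the driver) isolates that dispatch; the column names match the Trait model used above:

TRAIT_VALUE_COLUMNS = {
    'string': 't_string',
    'integer': 't_int',
    'datetime': 't_datetime',
    'float': 't_float',
}

def trait_condition_column(trait_filter):
    # Mirrors the if/elif chain in get_events() after 'key' is popped,
    # e.g. {'string': 'inst-0045'} -> ('t_string', 'inst-0045').
    for val_type, column in TRAIT_VALUE_COLUMNS.items():
        if val_type in trait_filter:
            return column, trait_filter[val_type]
    raise ValueError('no typed value in trait filter')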
class Connection(base.Connection): """Put the event data into a SQLAlchemy database. Tables:: - EventType - event definition - { id: event type id desc: description of event } - Event - event data - { id: event id message_id: message id generated: timestamp of event event_type_id: event type -> eventtype.id } - TraitInt - int trait value - { event_id: event -> event.id key: trait type value: integer value } - TraitDatetime - datetime trait value - { event_id: event -> event.id key: trait type value: datetime value } - TraitText - text trait value - { event_id: event -> event.id key: trait type value: text value } - TraitFloat - float trait value - { event_id: event -> event.id key: trait type value: float value } """ CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES, AVAILABLE_CAPABILITIES) STORAGE_CAPABILITIES = utils.update_nested( base.Connection.STORAGE_CAPABILITIES, AVAILABLE_STORAGE_CAPABILITIES, ) def __init__(self, url): # Set max_retries to 0, since oslo.db in certain cases may otherwise # retry the db connection up to max_retries ^ 2 times on failure; # db reconnection has already been implemented in the # storage.__init__.get_connection_from_config function options = dict(cfg.CONF.database.items()) options['max_retries'] = 0 self._engine_facade = db_session.EngineFacade(url, **options) def upgrade(self): # NOTE(gordc): to minimise memory, only import migration when needed from oslo.db.sqlalchemy import migration path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '..', '..', 'storage', 'sqlalchemy', 'migrate_repo') migration.db_sync(self._engine_facade.get_engine(), path) def clear(self): engine = self._engine_facade.get_engine() for table in reversed(models.Base.metadata.sorted_tables): engine.execute(table.delete()) self._engine_facade._session_maker.close_all() engine.dispose() def _get_or_create_event_type(self, event_type, session=None): """Check if an event type with the supplied name already exists. If not, we create it and return the record. This may result in a flush. """ if session is None: session = self._engine_facade.get_session() with session.begin(subtransactions=True): et = session.query(models.EventType).filter( models.EventType.desc == event_type).first() if not et: et = models.EventType(event_type) session.add(et) return et def record_events(self, event_models): """Write the events to SQL database via sqlalchemy. :param event_models: a list of model.Event objects. Returns a list of events that could not be saved in a (reason, event) tuple. Reasons are enumerated in storage.model.Event Flush when they're all added, unless new EventTypes or TraitTypes are added along the way.
""" session = self._engine_facade.get_session() problem_events = [] for event_model in event_models: event = None try: with session.begin(): event_type = self._get_or_create_event_type( event_model.event_type, session=session) event = models.Event(event_model.message_id, event_type, event_model.generated) session.add(event) session.flush() if event_model.traits: trait_map = {} for trait in event_model.traits: if trait_map.get(trait.dtype) is None: trait_map[trait.dtype] = [] trait_map[trait.dtype].append({ 'event_id': event.id, 'key': trait.name, 'value': trait.value }) for dtype in trait_map.keys(): model = TRAIT_ID_TO_MODEL[dtype] session.execute(model.__table__.insert(), trait_map[dtype]) except dbexc.DBDuplicateEntry as e: LOG.exception(_("Failed to record duplicated event: %s") % e) problem_events.append( (api_models.Event.DUPLICATE, event_model)) except KeyError as e: LOG.exception(_('Failed to record event: %s') % e) problem_events.append( (api_models.Event.INCOMPATIBLE_TRAIT, event_model)) except Exception as e: LOG.exception(_('Failed to record event: %s') % e) problem_events.append( (api_models.Event.UNKNOWN_PROBLEM, event_model)) return problem_events def get_events(self, event_filter): """Return an iterable of model.Event objects. :param event_filter: EventFilter instance """ session = self._engine_facade.get_session() with session.begin(): event_query = session.query(models.Event) # Build up the join conditions event_join_conditions = [ models.EventType.id == models.Event.event_type_id ] if event_filter.event_type: event_join_conditions.append( models.EventType.desc == event_filter.event_type) event_query = event_query.join(models.EventType, sa.and_(*event_join_conditions)) # Build up the where conditions event_filter_conditions = [] if event_filter.message_id: event_filter_conditions.append( models.Event.message_id == event_filter.message_id) if event_filter.start_timestamp: event_filter_conditions.append( models.Event.generated >= event_filter.start_timestamp) if event_filter.end_timestamp: event_filter_conditions.append( models.Event.generated <= event_filter.end_timestamp) if event_filter_conditions: event_query = (event_query.filter( sa.and_(*event_filter_conditions))) trait_subq = None # Build trait filter if event_filter.traits_filter: trait_qlist = [] for trait_filter in event_filter.traits_filter: key = trait_filter.pop('key') op = trait_filter.pop('op', 'eq') trait_qlist.append( _build_trait_query(session, trait_filter.keys()[0], key, trait_filter.values()[0], op)) trait_subq = trait_qlist.pop() if trait_qlist: trait_subq = trait_subq.intersect(*trait_qlist) trait_subq = trait_subq.subquery() query = (session.query(models.Event.id).join( models.EventType, sa.and_(*event_join_conditions))) if trait_subq is not None: query = query.join(trait_subq, trait_subq.c.ev_id == models.Event.id) if event_filter_conditions: query = query.filter(sa.and_(*event_filter_conditions)) event_list = {} # get a list of all events that match filters for (id_, generated, message_id, desc) in query.add_columns(models.Event.generated, models.Event.message_id, models.EventType.desc).order_by( models.Event.generated).all(): event_list[id_] = api_models.Event(message_id, desc, generated, []) # Query all traits related to events. # NOTE (gordc): cast is done because pgsql defaults to TEXT when # handling unknown values such as null. 
trait_q = (query.join( models.TraitDatetime, models.TraitDatetime.event_id == models.Event.id).add_columns( models.TraitDatetime.key, models.TraitDatetime.value, sa.cast(sa.null(), sa.Integer), sa.cast(sa.null(), sa.Float(53)), sa.cast(sa.null(), sa.Text))).union( query.join(models.TraitInt, models.TraitInt.event_id == models.Event.id).add_columns( models.TraitInt.key, sa.null(), models.TraitInt.value, sa.null(), sa.null()), query.join( models.TraitFloat, models.TraitFloat.event_id == models.Event.id).add_columns( models.TraitFloat.key, sa.null(), sa.null(), models.TraitFloat.value, sa.null()), query.join( models.TraitText, models.TraitText.event_id == models.Event.id).add_columns( models.TraitText.key, sa.null(), sa.null(), sa.null(), models.TraitText.value)) for id_, key, t_date, t_int, t_float, t_text in trait_q.all(): if t_int: dtype = api_models.Trait.INT_TYPE val = t_int elif t_float: dtype = api_models.Trait.FLOAT_TYPE val = t_float elif t_date: dtype = api_models.Trait.DATETIME_TYPE val = t_date else: dtype = api_models.Trait.TEXT_TYPE val = t_text trait_model = api_models.Trait(key, dtype, val) event_list[id_].append_trait(trait_model) return event_list.values() def get_event_types(self): """Return all event types as an iterable of strings.""" session = self._engine_facade.get_session() with session.begin(): query = (session.query(models.EventType.desc).order_by( models.EventType.desc)) for name in query.all(): # The query returns a tuple with one element. yield name[0] def get_trait_types(self, event_type): """Return a dictionary containing the name and data type of the trait. Only trait types for the provided event_type are returned. :param event_type: the type of the Event """ session = self._engine_facade.get_session() with session.begin(): for trait_model in [ models.TraitText, models.TraitInt, models.TraitFloat, models.TraitDatetime ]: query = (session.query(trait_model.key).join( models.Event, models.Event.id == trait_model.event_id).join( models.EventType, sa.and_( models.EventType.id == models.Event.event_type_id, models.EventType.desc == event_type)).distinct()) dtype = TRAIT_MODEL_TO_ID.get(trait_model) for row in query.all(): yield {'name': row[0], 'data_type': dtype} def get_traits(self, event_type, trait_type=None): """Return all trait instances associated with an event_type. If trait_type is specified, only return instances of that trait type. :param event_type: the type of the Event to filter by :param trait_type: the name of the Trait to filter by """ session = self._engine_facade.get_session() with session.begin(): for trait_model in [ models.TraitText, models.TraitInt, models.TraitFloat, models.TraitDatetime ]: query = (session.query( trait_model.key, trait_model.value).join( models.Event, models.Event.id == trait_model.event_id).join( models.EventType, sa.and_( models.EventType.id == models.Event.event_type_id, models.EventType.desc == event_type)).order_by( trait_model.key)) if trait_type: query = query.filter(trait_model.key == trait_type) dtype = TRAIT_MODEL_TO_ID.get(trait_model) for k, v in query.all(): yield api_models.Trait(name=k, dtype=dtype, value=v)
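The bulk insert in record_events above first groups an event's traits by data type so that each typed trait table receives a single executemany-style INSERT. This sketch (not driver code) isolates that bucketing step; TRAIT_ID_TO_MODEL is assumed from elsewhere in the module:

def bucket_traits_by_dtype(event_id, traits):
    # One bucket per trait data type, each row shaped for an INSERT
    # into the matching TraitInt/TraitFloat/TraitText/TraitDatetime table.
    trait_map = {}
    for trait in traits:
        trait_map.setdefault(trait.dtype, []).append({
            'event_id': event_id,
            'key': trait.name,
            'value': trait.value,
        })
    return trait_map

# for dtype, rows in bucket_traits_by_dtype(event.id, traits).items():
#     session.execute(TRAIT_ID_TO_MODEL[dtype].__table__.insert(), rows)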
class Connection(hbase_base.Connection, base.Connection): """Put the metering data into a HBase database Collections: - meter (describes sample actually): - row-key: consists of reversed timestamp, meter and a message uuid for purposes of uniqueness - Column Families: f: contains the following qualifiers: - counter_name: <name of counter> - counter_type: <type of counter> - counter_unit: <unit of counter> - counter_volume: <volume of counter> - message: <raw incoming data> - message_id: <id of message> - message_signature: <signature of message> - resource_metadata: raw metadata for corresponding resource of the meter - project_id: <id of project> - resource_id: <id of resource> - user_id: <id of user> - recorded_at: <datetime when sample has been recorded (utc.now)> - flattened metadata with prefix r_metadata. e.g.:: f:r_metadata.display_name or f:r_metadata.tag - rts: <reversed timestamp of entry> - timestamp: <meter's timestamp (came from message)> - source for meter with prefix 's' - resource: - row_key: uuid of resource - Column Families: f: contains the following qualifiers: - resource_metadata: raw metadata for corresponding resource - project_id: <id of project> - resource_id: <id of resource> - user_id: <id of user> - flattened metadata with prefix r_metadata. e.g.:: f:r_metadata.display_name or f:r_metadata.tag - sources for all corresponding meters with prefix 's' - all meters with prefix 'm' for this resource in format: .. code-block:: python "%s:%s:%s:%s:%s" % (rts, source, counter_name, counter_type, counter_unit) """ CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES, AVAILABLE_CAPABILITIES) STORAGE_CAPABILITIES = utils.update_nested( base.Connection.STORAGE_CAPABILITIES, AVAILABLE_STORAGE_CAPABILITIES, ) _memory_instance = None RESOURCE_TABLE = "resource" METER_TABLE = "meter" def __init__(self, url): super(Connection, self).__init__(url) def upgrade(self): tables = [self.RESOURCE_TABLE, self.METER_TABLE] column_families = {'f': dict(max_versions=1)} with self.conn_pool.connection() as conn: hbase_utils.create_tables(conn, tables, column_families) hbase_migration.migrate_tables(conn, tables) def clear(self): LOG.debug('Dropping HBase schema...') with self.conn_pool.connection() as conn: for table in [self.RESOURCE_TABLE, self.METER_TABLE]: try: conn.disable_table(table) except Exception: LOG.debug('Cannot disable table but ignoring error') try: conn.delete_table(table) except Exception: LOG.debug('Cannot delete table but ignoring error') def record_metering_data(self, data): """Write the data to the backend storage system. :param data: a dictionary such as returned by ceilometer.meter.meter_message_from_counter """ with self.conn_pool.connection() as conn: resource_table = conn.table(self.RESOURCE_TABLE) meter_table = conn.table(self.METER_TABLE) resource_metadata = data.get('resource_metadata', {}) # Determine the name of new meter rts = hbase_utils.timestamp(data['timestamp']) new_meter = hbase_utils.prepare_key(rts, data['source'], data['counter_name'], data['counter_type'], data['counter_unit']) # TODO(nprivalova): try not to store resource_id resource = hbase_utils.serialize_entry( **{ 'source': data['source'], 'meter': { new_meter: data['timestamp'] }, 'resource_metadata': resource_metadata, 'resource_id': data['resource_id'], 'project_id': data['project_id'], 'user_id': data['user_id'] }) # Here we put entry in HBase with our own timestamp. 
This is needed # when samples arrive out-of-order # If we use timestamp=data['timestamp'] the newest data will be # automatically 'on the top'. This keeps the metadata up-to-date: # metadata from the newest samples is considered the current state. ts = int(time.mktime(data['timestamp'].timetuple()) * 1000) resource_table.put(hbase_utils.encode_unicode(data['resource_id']), resource, ts) # Rowkey consists of reversed timestamp, meter and a # message uuid for purposes of uniqueness row = hbase_utils.prepare_key(data['counter_name'], rts, data['message_id']) record = hbase_utils.serialize_entry( data, **{ 'source': data['source'], 'rts': rts, 'message': data, 'recorded_at': timeutils.utcnow() }) meter_table.put(row, record) def get_resources(self, user=None, project=None, source=None, start_timestamp=None, start_timestamp_op=None, end_timestamp=None, end_timestamp_op=None, metaquery=None, resource=None, limit=None): """Return an iterable of models.Resource instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param source: Optional source filter. :param start_timestamp: Optional modified timestamp start range. :param start_timestamp_op: Optional start time operator, like ge, gt. :param end_timestamp: Optional modified timestamp end range. :param end_timestamp_op: Optional end time operator, like lt, le. :param metaquery: Optional dict with metadata to match on. :param resource: Optional resource filter. :param limit: Maximum number of results to return. """ if limit == 0: return q = hbase_utils.make_query(metaquery=metaquery, user_id=user, project_id=project, resource_id=resource, source=source) q = hbase_utils.make_meter_query_for_resource(start_timestamp, start_timestamp_op, end_timestamp, end_timestamp_op, source, q) with self.conn_pool.connection() as conn: resource_table = conn.table(self.RESOURCE_TABLE) LOG.debug("Query Resource table: %s", q) for resource_id, data in resource_table.scan(filter=q, limit=limit): f_res, meters, md = hbase_utils.deserialize_entry(data) resource_id = hbase_utils.encode_unicode(resource_id) # Unfortunately happybase doesn't keep the results from # HBase ordered, so we need to find the min and max # manually first_ts = min(meters, key=operator.itemgetter(1))[1] last_ts = max(meters, key=operator.itemgetter(1))[1] source = meters[0][0][1] # If we use QualifierFilter then HBase returns only the # qualifiers filtered by, not the whole entry. That's why # we need to request the additional qualifiers manually. if 'project_id' not in f_res and 'user_id' not in f_res: row = resource_table.row(resource_id, columns=[ 'f:project_id', 'f:user_id', 'f:resource_metadata' ]) f_res, _m, md = hbase_utils.deserialize_entry(row) yield models.Resource(resource_id=resource_id, first_sample_timestamp=first_ts, last_sample_timestamp=last_ts, project_id=f_res['project_id'], source=source, user_id=f_res['user_id'], metadata=md) def get_meters(self, user=None, project=None, resource=None, source=None, metaquery=None, limit=None, unique=False): """Return an iterable of models.Meter instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param resource: Optional resource filter. :param source: Optional source filter. :param metaquery: Optional dict with metadata to match on. :param limit: Maximum number of results to return. :param unique: If set to true, return only unique meter information.
""" if limit == 0: return metaquery = metaquery or {} with self.conn_pool.connection() as conn: resource_table = conn.table(self.RESOURCE_TABLE) q = hbase_utils.make_query(metaquery=metaquery, user_id=user, project_id=project, resource_id=resource, source=source) LOG.debug("Query Resource table: %s", q) gen = resource_table.scan(filter=q) # We need result set to be sure that user doesn't receive several # same meters. Please see bug # https://bugs.launchpad.net/ceilometer/+bug/1301371 result = set() for ignored, data in gen: flatten_result, meters, md = hbase_utils.deserialize_entry( data) for m in meters: if limit and len(result) >= limit: return _m_rts, m_source, name, m_type, unit = m[0] if unique: meter_dict = { 'name': name, 'type': m_type, 'unit': unit, 'resource_id': None, 'project_id': None, 'user_id': None, 'source': None } else: meter_dict = { 'name': name, 'type': m_type, 'unit': unit, 'resource_id': flatten_result['resource_id'], 'project_id': flatten_result['project_id'], 'user_id': flatten_result['user_id'] } frozen_meter = frozenset(meter_dict.items()) if frozen_meter in result: continue result.add(frozen_meter) if not unique: meter_dict.update( {'source': m_source if m_source else None}) yield models.Meter(**meter_dict) def get_samples(self, sample_filter, limit=None): """Return an iterable of models.Sample instances. :param sample_filter: Filter. :param limit: Maximum number of results to return. """ if limit == 0: return with self.conn_pool.connection() as conn: meter_table = conn.table(self.METER_TABLE) q, start, stop, columns = ( hbase_utils.make_sample_query_from_filter(sample_filter, require_meter=False)) LOG.debug("Query Meter Table: %s", q) gen = meter_table.scan(filter=q, row_start=start, row_stop=stop, limit=limit, columns=columns) for ignored, meter in gen: d_meter = hbase_utils.deserialize_entry(meter)[0] d_meter['message']['counter_volume'] = (float( d_meter['message']['counter_volume'])) d_meter['message']['recorded_at'] = d_meter['recorded_at'] yield models.Sample(**d_meter['message']) @staticmethod def _update_meter_stats(stat, meter): """Do the stats calculation on a requested time bucket in stats dict :param stats: dict where aggregated stats are kept :param index: time bucket index in stats :param meter: meter record as returned from HBase :param start_time: query start time :param period: length of the time bucket """ vol = meter['counter_volume'] ts = meter['timestamp'] stat.unit = meter['counter_unit'] stat.min = min(vol, stat.min or vol) stat.max = max(vol, stat.max) stat.sum = vol + (stat.sum or 0) stat.count += 1 stat.avg = (stat.sum / float(stat.count)) stat.duration_start = min(ts, stat.duration_start or ts) stat.duration_end = max(ts, stat.duration_end or ts) stat.duration = (timeutils.delta_seconds(stat.duration_start, stat.duration_end)) def get_meter_statistics(self, sample_filter, period=None, groupby=None, aggregate=None): """Return an iterable of models.Statistics instances. Items are containing meter statistics described by the query parameters. The filter must have a meter value set. .. note:: Due to HBase limitations the aggregations are implemented in the driver itself, therefore this method will be quite slow because of all the Thrift traffic it is going to create. 
""" if groupby: raise ceilometer.NotImplementedError("Group by not implemented.") if aggregate: raise ceilometer.NotImplementedError( 'Selectable aggregates not implemented') with self.conn_pool.connection() as conn: meter_table = conn.table(self.METER_TABLE) q, start, stop, columns = ( hbase_utils.make_sample_query_from_filter(sample_filter)) # These fields are used in statistics' calculating columns.extend( ['f:timestamp', 'f:counter_volume', 'f:counter_unit']) meters = map( hbase_utils.deserialize_entry, list(meter for (ignored, meter) in meter_table.scan( filter=q, row_start=start, row_stop=stop, columns=columns))) if sample_filter.start_timestamp: start_time = sample_filter.start_timestamp elif meters: start_time = meters[-1][0]['timestamp'] else: start_time = None if sample_filter.end_timestamp: end_time = sample_filter.end_timestamp elif meters: end_time = meters[0][0]['timestamp'] else: end_time = None results = [] if not period: period = 0 period_start = start_time period_end = end_time # As our HBase meters are stored as newest-first, we need to iterate # in the reverse order for meter in meters[::-1]: ts = meter[0]['timestamp'] if period: offset = int( timeutils.delta_seconds(start_time, ts) / period) * period period_start = start_time + datetime.timedelta(0, offset) if not results or not results[-1].period_start == period_start: if period: period_end = period_start + datetime.timedelta(0, period) results.append( models.Statistics(unit='', count=0, min=0, max=0, avg=0, sum=0, period=period, period_start=period_start, period_end=period_end, duration=None, duration_start=None, duration_end=None, groupby=None)) self._update_meter_stats(results[-1], meter[0]) return results
class Connection(base.Connection):
    """Put the event data into an ElasticSearch db.

    Events in ElasticSearch are indexed by day and stored by event_type.
    An example document::

      {"_index":"events_2014-10-21",
       "_type":"event_type0",
       "_id":"dc90e464-65ab-4a5d-bf66-ecb956b5d779",
       "_score":1.0,
       "_source":{"timestamp": "2014-10-21T20:02:09.274797",
                  "traits": {"id4_0": "2014-10-21T20:02:09.274797",
                             "id3_0": 0.7510790937279408,
                             "id2_0": 5,
                             "id1_0": "18c97ba1-3b74-441a-b948-a702a30cbce2"}
                 }
      }
    """

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )
    index_name = 'events'
    # NOTE(gordc): mainly for testing, data is not searchable after write,
    # it is only searchable after periodic refreshes.
    _refresh_on_write = False

    def __init__(self, url):
        url_split = netutils.urlsplit(url)
        self.conn = es.Elasticsearch(url_split.netloc)

    def upgrade(self):
        iclient = es.client.IndicesClient(self.conn)
        ts_template = {
            'template': '*',
            'mappings': {'_default_':
                         {'_timestamp': {'enabled': True,
                                         'store': True},
                          'properties': {'traits': {'type': 'nested'}}}}}
        iclient.put_template(name='enable_timestamp', body=ts_template)

    def record_events(self, events):

        def _build_bulk_index(event_list):
            for ev in event_list:
                traits = {t.name: t.value for t in ev.traits}
                yield {'_op_type': 'create',
                       '_index': '%s_%s' % (self.index_name,
                                            ev.generated.date().isoformat()),
                       '_type': ev.event_type,
                       '_id': ev.message_id,
                       '_source': {'timestamp': ev.generated.isoformat(),
                                   'traits': traits,
                                   'raw': ev.raw}}

        error = None
        for ok, result in helpers.streaming_bulk(
                self.conn, _build_bulk_index(events)):
            if not ok:
                __, result = result.popitem()
                if result['status'] == 409:
                    LOG.info(_LI('Duplicate event detected, skipping it: %s')
                             % result)
                else:
                    LOG.exception(_LE('Failed to record event: %s') % result)
                    error = storage.StorageUnknownWriteError(result)

        if self._refresh_on_write:
            self.conn.indices.refresh(index='%s_*' % self.index_name)
            while self.conn.cluster.pending_tasks(local=True)['tasks']:
                pass
        if error:
            raise error

    def _make_dsl_from_filter(self, indices, ev_filter):
        q_args = {}
        filters = []

        if ev_filter.start_timestamp:
            filters.append(
                {'range': {'timestamp':
                           {'ge': ev_filter.start_timestamp.isoformat()}}})
            while indices[0] < (
                    '%s_%s' % (self.index_name,
                               ev_filter.start_timestamp.date()
                               .isoformat())):
                del indices[0]
        if ev_filter.end_timestamp:
            filters.append(
                {'range': {'timestamp':
                           {'le': ev_filter.end_timestamp.isoformat()}}})
            while indices[-1] > (
                    '%s_%s' % (self.index_name,
                               ev_filter.end_timestamp.date().isoformat())):
                del indices[-1]
        q_args['index'] = indices

        if ev_filter.event_type:
            q_args['doc_type'] = ev_filter.event_type
        if ev_filter.message_id:
            filters.append({'term': {'_id': ev_filter.message_id}})

        if ev_filter.traits_filter or ev_filter.admin_proj:
            trait_filters = []
            or_cond = []
            for t_filter in ev_filter.traits_filter or []:
                value = None
                for val_type in ['integer', 'string', 'float', 'datetime']:
                    if t_filter.get(val_type):
                        value = t_filter.get(val_type)
                        if isinstance(value, six.string_types):
                            value = value.lower()
                        elif isinstance(value, datetime.datetime):
                            value = value.isoformat()
                        break
                if t_filter.get('op') in ['gt', 'ge', 'lt', 'le']:
                    op = (t_filter.get('op').replace('ge', 'gte')
                          .replace('le', 'lte'))
                    trait_filters.append(
                        {'range': {t_filter['key']: {op: value}}})
                else:
                    tf = {"query": {"query_string": {
                        "query": "%s: \"%s\"" % (t_filter['key'], value)}}}
                    if t_filter.get('op') == 'ne':
                        tf = {"not": tf}
                    trait_filters.append(tf)
            if ev_filter.admin_proj:
                or_cond = [{'missing': {'field': 'project_id'}},
                           {'term': {'project_id': ev_filter.admin_proj}}]
            filters.append(
                {'nested': {'path': 'traits', 'query': {'filtered': {
                    'filter': {'bool': {'must': trait_filters,
                                        'should': or_cond}}}}}})

        q_args['body'] = {'query': {'filtered':
                                    {'filter': {'bool': {'must': filters}}}}}
        return q_args

    def get_events(self, event_filter, limit=None):
        if limit == 0:
            return
        iclient = es.client.IndicesClient(self.conn)
        indices = iclient.get_mapping('%s_*' % self.index_name).keys()
        if indices:
            filter_args = self._make_dsl_from_filter(indices, event_filter)
            if limit is not None:
                filter_args['size'] = limit
            results = self.conn.search(fields=['_id', 'timestamp',
                                               '_type', '_source'],
                                       sort='timestamp:asc',
                                       **filter_args)
            trait_mappings = {}
            for record in results['hits']['hits']:
                trait_list = []
                if not record['_type'] in trait_mappings:
                    trait_mappings[record['_type']] = list(
                        self.get_trait_types(record['_type']))
                for key in record['_source']['traits'].keys():
                    value = record['_source']['traits'][key]
                    for t_map in trait_mappings[record['_type']]:
                        if t_map['name'] == key:
                            dtype = t_map['data_type']
                            break
                    else:
                        dtype = models.Trait.TEXT_TYPE
                    trait_list.append(models.Trait(
                        name=key, dtype=dtype,
                        value=models.Trait.convert_value(dtype, value)))
                gen_ts = timeutils.normalize_time(timeutils.parse_isotime(
                    record['_source']['timestamp']))
                yield models.Event(message_id=record['_id'],
                                   event_type=record['_type'],
                                   generated=gen_ts,
                                   traits=sorted(
                                       trait_list,
                                       key=operator.attrgetter('dtype')),
                                   raw=record['_source']['raw'])

    def get_event_types(self):
        iclient = es.client.IndicesClient(self.conn)
        es_mappings = iclient.get_mapping('%s_*' % self.index_name)
        seen_types = set()
        for index in es_mappings.keys():
            for ev_type in es_mappings[index]['mappings'].keys():
                seen_types.add(ev_type)
        # TODO(gordc): tests assume sorted ordering but backends are not
        #              explicitly ordered.
        # NOTE: _default_ is a type that appears in all mappings but is not
        #       a real 'type'
        seen_types.discard('_default_')
        return sorted(list(seen_types))

    @staticmethod
    def _remap_es_types(d_type):
        if d_type == 'string':
            d_type = 'text'
        elif d_type == 'long':
            d_type = 'int'
        elif d_type == 'double':
            d_type = 'float'
        elif d_type == 'date' or d_type == 'date_time':
            d_type = 'datetime'
        return d_type

    def get_trait_types(self, event_type):
        iclient = es.client.IndicesClient(self.conn)
        es_mappings = iclient.get_mapping('%s_*' % self.index_name)
        seen_types = []
        for index in es_mappings.keys():
            # if event_type exists in index and has traits
            if (es_mappings[index]['mappings'].get(event_type) and
                    es_mappings[index]['mappings'][event_type]['properties']
                    ['traits'].get('properties')):
                for t_type in (es_mappings[index]['mappings'][event_type]
                               ['properties']['traits']['properties']
                               .keys()):
                    d_type = (es_mappings[index]['mappings'][event_type]
                              ['properties']['traits']['properties']
                              [t_type]['type'])
                    d_type = models.Trait.get_type_by_name(
                        self._remap_es_types(d_type))
                    if (t_type, d_type) not in seen_types:
                        yield {'name': t_type, 'data_type': d_type}
                        seen_types.append((t_type, d_type))

    def get_traits(self, event_type, trait_type=None):
        t_types = dict((res['name'], res['data_type'])
                       for res in self.get_trait_types(event_type))
        if not t_types or (trait_type and trait_type not in t_types.keys()):
            return
        result = self.conn.search('%s_*' % self.index_name, event_type)
        for ev in result['hits']['hits']:
            if trait_type and ev['_source']['traits'].get(trait_type):
                yield models.Trait(
                    name=trait_type,
                    dtype=t_types[trait_type],
                    value=models.Trait.convert_value(
                        t_types[trait_type],
                        ev['_source']['traits'][trait_type]))
            else:
                for trait in ev['_source']['traits'].keys():
                    yield models.Trait(
                        name=trait,
                        dtype=t_types[trait],
                        value=models.Trait.convert_value(
                            t_types[trait],
                            ev['_source']['traits'][trait]))
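
# NOTE: an illustrative sketch (not part of the driver) of the per-day
# index naming scheme the class above relies on. record_events() derives
# the target index from each event's generated date:
#
#     '%s_%s' % (self.index_name, ev.generated.date().isoformat())
#     # -> e.g. 'events_2014-10-21'
#
# and _make_dsl_from_filter() prunes the index list from both ends so a
# time-bounded get_events() call only searches the days inside the range.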
class Connection(base.Connection):
    """Base Connection class for MongoDB and DB2 drivers."""

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       COMMON_AVAILABLE_CAPABILITIES)

    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def get_meters(self, user=None, project=None, resource=None, source=None,
                   metaquery=None, pagination=None):
        """Return an iterable of models.Meter instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional resource filter.
        :param source: Optional source filter.
        :param metaquery: Optional dict with metadata to match on.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise NotImplementedError('Pagination not implemented')

        metaquery = metaquery or {}

        q = {}
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if resource is not None:
            q['_id'] = resource
        if source is not None:
            q['source'] = source
        q.update(metaquery)

        for r in self.db.resource.find(q):
            for r_meter in r['meter']:
                yield models.Meter(
                    name=r_meter['counter_name'],
                    type=r_meter['counter_type'],
                    # Return an empty string if 'counter_unit' is not valid,
                    # for backward compatibility.
                    unit=r_meter.get('counter_unit', ''),
                    resource_id=r['_id'],
                    project_id=r['project_id'],
                    source=r['source'],
                    user_id=r['user_id'],
                )

    def update_alarm(self, alarm):
        """Update alarm."""
        data = alarm.as_dict()

        self.db.alarm.update({'alarm_id': alarm.alarm_id},
                             {'$set': data},
                             upsert=True)

        stored_alarm = self.db.alarm.find({'alarm_id': alarm.alarm_id})[0]
        del stored_alarm['_id']
        self._ensure_encapsulated_rule_format(stored_alarm)
        self._ensure_time_constraints(stored_alarm)
        return alarm_models.Alarm(**stored_alarm)

    create_alarm = update_alarm

    def delete_alarm(self, alarm_id):
        """Delete an alarm."""
        self.db.alarm.remove({'alarm_id': alarm_id})

    def record_alarm_change(self, alarm_change):
        """Record alarm change event."""
        self.db.alarm_history.insert(alarm_change.copy())

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of model.Sample instances.

        :param sample_filter: Filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return []
        q = pymongo_utils.make_query_from_filter(sample_filter,
                                                 require_meter=False)

        return self._retrieve_samples(q,
                                      [("timestamp", pymongo.DESCENDING)],
                                      limit)

    def get_alarms(self, name=None, user=None, state=None, meter=None,
                   project=None, enabled=None, alarm_id=None,
                   pagination=None):
        """Yields a list of alarms that match filters.

        :param name: The alarm name.
        :param user: Optional ID for user that owns the resource.
        :param state: Optional string for alarm state.
        :param meter: Optional string for alarms associated with meter.
        :param project: Optional ID for project that owns the resource.
        :param enabled: Optional boolean to list disabled alarms.
        :param alarm_id: Optional alarm_id to return one alarm.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise NotImplementedError('Pagination not implemented')

        q = {}
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if name is not None:
            q['name'] = name
        if enabled is not None:
            q['enabled'] = enabled
        if alarm_id is not None:
            q['alarm_id'] = alarm_id
        if state is not None:
            q['state'] = state
        if meter is not None:
            q['rule.meter_name'] = meter

        return self._retrieve_alarms(q, [], None)

    def get_alarm_changes(self, alarm_id, on_behalf_of,
                          user=None, project=None, type=None,
                          start_timestamp=None, start_timestamp_op=None,
                          end_timestamp=None, end_timestamp_op=None):
        """Yields list of AlarmChanges describing alarm history

        Changes are always sorted in reverse order of occurrence, given
        the importance of currency.

        Segregation for non-administrative users is done on the basis
        of the on_behalf_of parameter. This allows such users to have
        visibility on both the changes initiated by themselves directly
        (generally creation, rule changes, or deletion) and also on those
        changes initiated on their behalf by the alarming service (state
        transitions after alarm thresholds are crossed).

        :param alarm_id: ID of alarm to return changes for
        :param on_behalf_of: ID of tenant to scope changes query (None for
                             administrative user, indicating all projects)
        :param user: Optional ID of user to return changes for
        :param project: Optional ID of project to return changes for
        :param type: Optional change type
        :param start_timestamp: Optional modified timestamp start range
        :param start_timestamp_op: Optional timestamp start range operation
        :param end_timestamp: Optional modified timestamp end range
        :param end_timestamp_op: Optional timestamp end range operation
        """
        q = dict(alarm_id=alarm_id)
        if on_behalf_of is not None:
            q['on_behalf_of'] = on_behalf_of
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if type is not None:
            q['type'] = type
        if start_timestamp or end_timestamp:
            ts_range = pymongo_utils.make_timestamp_range(
                start_timestamp, end_timestamp,
                start_timestamp_op, end_timestamp_op)
            if ts_range:
                q['timestamp'] = ts_range

        return self._retrieve_alarm_changes(
            q, [("timestamp", pymongo.DESCENDING)], None)

    def record_events(self, event_models):
        """Write the events to database.

        Return a list of events of type models.Event.DUPLICATE in case of
        trying to write an already existing event to the database, or
        models.Event.UNKNOWN_PROBLEM in case of any failures with recording
        the event in the database.

        :param event_models: a list of models.Event objects.
        """
        problem_events = []
        for event_model in event_models:
            traits = []
            if event_model.traits:
                for trait in event_model.traits:
                    traits.append({'trait_name': trait.name,
                                   'trait_type': trait.dtype,
                                   'trait_value': trait.value})
            try:
                self.db.event.insert(
                    {'_id': event_model.message_id,
                     'event_type': event_model.event_type,
                     'timestamp': event_model.generated,
                     'traits': traits})
            except pymongo.errors.DuplicateKeyError:
                problem_events.append((models.Event.DUPLICATE,
                                       event_model))
            except Exception as ex:
                LOG.exception(_("Failed to record event: %s") % ex)
                problem_events.append((models.Event.UNKNOWN_PROBLEM,
                                       event_model))
        return problem_events

    def get_events(self, event_filter):
        """Return a list of models.Event objects.

        :param event_filter: storage.EventFilter object, consists of filters
                             for events that are stored in database.
        """
        q = pymongo_utils.make_events_query_from_filter(event_filter)
        res_events = []
        for event in self.db.event.find(q):
            traits = []
            for trait in event['traits']:
                traits.append(models.Trait(name=trait['trait_name'],
                                           dtype=int(trait['trait_type']),
                                           value=trait['trait_value']))
            res_events.append(models.Event(message_id=event['_id'],
                                           event_type=event['event_type'],
                                           generated=event['timestamp'],
                                           traits=traits))
        return res_events

    def get_event_types(self):
        """Return all event types as an iter of strings."""
        event_types = set()
        events = self.db.event.find()

        for event in events:
            event_type = event['event_type']
            if event_type not in event_types:
                event_types.add(event_type)
                yield event_type

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event.
        """
        trait_names = set()
        events = self.db.event.find({'event_type': event_type})

        for event in events:
            for trait in event['traits']:
                trait_name = trait['trait_name']
                if trait_name not in trait_names:
                    # Only yield unique trait types: it is assumed that a
                    # given trait name maps to exactly one trait type.
                    trait_names.add(trait_name)
                    yield {'name': trait_name,
                           'data_type': trait['trait_type']}

    def get_traits(self, event_type, trait_name=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait type.

        :param event_type: the type of the Event to filter by
        :param trait_name: the name of the Trait to filter by
        """
        if not trait_name:
            events = self.db.event.find({'event_type': event_type})
        else:
            # We choose events that have both the event_type and the given
            # trait_name, and retrieve only the mentioned traits from them.
            events = self.db.event.find(
                {'$and': [{'event_type': event_type},
                          {'traits.trait_name': trait_name}]},
                {'traits': {'$elemMatch': {'trait_name': trait_name}}})

        traits = []
        for event in events:
            for trait in event['traits']:
                traits.append(models.Trait(name=trait['trait_name'],
                                           dtype=trait['trait_type'],
                                           value=trait['trait_value']))
        for trait in sorted(traits, key=operator.attrgetter('dtype')):
            yield trait

    def query_samples(self, filter_expr=None, orderby=None, limit=None):
        return self._retrieve_data(filter_expr, orderby, limit,
                                   models.Meter)

    def query_alarms(self, filter_expr=None, orderby=None, limit=None):
        """Return an iterable of model.Alarm objects."""
        return self._retrieve_data(filter_expr, orderby, limit,
                                   alarm_models.Alarm)

    def query_alarm_history(self, filter_expr=None, orderby=None, limit=None):
        """Return an iterable of model.AlarmChange objects."""
        return self._retrieve_data(filter_expr, orderby, limit,
                                   alarm_models.AlarmChange)

    def _retrieve_data(self, filter_expr, orderby, limit, model):
        if limit == 0:
            return []
        query_filter = {}
        orderby_filter = [("timestamp", pymongo.DESCENDING)]
        transformer = pymongo_utils.QueryTransformer()
        if orderby is not None:
            orderby_filter = transformer.transform_orderby(orderby)
        if filter_expr is not None:
            query_filter = transformer.transform_filter(filter_expr)

        retrieve = {models.Meter: self._retrieve_samples,
                    alarm_models.Alarm: self._retrieve_alarms,
                    alarm_models.AlarmChange: self._retrieve_alarm_changes}
        return retrieve[model](query_filter, orderby_filter, limit)

    def _retrieve_samples(self, query, orderby, limit):
        if limit is not None:
            samples = self.db.meter.find(query,
                                         limit=limit,
                                         sort=orderby)
        else:
            samples = self.db.meter.find(query,
                                         sort=orderby)

        for s in samples:
            # Remove the ObjectId generated by the database when
            # the sample was inserted. It is an implementation
            # detail that should not leak outside of the driver.
            del s['_id']
            # Backward compatibility for samples without units
            s['counter_unit'] = s.get('counter_unit', '')
            # Tolerate absence of recorded_at in older datapoints
            s['recorded_at'] = s.get('recorded_at')
            yield models.Sample(**s)

    def _retrieve_alarms(self, query_filter, orderby, limit):
        if limit is not None:
            alarms = self.db.alarm.find(query_filter,
                                        limit=limit,
                                        sort=orderby)
        else:
            alarms = self.db.alarm.find(query_filter, sort=orderby)

        for alarm in alarms:
            a = {}
            a.update(alarm)
            del a['_id']
            self._ensure_encapsulated_rule_format(a)
            self._ensure_time_constraints(a)
            yield alarm_models.Alarm(**a)

    def _retrieve_alarm_changes(self, query_filter, orderby, limit):
        if limit is not None:
            alarms_history = self.db.alarm_history.find(query_filter,
                                                        limit=limit,
                                                        sort=orderby)
        else:
            alarms_history = self.db.alarm_history.find(query_filter,
                                                        sort=orderby)

        for alarm_history in alarms_history:
            ah = {}
            ah.update(alarm_history)
            del ah['_id']
            yield alarm_models.AlarmChange(**ah)

    @classmethod
    def _ensure_encapsulated_rule_format(cls, alarm):
        """Ensure the alarm returned by the storage has the correct format.

        The previous format looks like::

            {'alarm_id': '0ld-4l3rt',
             'enabled': True,
             'name': 'old-alert',
             'description': 'old-alert',
             'timestamp': None,
             'meter_name': 'cpu',
             'user_id': 'me',
             'project_id': 'and-da-boys',
             'comparison_operator': 'lt',
             'threshold': 36,
             'statistic': 'count',
             'evaluation_periods': 1,
             'period': 60,
             'state': "insufficient data",
             'state_timestamp': None,
             'ok_actions': [],
             'alarm_actions': ['http://nowhere/alarms'],
             'insufficient_data_actions': [],
             'repeat_actions': False,
             'matching_metadata': {'key': 'value'}
             # or 'matching_metadata': [{'key': 'key', 'value': 'value'}]
            }
        """
        if isinstance(alarm.get('rule'), dict):
            return

        alarm['type'] = 'threshold'
        alarm['rule'] = {}
        alarm['matching_metadata'] = cls._decode_matching_metadata(
            alarm['matching_metadata'])
        for field in ['period', 'evaluation_periods', 'threshold',
                      'statistic', 'comparison_operator', 'meter_name']:
            if field in alarm:
                alarm['rule'][field] = alarm[field]
                del alarm[field]

        query = []
        for key in alarm['matching_metadata']:
            query.append({'field': key,
                          'op': 'eq',
                          'value': alarm['matching_metadata'][key],
                          'type': 'string'})
        del alarm['matching_metadata']
        alarm['rule']['query'] = query

    @staticmethod
    def _decode_matching_metadata(matching_metadata):
        if isinstance(matching_metadata, dict):
            # note(sileht): keep compatibility with alarm
            # with matching_metadata as a dict
            return matching_metadata
        else:
            new_matching_metadata = {}
            for elem in matching_metadata:
                new_matching_metadata[elem['key']] = elem['value']
            return new_matching_metadata

    @staticmethod
    def _ensure_time_constraints(alarm):
        """Ensures the alarm has a time constraints field."""
        if 'time_constraints' not in alarm:
            alarm['time_constraints'] = []
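
# NOTE: an illustrative sketch (not part of the driver) of the conversion
# performed by _ensure_encapsulated_rule_format() above. A legacy flat
# alarm document such as
#
#     {'meter_name': 'cpu', 'comparison_operator': 'lt', 'threshold': 36,
#      'statistic': 'count', 'evaluation_periods': 1, 'period': 60,
#      'matching_metadata': {'key': 'value'}}
#
# is rewritten in place into the encapsulated form
#
#     {'type': 'threshold',
#      'rule': {'meter_name': 'cpu', 'comparison_operator': 'lt',
#               'threshold': 36, 'statistic': 'count',
#               'evaluation_periods': 1, 'period': 60,
#               'query': [{'field': 'key', 'op': 'eq',
#                          'value': 'value', 'type': 'string'}]}}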
class Connection(base.Connection):

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def __init__(self, url):
        self.mc = monasca_client.Client(netutils.urlsplit(url))
        self.mon_filter = MonascaDataFilter()

    @staticmethod
    def _convert_to_dict(stats, cols):
        return {c: stats[i] for i, c in enumerate(cols)}

    def _convert_metaquery(self, metaquery):
        """Strip "metadata." from key and convert value to string.

        :param metaquery: { 'metadata.KEY': VALUE, ... }
        :returns: converted metaquery
        """
        query = {}
        for k, v in metaquery.items():
            key = k.split('.')[1]
            if isinstance(v, basestring):
                query[key] = v
            else:
                query[key] = str(int(v))
        return query

    def _match_metaquery_to_value_meta(self, query, value_meta):
        """Check if metaquery matches value_meta.

        :param query: metaquery with converted format
        :param value_meta: metadata from monasca
        :returns: True for matched, False for not matched
        """
        if (len(query) > 0 and
                (len(value_meta) == 0 or
                 not set(query.items()).issubset(set(value_meta.items())))):
            return False
        else:
            return True

    def upgrade(self):
        pass

    def clear(self):
        pass

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter.
        """
        LOG.info(_('metering data %(counter_name)s for %(resource_id)s: '
                   '%(counter_volume)s')
                 % ({'counter_name': data['counter_name'],
                     'resource_id': data['resource_id'],
                     'counter_volume': data['counter_volume']}))

        metric = self.mon_filter.process_sample_for_monasca(data)
        self.mc.metrics_create(**metric)

    def clear_expired_metering_data(self, ttl):
        """Clear expired data from the backend storage system.

        Clearing occurs according to the time-to-live.

        :param ttl: Number of seconds to keep records for.
        """
        LOG.info(_("Dropping data with TTL %d"), ttl)

    def get_resources(self, user=None, project=None, source=None,
                      start_timestamp=None, start_timestamp_op=None,
                      end_timestamp=None, end_timestamp_op=None,
                      metaquery=None, resource=None, pagination=None):
        """Return an iterable of dictionaries containing resource information.

        {
          'resource_id': UUID of the resource,
          'project_id': UUID of project owning the resource,
          'user_id': UUID of user owning the resource,
          'timestamp': UTC datetime of last update to the resource,
          'metadata': most current metadata for the resource,
          'meter': list of the meters reporting data for the resource,
        }

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like gt, ge.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise ceilometer.NotImplementedError('Pagination not implemented')

        q = {}
        if metaquery:
            q = self._convert_metaquery(metaquery)

        if start_timestamp_op and start_timestamp_op != 'ge':
            raise ceilometer.NotImplementedError(
                ('Start time op %s not implemented') % start_timestamp_op)

        if end_timestamp_op and end_timestamp_op != 'le':
            raise ceilometer.NotImplementedError(
                ('End time op %s not implemented') % end_timestamp_op)

        if not start_timestamp:
            start_timestamp = timeutils.isotime(datetime.datetime(1970, 1, 1))
        else:
            start_timestamp = timeutils.isotime(start_timestamp)

        if end_timestamp:
            end_timestamp = timeutils.isotime(end_timestamp)

        dims_filter = dict(user_id=user,
                           project_id=project,
                           source=source,
                           resource_id=resource)
        dims_filter = {k: v for k, v in dims_filter.items()
                       if v is not None}

        _search_args = dict(start_time=start_timestamp,
                            end_time=end_timestamp,
                            limit=1)
        _search_args = {k: v for k, v in _search_args.items()
                        if v is not None}

        for metric in self.mc.metrics_list(**dict(dimensions=dims_filter)):
            _search_args['name'] = metric['name']
            _search_args['dimensions'] = metric['dimensions']
            try:
                for sample in self.mc.measurements_list(**_search_args):
                    d = sample['dimensions']
                    m = self._convert_to_dict(
                        sample['measurements'][0], sample['columns'])
                    vm = m['value_meta']
                    if not self._match_metaquery_to_value_meta(q, vm):
                        continue
                    if d.get('resource_id'):
                        yield api_models.Resource(
                            resource_id=d.get('resource_id'),
                            first_sample_timestamp=(
                                timeutils.parse_isotime(m['timestamp'])),
                            last_sample_timestamp=timeutils.utcnow(),
                            project_id=d.get('project_id'),
                            source=d.get('source'),
                            user_id=d.get('user_id'),
                            metadata=m['value_meta'],
                        )
            except monasca_exc.HTTPConflict:
                pass

    def get_meters(self, user=None, project=None, resource=None, source=None,
                   limit=None, metaquery=None, pagination=None):
        """Return an iterable of dictionaries containing meter information.

        {
          'name': name of the meter,
          'type': type of the meter (gauge, delta, cumulative),
          'resource_id': UUID of the resource,
          'project_id': UUID of project owning the resource,
          'user_id': UUID of user owning the resource,
        }

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional resource filter.
        :param source: Optional source filter.
        :param limit: Maximum number of results to return.
        :param metaquery: Optional dict with metadata to match on.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise ceilometer.NotImplementedError('Pagination not implemented')
        if metaquery:
            raise ceilometer.NotImplementedError('Metaquery not implemented')

        _dimensions = dict(user_id=user,
                           project_id=project,
                           resource_id=resource,
                           source=source)
        _dimensions = {k: v for k, v in _dimensions.items() if v is not None}

        _search_kwargs = {'dimensions': _dimensions}

        if limit:
            _search_kwargs['limit'] = limit

        for metric in self.mc.metrics_list(**_search_kwargs):
            yield api_models.Meter(
                name=metric['name'],
                type=metric['dimensions'].get('type') or 'cumulative',
                unit=metric['dimensions'].get('unit'),
                resource_id=metric['dimensions'].get('resource_id'),
                project_id=metric['dimensions'].get('project_id'),
                source=metric['dimensions'].get('source'),
                user_id=metric['dimensions'].get('user_id'))

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of dictionaries containing sample information.

        {
          'source': source of the resource,
          'counter_name': name of the resource,
          'counter_type': type of the sample (gauge, delta, cumulative),
          'counter_unit': unit of the sample,
          'counter_volume': volume of the sample,
          'user_id': UUID of user owning the resource,
          'project_id': UUID of project owning the resource,
          'resource_id': UUID of the resource,
          'timestamp': timestamp of the sample,
          'resource_metadata': metadata of the sample,
          'message_id': message ID of the sample,
          'message_signature': message signature of the sample,
          'recorded_at': time the sample was recorded
        }

        :param sample_filter: constraints for the sample search.
        :param limit: Maximum number of results to return.
        """
        if not sample_filter or not sample_filter.meter:
            raise ceilometer.NotImplementedError(
                "Supply meter name at the least")

        if (sample_filter.start_timestamp_op and
                sample_filter.start_timestamp_op != 'ge'):
            raise ceilometer.NotImplementedError(
                ('Start time op %s not implemented')
                % sample_filter.start_timestamp_op)

        if (sample_filter.end_timestamp_op and
                sample_filter.end_timestamp_op != 'le'):
            raise ceilometer.NotImplementedError(
                ('End time op %s not implemented')
                % sample_filter.end_timestamp_op)

        q = {}
        if sample_filter.metaquery:
            q = self._convert_metaquery(sample_filter.metaquery)

        if sample_filter.message_id:
            raise ceilometer.NotImplementedError(
                'message_id not implemented in get_samples')

        if not sample_filter.start_timestamp:
            sample_filter.start_timestamp = timeutils.isotime(
                datetime.datetime(1970, 1, 1))
        else:
            sample_filter.start_timestamp = timeutils.isotime(
                sample_filter.start_timestamp)

        if sample_filter.end_timestamp:
            sample_filter.end_timestamp = timeutils.isotime(
                sample_filter.end_timestamp)

        _dimensions = dict(user_id=sample_filter.user,
                           project_id=sample_filter.project,
                           resource_id=sample_filter.resource,
                           source=sample_filter.source)
        _dimensions = {k: v for k, v in _dimensions.items() if v is not None}

        _search_args = dict(
            name=sample_filter.meter,
            start_time=sample_filter.start_timestamp,
            start_timestamp_op=sample_filter.start_timestamp_op,
            end_time=sample_filter.end_timestamp,
            end_timestamp_op=sample_filter.end_timestamp_op,
            limit=limit,
            merge_metrics=True,
            dimensions=_dimensions)
        _search_args = {k: v for k, v in _search_args.items()
                        if v is not None}

        for sample in self.mc.measurements_list(**_search_args):
            LOG.debug(_('Retrieved sample: %s'), sample)

            d = sample['dimensions']
            for measurement in sample['measurements']:
                meas_dict = self._convert_to_dict(measurement,
                                                  sample['columns'])
                vm = meas_dict['value_meta']
                if not self._match_metaquery_to_value_meta(q, vm):
                    continue
                yield api_models.Sample(
                    source=d.get('source'),
                    counter_name=sample['name'],
                    counter_type=d.get('type'),
                    counter_unit=d.get('unit'),
                    counter_volume=meas_dict['value'],
                    user_id=d.get('user_id'),
                    project_id=d.get('project_id'),
                    resource_id=d.get('resource_id'),
                    timestamp=timeutils.parse_isotime(meas_dict['timestamp']),
                    resource_metadata=meas_dict['value_meta'],
                    message_id=sample['id'],
                    message_signature='',
                    recorded_at=(timeutils.parse_isotime(
                        meas_dict['timestamp'])))

    def get_meter_statistics(self, filter, period=None, groupby=None,
                             aggregate=None):
        """Return a dictionary containing meter statistics.

        Meter statistics are described by the query parameters.
        The filter must have a meter value set.

        {
          'min':
          'max':
          'avg':
          'sum':
          'count':
          'period':
          'period_start':
          'period_end':
          'duration':
          'duration_start':
          'duration_end':
        }
        """
        if filter:
            if not filter.meter:
                raise ceilometer.NotImplementedError(
                    'Query without meter not implemented')
        else:
            raise ceilometer.NotImplementedError(
                'Query without filter not implemented')

        if groupby:
            raise ceilometer.NotImplementedError('Groupby not implemented')

        if filter.metaquery:
            raise ceilometer.NotImplementedError('Metaquery not implemented')

        if filter.message_id:
            raise ceilometer.NotImplementedError(
                'Message_id query not implemented')

        if filter.start_timestamp_op and filter.start_timestamp_op != 'ge':
            raise ceilometer.NotImplementedError(
                ('Start time op %s not implemented')
                % filter.start_timestamp_op)

        if filter.end_timestamp_op and filter.end_timestamp_op != 'le':
            raise ceilometer.NotImplementedError(
                ('End time op %s not implemented')
                % filter.end_timestamp_op)

        if not filter.start_timestamp:
            filter.start_timestamp = timeutils.isotime(
                datetime.datetime(1970, 1, 1))

        # TODO(monasca): Add this as a config parameter
        allowed_stats = ['avg', 'min', 'max', 'sum', 'count']
        if aggregate:
            not_allowed_stats = [a.func for a in aggregate
                                 if a.func not in allowed_stats]
            if not_allowed_stats:
                raise ceilometer.NotImplementedError(
                    ('Aggregate function(s) %s not implemented')
                    % not_allowed_stats)

            statistics = [a.func for a in aggregate
                          if a.func in allowed_stats]
        else:
            statistics = allowed_stats

        dims_filter = dict(user_id=filter.user,
                           project_id=filter.project,
                           source=filter.source,
                           resource_id=filter.resource)
        dims_filter = {k: v for k, v in dims_filter.items()
                       if v is not None}

        period = (period if period
                  else cfg.CONF.monasca.default_stats_period)

        _search_args = dict(name=filter.meter,
                            dimensions=dims_filter,
                            start_time=filter.start_timestamp,
                            end_time=filter.end_timestamp,
                            period=period,
                            statistics=','.join(statistics),
                            merge_metrics=True)
        _search_args = {k: v for k, v in _search_args.items()
                        if v is not None}

        stats_list = self.mc.statistics_list(**_search_args)
        for stats in stats_list:
            for s in stats['statistics']:
                stats_dict = self._convert_to_dict(s, stats['columns'])
                ts_start = timeutils.parse_isotime(stats_dict['timestamp'])
                ts_end = ts_start + datetime.timedelta(0, period)
                del stats_dict['timestamp']
                if 'count' in stats_dict:
                    stats_dict['count'] = int(stats_dict['count'])
                yield api_models.Statistics(
                    unit=stats['dimensions'].get('unit'),
                    period=period,
                    period_start=ts_start,
                    period_end=ts_end,
                    duration=period,
                    duration_start=ts_start,
                    duration_end=ts_end,
                    groupby={u'': u''},
                    **stats_dict)
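
# NOTE: an illustrative sketch (not part of the driver) of the metaquery
# handling in the class above. _convert_metaquery() strips the 'metadata.'
# prefix and stringifies non-string values:
#
#     {'metadata.state': 'active', 'metadata.size': 2}
#     -> {'state': 'active', 'size': '2'}
#
# _match_metaquery_to_value_meta() then accepts a sample only if this
# converted dict is a subset of the value_meta returned by Monasca.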
class Connection(base.Connection):
    """Put the event data into a SQLAlchemy database.

    Tables::

        - EventType
          - event definition
          - { id: event type id
              desc: description of event
              }
        - Event
          - event data
          - { id: event id
              message_id: message id
              generated = timestamp of event
              event_type_id = event type -> eventtype.id
              }
        - Trait
          - trait value
          - { event_id: event -> event.id
              trait_type_id: trait type -> traittype.id
              t_string: string value
              t_float: float value
              t_int: integer value
              t_datetime: timestamp value
              }
        - TraitType
          - trait definition
          - { id: trait id
              desc: description of trait
              data_type: data type (integer that maps to datatype)
              }
    """
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def __init__(self, url):
        self._engine_facade = db_session.EngineFacade(
            url, **dict(cfg.CONF.database.items()))

    def upgrade(self):
        # NOTE(gordc): to minimise memory, only import migration when needed
        from oslo.db.sqlalchemy import migration
        path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                            '..', '..', 'storage', 'sqlalchemy',
                            'migrate_repo')
        migration.db_sync(self._engine_facade.get_engine(), path)

    def clear(self):
        engine = self._engine_facade.get_engine()
        for table in reversed(models.Base.metadata.sorted_tables):
            engine.execute(table.delete())
        self._engine_facade._session_maker.close_all()
        engine.dispose()

    def _get_or_create_trait_type(self, trait_type, data_type, session=None):
        """Find if this trait already exists in the database.

        If it does not, create a new entry in the trait type table.
        """
        if session is None:
            session = self._engine_facade.get_session()
        with session.begin(subtransactions=True):
            tt = session.query(models.TraitType).filter(
                models.TraitType.desc == trait_type,
                models.TraitType.data_type == data_type).first()
            if not tt:
                tt = models.TraitType(trait_type, data_type)
                session.add(tt)
        return tt

    def _make_trait(self, trait_model, event, session=None):
        """Make a new Trait from a Trait model.

        Doesn't flush or add to session.
        """
        trait_type = self._get_or_create_trait_type(trait_model.name,
                                                    trait_model.dtype,
                                                    session)
        value_map = models.Trait._value_map
        values = {'t_string': None, 't_float': None,
                  't_int': None, 't_datetime': None}
        value = trait_model.value
        values[value_map[trait_model.dtype]] = value
        return models.Trait(trait_type, event, **values)

    def _get_or_create_event_type(self, event_type, session=None):
        """Check if an event type with the supplied name already exists.

        If not, we create it and return the record. This may result in
        a flush.
        """
        if session is None:
            session = self._engine_facade.get_session()
        with session.begin(subtransactions=True):
            et = session.query(models.EventType).filter(
                models.EventType.desc == event_type).first()
            if not et:
                et = models.EventType(event_type)
                session.add(et)
        return et

    def _record_event(self, session, event_model):
        """Store a single Event, including related Traits."""
        with session.begin(subtransactions=True):
            event_type = self._get_or_create_event_type(
                event_model.event_type, session=session)

            event = models.Event(event_model.message_id, event_type,
                                 event_model.generated)
            session.add(event)

            new_traits = []
            if event_model.traits:
                for trait in event_model.traits:
                    t = self._make_trait(trait, event, session=session)
                    session.add(t)
                    new_traits.append(t)

        # Note: we don't flush here, explicitly (unless a new trait or event
        # does it). Otherwise, just wait until all the Events are staged.
        return event, new_traits

    def record_events(self, event_models):
        """Write the events to SQL database via sqlalchemy.

        :param event_models: a list of model.Event objects.

        Returns a list of events that could not be saved in a
        (reason, event) tuple. Reasons are enumerated in
        storage.model.Event

        Flush when they're all added, unless new EventTypes or
        TraitTypes are added along the way.
        """
        session = self._engine_facade.get_session()
        events = []
        problem_events = []
        for event_model in event_models:
            event = None
            try:
                with session.begin():
                    event = self._record_event(session, event_model)
            except dbexc.DBDuplicateEntry as e:
                LOG.exception(_("Failed to record duplicated event: %s") % e)
                problem_events.append((api_models.Event.DUPLICATE,
                                       event_model))
            except Exception as e:
                LOG.exception(_('Failed to record event: %s') % e)
                problem_events.append((api_models.Event.UNKNOWN_PROBLEM,
                                       event_model))
            events.append(event)
        return problem_events

    def get_events(self, event_filter):
        """Return an iterable of model.Event objects.

        :param event_filter: EventFilter instance
        """
        start = event_filter.start_time
        end = event_filter.end_time
        session = self._engine_facade.get_session()
        LOG.debug(_("Getting events that match filter: %s") % event_filter)
        with session.begin():
            event_query = session.query(models.Event)

            # Build up the join conditions
            event_join_conditions = [models.EventType.id ==
                                     models.Event.event_type_id]

            if event_filter.event_type:
                event_join_conditions.append(
                    models.EventType.desc == event_filter.event_type)

            event_query = event_query.join(models.EventType,
                                           sa.and_(*event_join_conditions))

            # Build up the where conditions
            event_filter_conditions = []
            if event_filter.message_id:
                event_filter_conditions.append(
                    models.Event.message_id == event_filter.message_id)
            if start:
                event_filter_conditions.append(
                    models.Event.generated >= start)
            if end:
                event_filter_conditions.append(models.Event.generated <= end)

            if event_filter_conditions:
                event_query = (event_query.
                               filter(sa.and_(*event_filter_conditions)))

            event_models_dict = {}
            if event_filter.traits_filter:
                for trait_filter in event_filter.traits_filter:

                    # Build a sub query that joins Trait to TraitType
                    # where the trait name matches
                    trait_name = trait_filter.pop('key')
                    op = trait_filter.pop('op', 'eq')
                    conditions = [models.Trait.trait_type_id ==
                                  models.TraitType.id,
                                  models.TraitType.desc == trait_name]

                    for key, value in six.iteritems(trait_filter):
                        sql_utils.trait_op_condition(conditions,
                                                     key, value, op)

                    trait_query = (session.query(models.Trait.event_id).
                                   join(models.TraitType,
                                        sa.and_(*conditions)).subquery())

                    event_query = (event_query.
                                   join(trait_query, models.Event.id ==
                                        trait_query.c.event_id))
            else:
                # If there are no trait filters, grab the events from the db
                query = (session.query(models.Event.id,
                                       models.Event.generated,
                                       models.Event.message_id,
                                       models.EventType.desc).
                         join(models.EventType,
                              sa.and_(*event_join_conditions)))
                if event_filter_conditions:
                    query = query.filter(sa.and_(*event_filter_conditions))
                for (id_, generated, message_id, desc_) in query.all():
                    event_models_dict[id_] = api_models.Event(message_id,
                                                              desc_,
                                                              generated,
                                                              [])

            # Build event models for the events
            event_query = event_query.subquery()
            query = (session.query(models.Trait).
                     join(models.TraitType, models.Trait.trait_type_id ==
                          models.TraitType.id).
                     join(event_query, models.Trait.event_id ==
                          event_query.c.id))

            # Now convert the sqlalchemy objects back into Models ...
            for trait in query.all():
                event = event_models_dict.get(trait.event_id)
                if not event:
                    event = api_models.Event(trait.event.message_id,
                                             trait.event.event_type.desc,
                                             trait.event.generated, [])
                    event_models_dict[trait.event_id] = event
                trait_model = api_models.Trait(trait.trait_type.desc,
                                               trait.trait_type.data_type,
                                               trait.get_value())
                event.append_trait(trait_model)

        event_models = event_models_dict.values()
        return sorted(event_models, key=operator.attrgetter('generated'))

    def get_event_types(self):
        """Return all event types as an iterable of strings."""
        session = self._engine_facade.get_session()
        with session.begin():
            query = (session.query(models.EventType.desc).
                     order_by(models.EventType.desc))
            for name in query.all():
                # The query returns a tuple with one element.
                yield name[0]

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event
        """
        session = self._engine_facade.get_session()

        LOG.debug(_("Get traits for %s") % event_type)
        with session.begin():
            query = (session.query(models.TraitType.desc,
                                   models.TraitType.data_type).
                     join(models.Trait,
                          models.Trait.trait_type_id == models.TraitType.id).
                     join(models.Event,
                          models.Event.id == models.Trait.event_id).
                     join(models.EventType,
                          sa.and_(models.EventType.id ==
                                  models.Event.event_type_id,
                                  models.EventType.desc == event_type)).
                     group_by(models.TraitType.desc,
                              models.TraitType.data_type).distinct())
            for desc_, dtype in query.all():
                yield {'name': desc_, 'data_type': dtype}

    def get_traits(self, event_type, trait_type=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait type.

        :param event_type: the type of the Event to filter by
        :param trait_type: the name of the Trait to filter by
        """
        session = self._engine_facade.get_session()
        with session.begin():
            trait_type_filters = [models.TraitType.id ==
                                  models.Trait.trait_type_id]
            if trait_type:
                trait_type_filters.append(models.TraitType.desc == trait_type)

            query = (session.query(models.Trait).
                     join(models.TraitType,
                          sa.and_(*trait_type_filters)).
                     join(models.Event,
                          models.Event.id == models.Trait.event_id).
                     join(models.EventType,
                          sa.and_(models.EventType.id ==
                                  models.Event.event_type_id,
                                  models.EventType.desc == event_type)))

            for trait in query.all():
                type = trait.trait_type
                yield api_models.Trait(name=type.desc,
                                       dtype=type.data_type,
                                       value=trait.get_value())
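
# NOTE: an illustrative sketch (not part of the driver; the column
# selection is assumed from models.Trait._value_map) of how _make_trait()
# above stores a trait. Exactly one typed column is populated per trait,
# e.g. a text trait Trait(name='host', dtype=TEXT, value='compute-1')
# becomes a Trait row with
#
#     t_string='compute-1', t_float=None, t_int=None, t_datetime=None
#
# and trait.get_value() later reads back the column matching data_type.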
class Connection(base.Connection):
    """Base event Connection class for MongoDB and DB2 drivers."""

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       COMMON_AVAILABLE_CAPABILITIES)

    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def record_events(self, event_models):
        """Write the events to database.

        :param event_models: a list of models.Event objects.
        """
        error = None
        for event_model in event_models:
            traits = []
            if event_model.traits:
                for trait in event_model.traits:
                    traits.append({'trait_name': trait.name,
                                   'trait_type': trait.dtype,
                                   'trait_value': trait.value})
            try:
                self.db.event.insert_one(
                    {'_id': event_model.message_id,
                     'event_type': event_model.event_type,
                     'timestamp': event_model.generated,
                     'traits': traits, 'raw': event_model.raw})
            except pymongo.errors.DuplicateKeyError as ex:
                LOG.info(_LI("Duplicate event detected, skipping it: %s")
                         % ex)
            except Exception as ex:
                LOG.exception(_LE("Failed to record event: %s") % ex)
                error = ex
        if error:
            raise error

    def get_events(self, event_filter, limit=None):
        """Return an iter of models.Event objects.

        :param event_filter: storage.EventFilter object, consists of filters
                             for events that are stored in database.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return
        q = pymongo_utils.make_events_query_from_filter(event_filter)
        if limit is not None:
            results = self.db.event.find(q, limit=limit)
        else:
            results = self.db.event.find(q)
        for event in results:
            traits = []
            for trait in event['traits']:
                traits.append(models.Trait(name=trait['trait_name'],
                                           dtype=int(trait['trait_type']),
                                           value=trait['trait_value']))
            yield models.Event(message_id=event['_id'],
                               event_type=event['event_type'],
                               generated=event['timestamp'],
                               traits=traits, raw=event.get('raw'))

    def get_event_types(self):
        """Return all event types as an iter of strings."""
        return self.db.event.distinct('event_type')

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event.
        """
        trait_names = set()
        events = self.db.event.find({'event_type': event_type})

        for event in events:
            for trait in event['traits']:
                trait_name = trait['trait_name']
                if trait_name not in trait_names:
                    # Only yield unique trait types: it is assumed that a
                    # given trait name maps to exactly one trait type.
                    trait_names.add(trait_name)
                    yield {'name': trait_name,
                           'data_type': trait['trait_type']}

    def get_traits(self, event_type, trait_name=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait type.

        :param event_type: the type of the Event to filter by
        :param trait_name: the name of the Trait to filter by
        """
        if not trait_name:
            events = self.db.event.find({'event_type': event_type})
        else:
            # We choose events that have both the event_type and the given
            # trait_name, and retrieve only the mentioned traits from them.
            events = self.db.event.find(
                {'$and': [{'event_type': event_type},
                          {'traits.trait_name': trait_name}]},
                {'traits': {'$elemMatch': {'trait_name': trait_name}}})
        for event in events:
            for trait in event['traits']:
                yield models.Trait(name=trait['trait_name'],
                                   dtype=trait['trait_type'],
                                   value=trait['trait_value'])
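
# NOTE: an illustrative sketch (not part of the driver; the event type and
# trait name are hypothetical) of the query built by get_traits() above
# when a trait_name is given:
#
#     db.event.find(
#         {'$and': [{'event_type': 'compute.instance.create'},
#                   {'traits.trait_name': 'hostname'}]},
#         {'traits': {'$elemMatch': {'trait_name': 'hostname'}}})
#
# so each matching event comes back with only the requested trait embedded,
# keeping the documents small before they are unpacked into models.Trait
# instances.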
class Connection(pymongo_base.Connection):
    """Put the data into a MongoDB database

    Collections::

        - meter
          - the raw incoming data
        - resource
          - the metadata for resources
          - { _id: uuid of resource,
              metadata: metadata dictionaries
              user_id: uuid
              project_id: uuid
              meter: [ array of {counter_name: string,
                                 counter_type: string,
                                 counter_unit: string} ]
            }
    """

    CAPABILITIES = utils.update_nested(pymongo_base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    CONNECTION_POOL = pymongo_utils.ConnectionPool()

    STANDARD_AGGREGATES = dict([(a.name, a) for a in [
        pymongo_utils.SUM_AGGREGATION,
        pymongo_utils.AVG_AGGREGATION,
        pymongo_utils.MIN_AGGREGATION,
        pymongo_utils.MAX_AGGREGATION,
        pymongo_utils.COUNT_AGGREGATION,
    ]])

    AGGREGATES = dict([(a.name, a) for a in [
        pymongo_utils.SUM_AGGREGATION,
        pymongo_utils.AVG_AGGREGATION,
        pymongo_utils.MIN_AGGREGATION,
        pymongo_utils.MAX_AGGREGATION,
        pymongo_utils.COUNT_AGGREGATION,
        pymongo_utils.STDDEV_AGGREGATION,
        pymongo_utils.CARDINALITY_AGGREGATION,
    ]])

    SORT_OPERATION_MAPPING = {'desc': (pymongo.DESCENDING, '$lt'),
                              'asc': (pymongo.ASCENDING, '$gt')}

    MAP_RESOURCES = bson.code.Code("""
    function () {
        emit(this.resource_id,
             {user_id: this.user_id,
              project_id: this.project_id,
              source: this.source,
              first_timestamp: this.timestamp,
              last_timestamp: this.timestamp,
              metadata: this.resource_metadata})
    }""")

    REDUCE_RESOURCES = bson.code.Code("""
    function (key, values) {
        var merge = {user_id: values[0].user_id,
                     project_id: values[0].project_id,
                     source: values[0].source,
                     first_timestamp: values[0].first_timestamp,
                     last_timestamp: values[0].last_timestamp,
                     metadata: values[0].metadata}
        values.forEach(function(value) {
            if (merge.first_timestamp - value.first_timestamp > 0) {
                merge.first_timestamp = value.first_timestamp;
                merge.user_id = value.user_id;
                merge.project_id = value.project_id;
                merge.source = value.source;
            } else if (merge.last_timestamp - value.last_timestamp <= 0) {
                merge.last_timestamp = value.last_timestamp;
                merge.metadata = value.metadata;
            }
        });
        return merge;
    }""")

    _GENESIS = datetime.datetime(year=datetime.MINYEAR, month=1, day=1)
    _APOCALYPSE = datetime.datetime(year=datetime.MAXYEAR, month=12,
                                    day=31, hour=23, minute=59,
                                    second=59)

    def __init__(self, url):
        # NOTE(jd) Use our own connection pooling on top of the Pymongo one.
        # We need that otherwise we overflow the MongoDB instance with new
        # connections since we instantiate a Pymongo client each time someone
        # requires a new storage connection.
        self.conn = self.CONNECTION_POOL.connect(url)
        self.version = self.conn.server_info()['versionArray']
        # Require MongoDB 2.4 to use $setOnInsert
        if self.version < pymongo_utils.MINIMUM_COMPATIBLE_MONGODB_VERSION:
            raise storage.StorageBadVersion(
                "Need at least MongoDB %s" %
                pymongo_utils.MINIMUM_COMPATIBLE_MONGODB_VERSION)

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options['database'])
        if connection_options.get('username'):
            self.db.authenticate(connection_options['username'],
                                 connection_options['password'])

        # NOTE(jd) Upgrading is just about creating index, so let's do this
        # on connection to be sure at least the TTL is correctly updated if
        # needed.
        self.upgrade()

    @staticmethod
    def update_ttl(ttl, ttl_index_name, index_field, coll):
        """Update or create time_to_live indexes.

        :param ttl: time to live in seconds.
        :param ttl_index_name: name of the index we want to update or create.
        :param index_field: field with the index that we need to update.
        :param coll: collection which indexes need to be updated.
        """
        indexes = coll.index_information()
        if ttl <= 0:
            if ttl_index_name in indexes:
                coll.drop_index(ttl_index_name)
            return

        if ttl_index_name in indexes:
            return coll.database.command(
                'collMod', coll.name,
                index={'keyPattern': {index_field: pymongo.ASCENDING},
                       'expireAfterSeconds': ttl})

        coll.create_index([(index_field, pymongo.ASCENDING)],
                          expireAfterSeconds=ttl,
                          name=ttl_index_name)

    def upgrade(self):
        # Establish indexes
        #
        # We need variations for user_id vs. project_id because of the
        # way the indexes are stored in b-trees. The user_id and
        # project_id values are usually mutually exclusive in the
        # queries, so the database won't take advantage of an index
        # including both.

        # create collection if not present
        if 'resource' not in self.db.conn.collection_names():
            self.db.conn.create_collection('resource')
        if 'meter' not in self.db.conn.collection_names():
            self.db.conn.create_collection('meter')

        name_qualifier = dict(user_id='', project_id='project_')
        background = dict(user_id=False, project_id=True)
        for primary in ['user_id', 'project_id']:
            name = 'meter_%sidx' % name_qualifier[primary]
            self.db.meter.create_index([
                ('resource_id', pymongo.ASCENDING),
                (primary, pymongo.ASCENDING),
                ('counter_name', pymongo.ASCENDING),
                ('timestamp', pymongo.ASCENDING),
            ], name=name, background=background[primary])

        self.db.meter.create_index([('timestamp', pymongo.DESCENDING)],
                                   name='timestamp_idx')

        # NOTE(ityaptin) This index covers get_resource requests sorting
        # and MongoDB uses part of this compound index for different
        # queries based on any of the user_id, project_id and
        # last_sample_timestamp fields
        self.db.resource.create_index(
            [('user_id', pymongo.DESCENDING),
             ('project_id', pymongo.DESCENDING),
             ('last_sample_timestamp', pymongo.DESCENDING)],
            name='resource_user_project_timestamp',)
        self.db.resource.create_index(
            [('last_sample_timestamp', pymongo.DESCENDING)],
            name='last_sample_timestamp_idx')

        # update or create time_to_live index
        ttl = cfg.CONF.database.metering_time_to_live
        self.update_ttl(ttl, 'meter_ttl', 'timestamp', self.db.meter)
        self.update_ttl(ttl, 'resource_ttl', 'last_sample_timestamp',
                        self.db.resource)

    def clear(self):
        self.conn.drop_database(self.db.name)
        # Connection will be reopened automatically if needed
        self.conn.close()

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        # Record the updated resource metadata - we use $setOnInsert to
        # unconditionally insert sample timestamps and resource metadata
        # (in the update case, this must be conditional on the sample not
        # being out-of-order)
        data = copy.deepcopy(data)
        data['resource_metadata'] = pymongo_utils.improve_keys(
            data.pop('resource_metadata'))
        resource = self.db.resource.find_one_and_update(
            {'_id': data['resource_id']},
            {'$set': {'project_id': data['project_id'],
                      'user_id': data['user_id'],
                      'source': data['source'],
                      },
             '$setOnInsert': {'metadata': data['resource_metadata'],
                              'first_sample_timestamp': data['timestamp'],
                              'last_sample_timestamp': data['timestamp'],
                              },
             '$addToSet': {'meter': {'counter_name': data['counter_name'],
                                     'counter_type': data['counter_type'],
                                     'counter_unit': data['counter_unit'],
                                     },
                           },
             },
            upsert=True,
            return_document=pymongo.ReturnDocument.AFTER,
        )

        # only update last sample timestamp if actually later (the usual
        # in-order case)
        last_sample_timestamp = resource.get('last_sample_timestamp')
        if (last_sample_timestamp is None or
                last_sample_timestamp <= data['timestamp']):
            self.db.resource.update_one(
                {'_id': data['resource_id']},
                {'$set': {'metadata': data['resource_metadata'],
                          'last_sample_timestamp': data['timestamp']}})

        # only update first sample timestamp if actually earlier (the
        # unusual out-of-order case)
        # NOTE: a null first sample timestamp is not updated as this
        # indicates a pre-existing resource document dating from before
        # we started recording these timestamps in the resource collection
        first_sample_timestamp = resource.get('first_sample_timestamp')
        if (first_sample_timestamp is not None and
                first_sample_timestamp > data['timestamp']):
            self.db.resource.update_one(
                {'_id': data['resource_id']},
                {'$set': {'first_sample_timestamp': data['timestamp']}})

        # Record the raw data for the meter. Use a copy so we do not
        # modify a data structure owned by our caller (the driver adds
        # a new key '_id').
        record = copy.copy(data)
        record['recorded_at'] = timeutils.utcnow()
        self.db.meter.insert_one(record)

    def clear_expired_metering_data(self, ttl):
        """Clear expired data from the backend storage system.

        Clearing occurs with the native MongoDB time-to-live feature.
        """
        LOG.debug("Clearing expired metering data is based on native "
                  "MongoDB time to live feature and going in background.")

    @staticmethod
    def _get_marker(db_collection, marker_pairs):
        """Return the marker document according to the attribute-value pairs.

        :param db_collection: Database collection to query.
        :param marker_pairs: Attribute-value pairs filter.
        """
        if db_collection is None:
            return
        if not marker_pairs:
            return
        ret = db_collection.find(marker_pairs, limit=2)

        if ret.count() == 0:
            raise base.NoResultFound
        elif ret.count() > 1:
            raise base.MultipleResultsFound
        else:
            _ret = ret.__getitem__(0)
            return _ret

    @classmethod
    def _recurse_sort_keys(cls, sort_keys, marker, flag):
        _first = sort_keys[0]
        value = marker[_first]
        if len(sort_keys) == 1:
            return {_first: {flag: value}}
        else:
            criteria_equ = {_first: {'eq': value}}
            criteria_cmp = cls._recurse_sort_keys(sort_keys[1:],
                                                  marker, flag)
        return dict(criteria_equ, **criteria_cmp)

    @classmethod
    def _build_sort_instructions(cls, sort_keys=None, sort_dir='desc'):
        """Returns a sort_instruction and paging operator.

        Sort instructions are used in the query to determine what attributes
        to sort on and what direction to use.

        :param sort_keys: array of attributes by which results will be
                          sorted.
        :param sort_dir: direction in which results will be sorted
                         (asc, desc).
        :return: sort instructions and paging operator
        """
        sort_keys = sort_keys or []
        sort_instructions = []
        _sort_dir, operation = cls.SORT_OPERATION_MAPPING.get(
            sort_dir, cls.SORT_OPERATION_MAPPING['desc'])

        for _sort_key in sort_keys:
            _instruction = (_sort_key, _sort_dir)
            sort_instructions.append(_instruction)

        return sort_instructions, operation

    def _get_time_constrained_resources(self, query,
                                        start_timestamp, start_timestamp_op,
                                        end_timestamp, end_timestamp_op,
                                        metaquery, resource, limit):
        """Return an iterable of models.Resource instances

        Items are constrained by sample timestamp.

        :param query: project/user/source query
        :param start_timestamp: modified timestamp start range.
        :param start_timestamp_op: start time operator, like gt, ge.
        :param end_timestamp: modified timestamp end range.
        :param end_timestamp_op: end time operator, like lt, le.
        :param metaquery: dict with metadata to match on.
        :param resource: resource filter.
        """
        if resource is not None:
            query['resource_id'] = resource

        # Add resource_ prefix so it matches the field in the db
        query.update(dict(('resource_' + k, v)
                          for (k, v) in six.iteritems(metaquery)))

        # FIXME(dhellmann): This may not perform very well,
        # but doing any better will require changing the database
        # schema and that will need more thought than I have time
        # to put into it today.
        # Look for resources matching the above criteria and with
        # samples in the time range we care about, then change the
        # resource query to return just those resources by id.
        ts_range = pymongo_utils.make_timestamp_range(start_timestamp,
                                                      end_timestamp,
                                                      start_timestamp_op,
                                                      end_timestamp_op)
        if ts_range:
            query['timestamp'] = ts_range

        sort_keys = base._handle_sort_key('resource')
        sort_instructions = self._build_sort_instructions(sort_keys)[0]

        # use a unique collection name for the results collection,
        # as result post-sorting (as opposed to reduce pre-sorting)
        # is not possible on an inline M-R
        out = 'resource_list_%s' % uuid.uuid4()
        self.db.meter.map_reduce(self.MAP_RESOURCES,
                                 self.REDUCE_RESOURCES,
                                 out=out,
                                 sort={'resource_id': 1},
                                 query=query)
        try:
            if limit is not None:
                results = self.db[out].find(sort=sort_instructions,
                                            limit=limit)
            else:
                results = self.db[out].find(sort=sort_instructions)
            for r in results:
                resource = r['value']
                yield models.Resource(
                    resource_id=r['_id'],
                    user_id=resource['user_id'],
                    project_id=resource['project_id'],
                    first_sample_timestamp=resource['first_timestamp'],
                    last_sample_timestamp=resource['last_timestamp'],
                    source=resource['source'],
                    metadata=pymongo_utils.unquote_keys(
                        resource['metadata']))
        finally:
            self.db[out].drop()

    def _get_floating_resources(self, query, metaquery, resource, limit):
        """Return an iterable of models.Resource instances

        Items are unconstrained by timestamp.

        :param query: project/user/source query
        :param metaquery: dict with metadata to match on.
        :param resource: resource filter.
        """
        if resource is not None:
            query['_id'] = resource

        query.update(dict((k, v)
                          for (k, v) in six.iteritems(metaquery)))

        keys = base._handle_sort_key('resource')
        sort_keys = ['last_sample_timestamp' if i == 'timestamp' else i
                     for i in keys]
        sort_instructions = self._build_sort_instructions(sort_keys)[0]

        if limit is not None:
            results = self.db.resource.find(query, sort=sort_instructions,
                                            limit=limit)
        else:
            results = self.db.resource.find(query, sort=sort_instructions)

        for r in results:
            yield models.Resource(
                resource_id=r['_id'],
                user_id=r['user_id'],
                project_id=r['project_id'],
                first_sample_timestamp=r.get('first_sample_timestamp',
                                             self._GENESIS),
                last_sample_timestamp=r.get('last_sample_timestamp',
                                            self._APOCALYPSE),
                source=r['source'],
                metadata=pymongo_utils.unquote_keys(r['metadata']))

    def get_resources(self, user=None, project=None, source=None,
                      start_timestamp=None, start_timestamp_op=None,
                      end_timestamp=None, end_timestamp_op=None,
                      metaquery=None, resource=None, limit=None):
        """Return an iterable of models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like gt, ge.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return
        metaquery = pymongo_utils.improve_keys(metaquery,
                                               metaquery=True) or {}

        query = {}
        if user is not None:
            query['user_id'] = user
        if project is not None:
            query['project_id'] = project
        if source is not None:
            query['source'] = source

        if start_timestamp or end_timestamp:
            return self._get_time_constrained_resources(
                query, start_timestamp, start_timestamp_op,
                end_timestamp, end_timestamp_op,
                metaquery, resource, limit)
        else:
            return self._get_floating_resources(query, metaquery,
                                                resource, limit)

    @staticmethod
    def _make_period_dict(period, first_ts):
        """Create a period field for the _id of grouped fields.

        :param period: Period duration in seconds
        :param first_ts: First timestamp for first period
        :return: dict with the period grouping expression
        """
        if period >= 0:
            period_unique_dict = {
                "period_start": {
                    "$divide": [
                        {"$subtract": [
                            {"$subtract": ["$timestamp", first_ts]},
                            {"$mod": [{"$subtract": ["$timestamp",
                                                     first_ts]},
                                      period * 1000]}
                        ]},
                        period * 1000
                    ]
                }
            }
        else:
            # NOTE(ityaptin) Hack for older MongoDB versions (2.4 and
            # older). Since 2.6+ we could use the $literal operator.
            period_unique_dict = {"$period_start": {"$add": [0, 0]}}
        return period_unique_dict

    def get_meter_statistics(self, sample_filter, period=None,
                             groupby=None, aggregate=None):
        """Return an iterable of models.Statistics instances.

        Items contain meter statistics described by the query parameters.
        The filter must have a meter value set.
""" if (groupby and set(groupby) - set([ 'user_id', 'project_id', 'resource_id', 'source', 'resource_metadata.instance_type' ])): raise ceilometer.NotImplementedError( "Unable to group by these fields") q = pymongo_utils.make_query_from_filter(sample_filter) group_stage = {} project_stage = { "unit": "$_id.unit", "name": "$_id.name", "first_timestamp": "$first_timestamp", "last_timestamp": "$last_timestamp", "period_start": "$_id.period_start", } # Add timestamps to $group stage group_stage.update({ "first_timestamp": { "$min": "$timestamp" }, "last_timestamp": { "$max": "$timestamp" } }) # Define a _id field for grouped documents unique_group_field = {"name": "$counter_name", "unit": "$counter_unit"} # Define a first timestamp for periods if sample_filter.start_timestamp: first_timestamp = sample_filter.start_timestamp else: first_timestamp_cursor = self.db.meter.find(limit=1, sort=[ ('timestamp', pymongo.ASCENDING) ]) if first_timestamp_cursor.count(): first_timestamp = first_timestamp_cursor[0]['timestamp'] else: first_timestamp = utils.EPOCH_TIME # Add a start_period field to unique identifier of grouped documents if period: period_dict = self._make_period_dict(period, first_timestamp) unique_group_field.update(period_dict) # Add a groupby fields to unique identifier of grouped documents if groupby: unique_group_field.update( dict((field.replace(".", "/"), "$%s" % field) for field in groupby)) group_stage.update({"_id": unique_group_field}) self._compile_aggregate_stages(aggregate, group_stage, project_stage) # Aggregation stages list. It's work one by one and uses documents # from previous stages. aggregation_query = [{ '$match': q }, { "$sort": { "timestamp": 1 } }, { "$group": group_stage }, { "$sort": { "_id.period_start": 1 } }, { "$project": project_stage }] # results is dict in pymongo<=2.6.3 and CommandCursor in >=3.0 results = self.db.meter.aggregate(aggregation_query, **self._make_aggregation_params()) return [ self._stats_result_to_model(point, groupby, aggregate, period, first_timestamp) for point in self._get_results(results) ] def _stats_result_aggregates(self, result, aggregate): stats_args = {} for attr in Connection.STANDARD_AGGREGATES.keys(): if attr in result: stats_args[attr] = result[attr] if aggregate: stats_args['aggregate'] = {} for agr in aggregate: stats_args['aggregate'].update( Connection.AGGREGATES[agr.func].finalize( result, agr.param, self.version)) return stats_args def _stats_result_to_model(self, result, groupby, aggregate, period, first_timestamp): if period is None: period = 0 first_timestamp = pymongo_utils.from_unix_timestamp(first_timestamp) stats_args = self._stats_result_aggregates(result, aggregate) stats_args['unit'] = result['unit'] stats_args['duration'] = (result["last_timestamp"] - result["first_timestamp"]).total_seconds() stats_args['duration_start'] = result['first_timestamp'] stats_args['duration_end'] = result['last_timestamp'] stats_args['period'] = period start = result.get("period_start", 0) * period stats_args['period_start'] = (first_timestamp + datetime.timedelta(seconds=start)) stats_args['period_end'] = (first_timestamp + datetime.timedelta(seconds=start + period) if period else result['last_timestamp']) stats_args['groupby'] = (dict( (g, result['_id'].get(g.replace(".", "/"))) for g in groupby) if groupby else None) return models.Statistics(**stats_args) def _compile_aggregate_stages(self, aggregate, group_stage, project_stage): if not aggregate: for aggregation in Connection.STANDARD_AGGREGATES.values(): group_stage.update( 
aggregation.group(version_array=self.version)) project_stage.update( aggregation.project(version_array=self.version)) else: for description in aggregate: aggregation = Connection.AGGREGATES.get(description.func) if aggregation: if not aggregation.validate(description.param): raise storage.StorageBadAggregate( 'Bad aggregate: %s.%s' % (description.func, description.param)) group_stage.update( aggregation.group(description.param, version_array=self.version)) project_stage.update( aggregation.project(description.param, version_array=self.version)) @staticmethod def _get_results(results): if isinstance(results, dict): return results.get('result', []) else: return results def _make_aggregation_params(self): if self.version >= pymongo_utils.COMPLETE_AGGREGATE_COMPATIBLE_VERSION: return {"allowDiskUse": True} return {}
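# NOTE(editor): illustrative sketch (not part of the driver) of the pipeline
# shape that get_meter_statistics() assembles above. The $match filter and
# the 'avg' accumulator are assumed examples of what the sample filter and a
# standard aggregate contribute; only the stage ordering is taken verbatim
# from the code above.
def _example_statistics_pipeline():
    match = {'counter_name': 'cpu_util'}
    group = {'_id': {'name': '$counter_name', 'unit': '$counter_unit'},
             'first_timestamp': {'$min': '$timestamp'},
             'last_timestamp': {'$max': '$timestamp'},
             'avg': {'$avg': '$counter_volume'}}
    project = {'unit': '$_id.unit', 'name': '$_id.name',
               'first_timestamp': '$first_timestamp',
               'last_timestamp': '$last_timestamp',
               'period_start': '$_id.period_start',
               'avg': '$avg'}
    return [{'$match': match},
            {'$sort': {'timestamp': 1}},
            {'$group': group},
            {'$sort': {'_id.period_start': 1}},
            {'$project': project}]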
class Connection(base.Connection): """Put the data into a SQLAlchemy database. Tables:: - meter - meter definition - { id: meter id name: meter name type: meter type unit: meter unit } - resource - resource definition - { internal_id: resource id resource_id: resource uuid user_id: user uuid project_id: project uuid source_id: source id resource_metadata: metadata dictionary metadata_hash: metadata dictionary hash } - sample - the raw incoming data - { id: sample id meter_id: meter id (->meter.id) resource_id: resource id (->resource.internal_id) volume: sample volume timestamp: datetime recorded_at: datetime message_signature: message signature message_id: message uuid } """ CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES, AVAILABLE_CAPABILITIES) STORAGE_CAPABILITIES = utils.update_nested( base.Connection.STORAGE_CAPABILITIES, AVAILABLE_STORAGE_CAPABILITIES, ) def __init__(self, url): self._engine_facade = db_session.EngineFacade( url, **dict(cfg.CONF.database.items()) ) def upgrade(self): path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'sqlalchemy', 'migrate_repo') migration.db_sync(self._engine_facade.get_engine(), path) def clear(self): engine = self._engine_facade.get_engine() for table in reversed(models.Base.metadata.sorted_tables): engine.execute(table.delete()) self._engine_facade._session_maker.close_all() engine.dispose() @staticmethod def _create_meter(session, name, type, unit): # TODO(gordc): implement lru_cache to improve performance try: nested = session.connection().dialect.name != 'sqlite' with session.begin(nested=nested, subtransactions=not nested): obj = (session.query(models.Meter) .filter(models.Meter.name == name) .filter(models.Meter.type == type) .filter(models.Meter.unit == unit).first()) if obj is None: obj = models.Meter(name=name, type=type, unit=unit) session.add(obj) except dbexc.DBDuplicateEntry: # retry function to pick up duplicate committed object obj = Connection._create_meter(session, name, type, unit) return obj @staticmethod def _create_resource(session, res_id, user_id, project_id, source_id, rmeta): # TODO(gordc): implement lru_cache to improve performance try: nested = session.connection().dialect.name != 'sqlite' m_hash = jsonutils.dumps(rmeta, sort_keys=True) with session.begin(nested=nested, subtransactions=not nested): obj = (session.query(models.Resource.internal_id) .filter(models.Resource.resource_id == res_id) .filter(models.Resource.user_id == user_id) .filter(models.Resource.project_id == project_id) .filter(models.Resource.source_id == source_id) .filter(models.Resource.metadata_hash == hashlib.md5(m_hash).hexdigest()).first()) obj_id = obj[0] if obj else None if obj_id is None: obj = models.Resource(resource_id=res_id, user_id=user_id, project_id=project_id, source_id=source_id, resource_metadata=rmeta) session.add(obj) session.flush() obj_id = obj.internal_id if rmeta and isinstance(rmeta, dict): meta_map = {} for key, v in utils.dict_to_keyval(rmeta): try: _model = sql_utils.META_TYPE_MAP[type(v)] if meta_map.get(_model) is None: meta_map[_model] = [] meta_map[_model].append( {'id': obj_id, 'meta_key': key, 'value': v}) except KeyError: LOG.warn(_("Unknown metadata type. 
Key (%s) " "will not be queryable."), key) for _model in meta_map.keys(): session.execute(_model.__table__.insert(), meta_map[_model]) except dbexc.DBDuplicateEntry: # retry function to pick up duplicate committed object obj_id = Connection._create_resource(session, res_id, user_id, project_id, source_id, rmeta) return obj_id def record_metering_data(self, data): """Write the data to the backend storage system. :param data: a dictionary such as returned by ceilometer.meter.meter_message_from_counter """ session = self._engine_facade.get_session() with session.begin(): # Record the raw data for the sample. meter = self._create_meter(session, data['counter_name'], data['counter_type'], data['counter_unit']) res_id = self._create_resource(session, data['resource_id'], data['user_id'], data['project_id'], data['source'], data['resource_metadata']) sample = models.Sample( meter_id=meter.id, resource_id=res_id, timestamp=data['timestamp'], volume=data['counter_volume'], message_signature=data['message_signature'], message_id=data['message_id']) session.add(sample) def clear_expired_metering_data(self, ttl): """Clear expired data from the backend storage system. Clearing occurs according to the time-to-live. :param ttl: Number of seconds to keep records for. """ session = self._engine_facade.get_session() with session.begin(): end = timeutils.utcnow() - datetime.timedelta(seconds=ttl) sample_q = (session.query(models.Sample) .filter(models.Sample.timestamp < end)) sample_subq = sample_q.subquery() for table in [models.MetaText, models.MetaBigInt, models.MetaFloat, models.MetaBool]: (session.query(table) .join(sample_subq, sample_subq.c.id == table.id) .delete()) rows = sample_q.delete() # remove Meter definitions with no matching samples (session.query(models.Meter) .filter(~models.Meter.samples.any()) .delete(synchronize_session='fetch')) (session.query(models.Resource) .filter(~models.Resource.samples.any()) .delete(synchronize_session='fetch')) LOG.info(_("%d samples removed from database"), rows) def get_resources(self, user=None, project=None, source=None, start_timestamp=None, start_timestamp_op=None, end_timestamp=None, end_timestamp_op=None, metaquery=None, resource=None, pagination=None): """Return an iterable of api_models.Resource instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param source: Optional source filter. :param start_timestamp: Optional modified timestamp start range. :param start_timestamp_op: Optional start time operator, like gt, ge. :param end_timestamp: Optional modified timestamp end range. :param end_timestamp_op: Optional end time operator, like lt, le. :param metaquery: Optional dict with metadata to match on. :param resource: Optional resource filter. :param pagination: Optional pagination query. 
""" if pagination: raise NotImplementedError('Pagination not implemented') s_filter = storage.SampleFilter(user=user, project=project, source=source, start=start_timestamp, start_timestamp_op=start_timestamp_op, end=end_timestamp, end_timestamp_op=end_timestamp_op, metaquery=metaquery, resource=resource) session = self._engine_facade.get_session() # get list of resource_ids res_q = session.query(distinct(models.Resource.resource_id)).join( models.Sample, models.Sample.resource_id == models.Resource.internal_id) res_q = make_query_from_filter(session, res_q, s_filter, require_meter=False) for res_id in res_q.all(): # get latest Sample max_q = (session.query(models.Sample) .join(models.Resource, models.Resource.internal_id == models.Sample.resource_id) .filter(models.Resource.resource_id == res_id[0])) max_q = make_query_from_filter(session, max_q, s_filter, require_meter=False) max_q = max_q.order_by(models.Sample.timestamp.desc(), models.Sample.id.desc()).limit(1) # get the min timestamp value. min_q = (session.query(models.Sample.timestamp) .join(models.Resource, models.Resource.internal_id == models.Sample.resource_id) .filter(models.Resource.resource_id == res_id[0])) min_q = make_query_from_filter(session, min_q, s_filter, require_meter=False) min_q = min_q.order_by(models.Sample.timestamp.asc()).limit(1) sample = max_q.first() if sample: yield api_models.Resource( resource_id=sample.resource.resource_id, project_id=sample.resource.project_id, first_sample_timestamp=min_q.first().timestamp, last_sample_timestamp=sample.timestamp, source=sample.resource.source_id, user_id=sample.resource.user_id, metadata=sample.resource.resource_metadata ) def get_meters(self, user=None, project=None, resource=None, source=None, metaquery=None, pagination=None): """Return an iterable of api_models.Meter instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param resource: Optional ID of the resource. :param source: Optional source filter. :param metaquery: Optional dict with metadata to match on. :param pagination: Optional pagination query. """ if pagination: raise NotImplementedError('Pagination not implemented') s_filter = storage.SampleFilter(user=user, project=project, source=source, metaquery=metaquery, resource=resource) # NOTE(gordc): get latest sample of each meter/resource. we do not # filter here as we want to filter only on latest record. session = self._engine_facade.get_session() subq = session.query(func.max(models.Sample.id).label('id')).join( models.Resource, models.Resource.internal_id == models.Sample.resource_id).group_by( models.Sample.meter_id, models.Resource.resource_id) if resource: subq = subq.filter(models.Resource.resource_id == resource) subq = subq.subquery() # get meter details for samples. 
query_sample = (session.query(models.Sample.meter_id, models.Meter.name, models.Meter.type, models.Meter.unit, models.Resource.resource_id, models.Resource.project_id, models.Resource.source_id, models.Resource.user_id).join( subq, subq.c.id == models.Sample.id) .join(models.Meter, models.Meter.id == models.Sample.meter_id) .join(models.Resource, models.Resource.internal_id == models.Sample.resource_id)) query_sample = make_query_from_filter(session, query_sample, s_filter, require_meter=False) for row in query_sample.all(): yield api_models.Meter( name=row.name, type=row.type, unit=row.unit, resource_id=row.resource_id, project_id=row.project_id, source=row.source_id, user_id=row.user_id) def _retrieve_samples(self, query): samples = query.all() for s in samples: # Remove the id generated by the database when # the sample was inserted. It is an implementation # detail that should not leak outside of the driver. yield api_models.Sample( source=s.source_id, counter_name=s.counter_name, counter_type=s.counter_type, counter_unit=s.counter_unit, counter_volume=s.counter_volume, user_id=s.user_id, project_id=s.project_id, resource_id=s.resource_id, timestamp=s.timestamp, recorded_at=s.recorded_at, resource_metadata=s.resource_metadata, message_id=s.message_id, message_signature=s.message_signature, ) def get_samples(self, sample_filter, limit=None): """Return an iterable of api_models.Samples. :param sample_filter: Filter. :param limit: Maximum number of results to return. """ if limit == 0: return [] session = self._engine_facade.get_session() query = session.query(models.Sample.timestamp, models.Sample.recorded_at, models.Sample.message_id, models.Sample.message_signature, models.Sample.volume.label('counter_volume'), models.Meter.name.label('counter_name'), models.Meter.type.label('counter_type'), models.Meter.unit.label('counter_unit'), models.Resource.source_id, models.Resource.user_id, models.Resource.project_id, models.Resource.resource_metadata, models.Resource.resource_id).join( models.Meter, models.Meter.id == models.Sample.meter_id).join( models.Resource, models.Resource.internal_id == models.Sample.resource_id).order_by( models.Sample.timestamp.desc()) query = make_query_from_filter(session, query, sample_filter, require_meter=False) if limit: query = query.limit(limit) return self._retrieve_samples(query) def query_samples(self, filter_expr=None, orderby=None, limit=None): if limit == 0: return [] session = self._engine_facade.get_session() query = session.query(models.FullSample) transformer = sql_utils.QueryTransformer(models.FullSample, query) if filter_expr is not None: transformer.apply_filter(filter_expr) transformer.apply_options(orderby, limit) return self._retrieve_samples(transformer.get_query()) @staticmethod def _get_aggregate_functions(aggregate): if not aggregate: return [f for f in STANDARD_AGGREGATES.values()] functions = [] for a in aggregate: if a.func in STANDARD_AGGREGATES: functions.append(STANDARD_AGGREGATES[a.func]) elif a.func in UNPARAMETERIZED_AGGREGATES: functions.append(UNPARAMETERIZED_AGGREGATES[a.func]) elif a.func in PARAMETERIZED_AGGREGATES['compute']: validate = PARAMETERIZED_AGGREGATES['validate'].get(a.func) if not (validate and validate(a.param)): raise storage.StorageBadAggregate('Bad aggregate: %s.%s' % (a.func, a.param)) compute = PARAMETERIZED_AGGREGATES['compute'][a.func] functions.append(compute(a.param)) else: raise NotImplementedError('Selectable aggregate function %s' ' is not supported' % a.func) return functions def 
_make_stats_query(self, sample_filter, groupby, aggregate): select = [ func.min(models.Sample.timestamp).label('tsmin'), func.max(models.Sample.timestamp).label('tsmax'), models.Meter.unit ] select.extend(self._get_aggregate_functions(aggregate)) session = self._engine_facade.get_session() if groupby: group_attributes = [getattr(models.Resource, g) for g in groupby] select.extend(group_attributes) query = (session.query(*select) .join(models.Meter, models.Meter.id == models.Sample.meter_id) .join( models.Resource, models.Resource.internal_id == models.Sample.resource_id) .group_by(models.Meter.unit)) if groupby: query = query.group_by(*group_attributes) return make_query_from_filter(session, query, sample_filter) @staticmethod def _stats_result_aggregates(result, aggregate): stats_args = {} if isinstance(result.count, (int, long)): stats_args['count'] = result.count for attr in ['min', 'max', 'sum', 'avg']: if hasattr(result, attr): stats_args[attr] = getattr(result, attr) if aggregate: stats_args['aggregate'] = {} for a in aggregate: key = '%s%s' % (a.func, '/%s' % a.param if a.param else '') stats_args['aggregate'][key] = getattr(result, key) return stats_args @staticmethod def _stats_result_to_model(result, period, period_start, period_end, groupby, aggregate): stats_args = Connection._stats_result_aggregates(result, aggregate) stats_args['unit'] = result.unit duration = (timeutils.delta_seconds(result.tsmin, result.tsmax) if result.tsmin is not None and result.tsmax is not None else None) stats_args['duration'] = duration stats_args['duration_start'] = result.tsmin stats_args['duration_end'] = result.tsmax stats_args['period'] = period stats_args['period_start'] = period_start stats_args['period_end'] = period_end stats_args['groupby'] = (dict( (g, getattr(result, g)) for g in groupby) if groupby else None) return api_models.Statistics(**stats_args) def get_meter_statistics(self, sample_filter, period=None, groupby=None, aggregate=None): """Return an iterable of api_models.Statistics instances. Items are containing meter statistics described by the query parameters. The filter must have a meter value set. """ if groupby: for group in groupby: if group not in ['user_id', 'project_id', 'resource_id']: raise NotImplementedError('Unable to group by ' 'these fields') if not period: for res in self._make_stats_query(sample_filter, groupby, aggregate): if res.count: yield self._stats_result_to_model(res, 0, res.tsmin, res.tsmax, groupby, aggregate) return if not sample_filter.start or not sample_filter.end: res = self._make_stats_query(sample_filter, None, aggregate).first() if not res: # NOTE(liusheng):The 'res' may be NoneType, because no # sample has found with sample filter(s). return query = self._make_stats_query(sample_filter, groupby, aggregate) # HACK(jd) This is an awful method to compute stats by period, but # since we're trying to be SQL agnostic we have to write portable # code, so here it is, admire! We're going to do one request to get # stats by period. We would like to use GROUP BY, but there's no # portable way to manipulate timestamp in SQL, so we can't. 
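        # NOTE(editor): the loop below issues one filtered query per period
        # window, so the cost grows linearly with the number of periods in
        # the requested time range.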
for period_start, period_end in base.iter_period( sample_filter.start or res.tsmin, sample_filter.end or res.tsmax, period): q = query.filter(models.Sample.timestamp >= period_start) q = q.filter(models.Sample.timestamp < period_end) for r in q.all(): if r.count: yield self._stats_result_to_model( result=r, period=int(timeutils.delta_seconds(period_start, period_end)), period_start=period_start, period_end=period_end, groupby=groupby, aggregate=aggregate ) def _get_or_create_trait_type(self, trait_type, data_type, session=None): """Find if this trait already exists in the database. If it does not, create a new entry in the trait type table. """ if session is None: session = self._engine_facade.get_session() with session.begin(subtransactions=True): tt = session.query(models.TraitType).filter( models.TraitType.desc == trait_type, models.TraitType.data_type == data_type).first() if not tt: tt = models.TraitType(trait_type, data_type) session.add(tt) return tt def _make_trait(self, trait_model, event, session=None): """Make a new Trait from a Trait model. Doesn't flush or add to session. """ trait_type = self._get_or_create_trait_type(trait_model.name, trait_model.dtype, session) value_map = models.Trait._value_map values = {'t_string': None, 't_float': None, 't_int': None, 't_datetime': None} value = trait_model.value values[value_map[trait_model.dtype]] = value return models.Trait(trait_type, event, **values) def _get_or_create_event_type(self, event_type, session=None): """Check if an event type with the supplied name is already exists. If not, we create it and return the record. This may result in a flush. """ if session is None: session = self._engine_facade.get_session() with session.begin(subtransactions=True): et = session.query(models.EventType).filter( models.EventType.desc == event_type).first() if not et: et = models.EventType(event_type) session.add(et) return et def _record_event(self, session, event_model): """Store a single Event, including related Traits.""" with session.begin(subtransactions=True): event_type = self._get_or_create_event_type(event_model.event_type, session=session) event = models.Event(event_model.message_id, event_type, event_model.generated) session.add(event) new_traits = [] if event_model.traits: for trait in event_model.traits: t = self._make_trait(trait, event, session=session) session.add(t) new_traits.append(t) # Note: we don't flush here, explicitly (unless a new trait or event # does it). Otherwise, just wait until all the Events are staged. return event, new_traits def record_events(self, event_models): """Write the events to SQL database via sqlalchemy. :param event_models: a list of model.Event objects. Returns a list of events that could not be saved in a (reason, event) tuple. Reasons are enumerated in storage.model.Event Flush when they're all added, unless new EventTypes or TraitTypes are added along the way. """ session = self._engine_facade.get_session() events = [] problem_events = [] for event_model in event_models: event = None try: with session.begin(): event = self._record_event(session, event_model) except dbexc.DBDuplicateEntry as e: LOG.exception(_("Failed to record duplicated event: %s") % e) problem_events.append((api_models.Event.DUPLICATE, event_model)) except Exception as e: LOG.exception(_('Failed to record event: %s') % e) problem_events.append((api_models.Event.UNKNOWN_PROBLEM, event_model)) events.append(event) return problem_events def get_events(self, event_filter): """Return an iterable of model.Event objects. 
:param event_filter: EventFilter instance """ start = event_filter.start_time end = event_filter.end_time session = self._engine_facade.get_session() LOG.debug(_("Getting events that match filter: %s") % event_filter) with session.begin(): event_query = session.query(models.Event) # Build up the join conditions event_join_conditions = [models.EventType.id == models.Event.event_type_id] if event_filter.event_type: event_join_conditions.append(models.EventType.desc == event_filter.event_type) event_query = event_query.join(models.EventType, and_(*event_join_conditions)) # Build up the where conditions event_filter_conditions = [] if event_filter.message_id: event_filter_conditions.append(models.Event.message_id == event_filter.message_id) if start: event_filter_conditions.append(models.Event.generated >= start) if end: event_filter_conditions.append(models.Event.generated <= end) if event_filter_conditions: event_query = (event_query. filter(and_(*event_filter_conditions))) event_models_dict = {} if event_filter.traits_filter: for trait_filter in event_filter.traits_filter: # Build a sub query that joins Trait to TraitType # where the trait name matches trait_name = trait_filter.pop('key') op = trait_filter.pop('op', 'eq') conditions = [models.Trait.trait_type_id == models.TraitType.id, models.TraitType.desc == trait_name] for key, value in six.iteritems(trait_filter): sql_utils.trait_op_condition(conditions, key, value, op) trait_query = (session.query(models.Trait.event_id). join(models.TraitType, and_(*conditions)).subquery()) event_query = (event_query. join(trait_query, models.Event.id == trait_query.c.event_id)) else: # If there are no trait filters, grab the events from the db query = (session.query(models.Event.id, models.Event.generated, models.Event.message_id, models.EventType.desc). join(models.EventType, and_(*event_join_conditions))) if event_filter_conditions: query = query.filter(and_(*event_filter_conditions)) for (id_, generated, message_id, desc_) in query.all(): event_models_dict[id_] = api_models.Event(message_id, desc_, generated, []) # Build event models for the events event_query = event_query.subquery() query = (session.query(models.Trait). join(models.TraitType, models.Trait.trait_type_id == models.TraitType.id). join(event_query, models.Trait.event_id == event_query.c.id)) # Now convert the sqlalchemy objects back into Models ... for trait in query.all(): event = event_models_dict.get(trait.event_id) if not event: event = api_models.Event( trait.event.message_id, trait.event.event_type.desc, trait.event.generated, []) event_models_dict[trait.event_id] = event trait_model = api_models.Trait(trait.trait_type.desc, trait.trait_type.data_type, trait.get_value()) event.append_trait(trait_model) event_models = event_models_dict.values() return sorted(event_models, key=operator.attrgetter('generated')) def get_event_types(self): """Return all event types as an iterable of strings.""" session = self._engine_facade.get_session() with session.begin(): query = (session.query(models.EventType.desc). order_by(models.EventType.desc)) for name in query.all(): # The query returns a tuple with one element. yield name[0] def get_trait_types(self, event_type): """Return a dictionary containing the name and data type of the trait. Only trait types for the provided event_type are returned. 
:param event_type: the type of the Event """ session = self._engine_facade.get_session() LOG.debug(_("Get traits for %s") % event_type) with session.begin(): query = (session.query(models.TraitType.desc, models.TraitType.data_type) .join(models.Trait, models.Trait.trait_type_id == models.TraitType.id) .join(models.Event, models.Event.id == models.Trait.event_id) .join(models.EventType, and_(models.EventType.id == models.Event.id, models.EventType.desc == event_type)) .group_by(models.TraitType.desc, models.TraitType.data_type) .distinct()) for desc_, dtype in query.all(): yield {'name': desc_, 'data_type': dtype} def get_traits(self, event_type, trait_type=None): """Return all trait instances associated with an event_type. If trait_type is specified, only return instances of that trait type. :param event_type: the type of the Event to filter by :param trait_type: the name of the Trait to filter by """ session = self._engine_facade.get_session() with session.begin(): trait_type_filters = [models.TraitType.id == models.Trait.trait_type_id] if trait_type: trait_type_filters.append(models.TraitType.desc == trait_type) query = (session.query(models.Trait) .join(models.TraitType, and_(*trait_type_filters)) .join(models.Event, models.Event.id == models.Trait.event_id) .join(models.EventType, and_(models.EventType.id == models.Event.event_type_id, models.EventType.desc == event_type))) for trait in query.all(): type = trait.trait_type yield api_models.Trait(name=type.desc, dtype=type.data_type, value=trait.get_value())
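# NOTE(editor): stand-alone approximation of base.iter_period() as used by
# get_meter_statistics() above. The real helper lives in the storage base
# module and may align or clamp windows differently; the assumed semantics
# are: yield consecutive half-open [start, end) windows of `period` seconds.
import datetime  # already imported at module top in the real file


def _example_iter_period(start, end, period):
    window = datetime.timedelta(seconds=period)
    while start < end:
        yield start, start + window
        start += window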
class Connection(base.Connection): """Get Ceilometer data from InfluxDB and ElasticSearch databases. Samples are stored in the following format in InfluxDB: - measurement: sample - tags (indexed): user_id, resource_id, project_id, source and configured metadata fields - fields (not indexed): counter_type -> type, counter_unit -> unit, counter_volume -> value, counter_name -> meter, message_id, message_signature, timestamp and recorded_at. Resources and meters are stored in ElasticSearch. Resources: { "_index": "ceilometer_resource", "_type": "<source>", "_id": "<resource_id>", "_source":{ "first_sample_timestamp": "<datetime in isoformat>", "last_sample_timestamp": "<datetime in isoformat>", "project_id": "<project_id>", "user_id": "<user_id>", "metadata": { "foo" : "bar", "foofoo" : {"barbar": {"foo": "bar"}} }, "meters": {"<meter_name>": {"unit": "<meter_unit>", "type": "<meter_type>"} } } This class has 'record_metering_data' implementation, but it is used only for testing needs. In real life, data will be recorded by StackLight """ CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES, AVAILABLE_CAPABILITIES) STORAGE_CAPABILITIES = utils.update_nested( base.Connection.STORAGE_CAPABILITIES, AVAILABLE_STORAGE_CAPABILITIES, ) resource_index = "ceilometer_resource" _refresh_on_write = False def __init__(self, url): if cfg.CONF.database.resource_connection: url_split = netutils.urlsplit( cfg.CONF.database.resource_connection) self.resource_connection = es.Elasticsearch(url_split.netloc) else: self.resource_connection = None user, pwd, host, port, self.database = influx_utils.split_url(url) self.sample_connection = influxdb.InfluxDBClient( host, port, user, pwd, self.database) def upgrade(self): self.upgrade_resource_database() self.upgrade_sample_database() def upgrade_resource_database(self): if not self.resource_connection: return iclient = es.client.IndicesClient(self.resource_connection) template = { 'template': 'ceilometer_*', 'mappings': { '_default_': { 'properties': { 'first_sample_timestamp': { 'type': 'date' }, 'last_sample_timestamp': { 'type': 'date' }, }, "dynamic_templates": [{ "string_fields": { "match": "*", "match_mapping_type": "string", "mapping": { "type": "string", "index": "not_analyzed" } } }] } } } iclient.put_template(name='ceilometer_resource_template', body=template) iclient.create(self.resource_index) def upgrade_sample_database(self): try: self.sample_connection.create_database(self.database) except influxdb.exceptions.InfluxDBClientError as e: if "database already exists" not in e.content: raise self.sample_connection.create_retention_policy( name=influx_utils.RETENTION_POLICY_NAME, duration="INF", replication=cfg.CONF.database.influxdb_replication, database=self.database, default=True) if cfg.CONF.database.metering_time_to_live > 0: duration = "%ss" % cfg.CONF.database.metering_time_to_live self.sample_connection.alter_retention_policy( name=influx_utils.RETENTION_POLICY_NAME, database=self.database, duration=duration, replication=cfg.CONF.database.influxdb_replication, default=True) def get_meters(self, user=None, project=None, resource=None, source=None, metaquery=None, limit=None, unique=None): if not self.resource_connection: raise base.NoResultFound( "Resource connection url is not defined and " "meter requests could not be processed") if limit == 0: return q_args = es_utils.make_query(self.resource_index, resource=resource, user=user, project=project, source=source, metaquery=metaquery, limit=limit) results = self.resource_connection.search( 
            fields=['_type', '_id', '_source'], **q_args)
        return es_utils.search_results_to_meters(results, limit, unique)

    def get_resources(self, user=None, project=None, source=None,
                      start_timestamp=None, start_timestamp_op=None,
                      end_timestamp=None, end_timestamp_op=None,
                      metaquery=None, resource=None, limit=None):
        if not self.resource_connection:
            raise base.NoResultFound(
                "Resource connection url is not defined and "
                "resource requests could not be processed")
        if limit == 0:
            return
        q_args = es_utils.make_query(self.resource_index, user, project,
                                     source, start_timestamp,
                                     start_timestamp_op, end_timestamp,
                                     end_timestamp_op, metaquery, resource,
                                     limit)
        results = self.resource_connection.search(
            fields=['_type', '_id', '_source'], **q_args)
        return es_utils.search_results_to_resources(results)

    def get_meter_statistics(self, sample_filter, period=None, groupby=None,
                             aggregate=None):
        # NOTE: InfluxDB queries need an explicit lower time bound;
        # otherwise InfluxDB defaults it to 1970-01-01T00:00:00.
        if (groupby and set(groupby) -
                set(['user_id', 'project_id', 'resource_id', 'source',
                     'resource_metadata.instance_type'])):
            raise ceilometer.NotImplementedError(
                "Unable to group by these fields")
        if any([aggr.func == 'cardinality' for aggr in (aggregate or [])]):
            raise ceilometer.NotImplementedError(
                "Cardinality aggregation is not supported "
                "by StackLight backends")
        try:
            if (not sample_filter.start_timestamp or
                    not sample_filter.end_timestamp):
                first, last = self.get_time_boundary(sample_filter)
                sample_filter.start_timestamp = \
                    sample_filter.start_timestamp or first
            unit = self.get_unit(sample_filter)
        except base.NoResultFound:
            return []
        query = influx_utils.make_aggregate_query(sample_filter, period,
                                                  groupby, aggregate)
        response = self._query(query)
        stats = []
        for serie, points in response.items():
            measurement, tags = serie
            for point in points or []:
                stats.append(
                    influx_utils.point_to_stat(point, tags, period,
                                               aggregate, unit))
        return [stat for stat in stats if stat]

    def get_samples(self, sample_filter, limit=None):
        if limit == 0:
            return
        response = self._query(
            influx_utils.make_list_query(sample_filter, limit))
        for point in response.get_points(influx_utils.MEASUREMENT):
            yield influx_utils.point_to_sample(point)

    def query_samples(self, filter_expr=None, orderby=None, limit=None):
        q = influx_utils.make_complex_query(filter_expr, limit)
        response = self._query(q)
        samples = []
        for point in response.get_points(influx_utils.MEASUREMENT):
            samples.append(influx_utils.point_to_sample(point))
        return influx_utils.sort_samples(samples, orderby)

    def get_unit(self, sample_filter):
        meter = sample_filter.meter
        if meter in units.UNITS_BY_METRIC:
            return units.UNITS_BY_METRIC[meter]
        response = self._query(influx_utils.make_unit_query(sample_filter))
        try:
            point = response.get_points(influx_utils.MEASUREMENT).next()
        except StopIteration:
            raise base.NoResultFound()
        units.UNITS_BY_METRIC[meter] = point['unit']
        return point['unit']

    def get_time_boundary(self, sample_filter):
        """Find the timestamps of the first and last matching samples."""
        response = self._query(
            influx_utils.make_time_bounds_query(sample_filter))
        try:
            first_point = response.get_points(influx_utils.MEASUREMENT).next()
        except StopIteration:
            raise base.NoResultFound()
        start_timestamp = utils.sanitize_timestamp(first_point['first'])
        end_timestamp = utils.sanitize_timestamp(first_point['last'])
        return start_timestamp, end_timestamp

    def _query(self, q):
        """Make a query to the InfluxDB database.

        :param q: Query string in InfluxDB query format.
:returns a response ResultSet """ LOG.debug("InfluxDB query requested: %s" % q) try: return self.sample_connection.query(q) except influxdb.exceptions.InfluxDBClientError as e: LOG.exception(_LE("Client error during the InfluxDB query: %s"), e) return influxdb.resultset.ResultSet({}) def record_metering_data(self, data): """Records data into databases Method is needed for testing needs only. In real life, data will be written to the databases by StackLight. """ data['counter_name'] = utils.decode_unicode(data['counter_name']) self.resource_connection.update(index=self.resource_index, doc_type='source', id=data['resource_id'], body=es_utils.sample_to_resource(data)) self.sample_connection.write_points( [influx_utils.sample_to_point(data)], "n", self.database, influx_utils.RETENTION_POLICY_NAME) if self._refresh_on_write: self.resource_connection.indices.refresh(self.resource_index) while self.resource_connection.cluster.pending_tasks( local=True)['tasks']: pass def clear(self): self.resource_connection.indices.delete(index=self.resource_index, ignore=[400, 404]) self.sample_connection.drop_database(self.database)
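# NOTE(editor): minimal sketch of the InfluxDB point layout documented in the
# class docstring above. influx_utils.sample_to_point() is the actual
# implementation and may differ in details (field encoding, timestamp
# precision); the field names follow the documented mapping
# (counter_name -> meter, counter_volume -> value, and so on).
def _example_sample_to_point(data):
    return {
        'measurement': 'sample',
        'tags': {'user_id': data['user_id'],
                 'resource_id': data['resource_id'],
                 'project_id': data['project_id'],
                 'source': data['source']},
        'fields': {'meter': data['counter_name'],
                   'type': data['counter_type'],
                   'unit': data['counter_unit'],
                   'value': data['counter_volume'],
                   'message_id': data['message_id'],
                   'message_signature': data['message_signature'],
                   'recorded_at': data.get('recorded_at')},
        'time': data['timestamp'],
    }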
def __init__(self, conf, AVAILABLE_CAPABILITIES):
    """Merge the driver's available capabilities over the defaults."""
    self._CAPABILITIES = utils.update_nested(self.DEFAULT_CAPABILITIES,
                                             AVAILABLE_CAPABILITIES)
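# NOTE(editor): stand-alone sketch of the deep merge utils.update_nested()
# performs above. Assumed semantics: values from `updates` recursively
# override `original`, returning a new dict and mutating neither input.
def _example_update_nested(original, updates):
    result = dict(original)
    for key, value in updates.items():
        if isinstance(value, dict) and isinstance(result.get(key), dict):
            result[key] = _example_update_nested(result[key], value)
        else:
            result[key] = value
    return result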
class Connection(base.Connection): """Put the event data into a SQLAlchemy database. Tables:: - EventType - event definition - { id: event type id desc: description of event } - Event - event data - { id: event id message_id: message id generated = timestamp of event event_type_id = event type -> eventtype.id } - TraitInt - int trait value - { event_id: event -> event.id key: trait name value: integer value } - TraitDatetime - datetime trait value - { event_id: event -> event.id key: trait name value: datetime value } - TraitText - text trait value - { event_id: event -> event.id key: trait name value: text value } - TraitFloat - float trait value - { event_id: event -> event.id key: trait name value: float value } """ CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES, AVAILABLE_CAPABILITIES) STORAGE_CAPABILITIES = utils.update_nested( base.Connection.STORAGE_CAPABILITIES, AVAILABLE_STORAGE_CAPABILITIES, ) def __init__(self, url): # Set max_retries to 0, since oslo.db in certain cases may attempt # to retry making the db connection retried max_retries ^ 2 times # in failure case and db reconnection has already been implemented # in storage.__init__.get_connection_from_config function options = dict(cfg.CONF.database.items()) options['max_retries'] = 0 self._engine_facade = db_session.EngineFacade(url, **options) def upgrade(self): # NOTE(gordc): to minimise memory, only import migration when needed from oslo_db.sqlalchemy import migration path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '..', '..', 'storage', 'sqlalchemy', 'migrate_repo') migration.db_sync(self._engine_facade.get_engine(), path) def clear(self): engine = self._engine_facade.get_engine() for table in reversed(models.Base.metadata.sorted_tables): engine.execute(table.delete()) engine.dispose() def _get_or_create_event_type(self, event_type, session=None): """Check if an event type with the supplied name is already exists. If not, we create it and return the record. This may result in a flush. """ if session is None: session = self._engine_facade.get_session() with session.begin(subtransactions=True): et = session.query(models.EventType).filter( models.EventType.desc == event_type).first() if not et: et = models.EventType(event_type) session.add(et) return et def record_events(self, event_models): """Write the events to SQL database via sqlalchemy. :param event_models: a list of model.Event objects. """ session = self._engine_facade.get_session() error = None for event_model in event_models: event = None try: with session.begin(): event_type = self._get_or_create_event_type( event_model.event_type, session=session) event = models.Event(event_model.message_id, event_type, event_model.generated, event_model.raw) session.add(event) session.flush() if event_model.traits: trait_map = {} for trait in event_model.traits: if trait_map.get(trait.dtype) is None: trait_map[trait.dtype] = [] trait_map[trait.dtype].append( {'event_id': event.id, 'key': trait.name, 'value': trait.value}) for dtype in trait_map.keys(): model = TRAIT_ID_TO_MODEL[dtype] session.execute(model.__table__.insert(), trait_map[dtype]) except dbexc.DBDuplicateEntry as e: LOG.info(_LI("Duplicate event detected, skipping it: %s") % e) except KeyError as e: LOG.exception(_LE('Failed to record event: %s') % e) except Exception as e: LOG.exception(_LE('Failed to record event: %s') % e) error = e if error: raise error def get_events(self, event_filter, limit=None): """Return an iterable of model.Event objects. 
:param event_filter: EventFilter instance """ if limit == 0: return session = self._engine_facade.get_session() with session.begin(): # Build up the join conditions event_join_conditions = [models.EventType.id == models.Event.event_type_id] if event_filter.event_type: event_join_conditions.append(models.EventType.desc == event_filter.event_type) # Build up the where conditions event_filter_conditions = [] if event_filter.message_id: event_filter_conditions.append( models.Event.message_id == event_filter.message_id) if event_filter.start_timestamp: event_filter_conditions.append( models.Event.generated >= event_filter.start_timestamp) if event_filter.end_timestamp: event_filter_conditions.append( models.Event.generated <= event_filter.end_timestamp) trait_subq = None # Build trait filter if event_filter.traits_filter: filters = list(event_filter.traits_filter) trait_filter = filters.pop() key = trait_filter.pop('key') op = trait_filter.pop('op', 'eq') trait_type, value = list(trait_filter.items())[0] trait_subq = _build_trait_query(session, trait_type, key, value, op) for trait_filter in filters: key = trait_filter.pop('key') op = trait_filter.pop('op', 'eq') trait_type, value = list(trait_filter.items())[0] q = _build_trait_query(session, trait_type, key, value, op) trait_subq = trait_subq.filter( trait_subq.subquery().c.ev_id == q.subquery().c.ev_id) trait_subq = trait_subq.subquery() query = (session.query(models.Event.id) .join(models.EventType, sa.and_(*event_join_conditions))) if trait_subq is not None: query = query.join(trait_subq, trait_subq.c.ev_id == models.Event.id) if event_filter.admin_proj: no_proj_q = session.query(models.TraitText.event_id).filter( models.TraitText.key == 'project_id') admin_q = (session.query(models.TraitText.event_id).filter( ~sa.exists().where(models.TraitText.event_id == no_proj_q.subquery().c.event_id)).union( session.query(models.TraitText.event_id).filter(sa.and_( models.TraitText.key == 'project_id', models.TraitText.value == event_filter.admin_proj, models.Event.id == models.TraitText.event_id)))) query = query.filter(sa.exists().where( models.Event.id == admin_q.subquery().c.trait_text_event_id)) if event_filter_conditions: query = query.filter(sa.and_(*event_filter_conditions)) query = query.order_by(models.Event.generated).limit(limit) event_list = {} # get a list of all events that match filters for (id_, generated, message_id, desc, raw) in query.add_columns( models.Event.generated, models.Event.message_id, models.EventType.desc, models.Event.raw).all(): event_list[id_] = api_models.Event(message_id, desc, generated, [], raw) # Query all traits related to events. # NOTE (gordc): cast is done because pgsql defaults to TEXT when # handling unknown values such as null. 
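            # NOTE(editor): the four per-type SELECTs below are padded with
            # typed NULL columns so they all share one six-column shape
            # (event_id, key, datetime, int, float, text) and can be read
            # back through a single UNION ALL cursor.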
trait_q = ( session.query( models.TraitDatetime.event_id, models.TraitDatetime.key, models.TraitDatetime.value, sa.cast(sa.null(), sa.Integer), sa.cast(sa.null(), sa.Float(53)), sa.cast(sa.null(), sa.String(255))) .filter(sa.exists().where( models.TraitDatetime.event_id == query.subquery().c.id)) ).union_all( session.query( models.TraitInt.event_id, models.TraitInt.key, sa.null(), models.TraitInt.value, sa.null(), sa.null()) .filter(sa.exists().where( models.TraitInt.event_id == query.subquery().c.id)), session.query( models.TraitFloat.event_id, models.TraitFloat.key, sa.null(), sa.null(), models.TraitFloat.value, sa.null()) .filter(sa.exists().where( models.TraitFloat.event_id == query.subquery().c.id)), session.query( models.TraitText.event_id, models.TraitText.key, sa.null(), sa.null(), sa.null(), models.TraitText.value) .filter(sa.exists().where( models.TraitText.event_id == query.subquery().c.id))) for id_, key, t_date, t_int, t_float, t_text in ( trait_q.order_by(models.TraitDatetime.key)).all(): if t_int is not None: dtype = api_models.Trait.INT_TYPE val = t_int elif t_float is not None: dtype = api_models.Trait.FLOAT_TYPE val = t_float elif t_date is not None: dtype = api_models.Trait.DATETIME_TYPE val = t_date else: dtype = api_models.Trait.TEXT_TYPE val = t_text try: trait_model = api_models.Trait(key, dtype, val) event_list[id_].append_trait(trait_model) except KeyError: # NOTE(gordc): this is expected as we do not set REPEATABLE # READ (bug 1506717). if query is run while recording new # event data, trait query may return more data than event # query. they can be safely discarded. pass return event_list.values() def get_event_types(self): """Return all event types as an iterable of strings.""" session = self._engine_facade.get_session() with session.begin(): query = (session.query(models.EventType.desc). order_by(models.EventType.desc)) for name in query.all(): # The query returns a tuple with one element. yield name[0] def get_trait_types(self, event_type): """Return a dictionary containing the name and data type of the trait. Only trait types for the provided event_type are returned. :param event_type: the type of the Event """ session = self._engine_facade.get_session() with session.begin(): for trait_model in [models.TraitText, models.TraitInt, models.TraitFloat, models.TraitDatetime]: query = (session.query(trait_model.key) .join(models.Event, models.Event.id == trait_model.event_id) .join(models.EventType, sa.and_(models.EventType.id == models.Event.event_type_id, models.EventType.desc == event_type)) .distinct()) dtype = TRAIT_MODEL_TO_ID.get(trait_model) for row in query.all(): yield {'name': row[0], 'data_type': dtype} def get_traits(self, event_type, trait_type=None): """Return all trait instances associated with an event_type. If trait_type is specified, only return instances of that trait type. 
:param event_type: the type of the Event to filter by :param trait_type: the name of the Trait to filter by """ session = self._engine_facade.get_session() with session.begin(): for trait_model in [models.TraitText, models.TraitInt, models.TraitFloat, models.TraitDatetime]: query = (session.query(trait_model.key, trait_model.value) .join(models.Event, models.Event.id == trait_model.event_id) .join(models.EventType, sa.and_(models.EventType.id == models.Event.event_type_id, models.EventType.desc == event_type)) .order_by(trait_model.key)) if trait_type: query = query.filter(trait_model.key == trait_type) dtype = TRAIT_MODEL_TO_ID.get(trait_model) for k, v in query.all(): yield api_models.Trait(name=k, dtype=dtype, value=v) def clear_expired_event_data(self, ttl): """Clear expired data from the backend storage system. Clearing occurs according to the time-to-live. :param ttl: Number of seconds to keep records for. """ session = self._engine_facade.get_session() with session.begin(): end = timeutils.utcnow() - datetime.timedelta(seconds=ttl) event_q = (session.query(models.Event.id) .filter(models.Event.generated < end)) event_subq = event_q.subquery() for trait_model in [models.TraitText, models.TraitInt, models.TraitFloat, models.TraitDatetime]: (session.query(trait_model) .filter(trait_model.event_id.in_(event_subq)) .delete(synchronize_session="fetch")) event_rows = event_q.delete() # remove EventType and TraitType with no corresponding # matching events and traits (session.query(models.EventType) .filter(~models.EventType.events.any()) .delete(synchronize_session="fetch")) LOG.info(_LI("%d events are removed from database"), event_rows)
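# NOTE(editor): usage sketch for the event driver defined above; the URL and
# TTL are illustrative examples, not defaults.
#
#   conn = Connection('mysql://ceilometer:secret@localhost/ceilometer')
#   conn.upgrade()
#   # drop events older than 30 days together with their traits
#   conn.clear_expired_event_data(ttl=30 * 24 * 3600)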
class Connection(base.Connection): """Base Connection class for MongoDB and DB2 drivers.""" CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES, COMMON_AVAILABLE_CAPABILITIES) STORAGE_CAPABILITIES = utils.update_nested( base.Connection.STORAGE_CAPABILITIES, AVAILABLE_STORAGE_CAPABILITIES, ) def get_meters(self, user=None, project=None, resource=None, source=None, metaquery=None, pagination=None): """Return an iterable of models.Meter instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param resource: Optional resource filter. :param source: Optional source filter. :param metaquery: Optional dict with metadata to match on. :param pagination: Optional pagination query. """ if pagination: raise ceilometer.NotImplementedError('Pagination not implemented') metaquery = metaquery or {} q = {} if user is not None: q['user_id'] = user if project is not None: q['project_id'] = project if resource is not None: q['_id'] = resource if source is not None: q['source'] = source q.update(metaquery) for r in self.db.resource.find(q): for r_meter in r['meter']: yield models.Meter( name=r_meter['counter_name'], type=r_meter['counter_type'], # Return empty string if 'counter_unit' is not valid for # backward compatibility. unit=r_meter.get('counter_unit', ''), resource_id=r['_id'], project_id=r['project_id'], source=r['source'], user_id=r['user_id'], ) def get_samples(self, sample_filter, limit=None): """Return an iterable of model.Sample instances. :param sample_filter: Filter. :param limit: Maximum number of results to return. """ if limit == 0: return [] q = pymongo_utils.make_query_from_filter(sample_filter, require_meter=False) return self._retrieve_samples(q, [("timestamp", pymongo.DESCENDING)], limit) def record_events(self, event_models): """Write the events to database. Return a list of events of type models.Event.DUPLICATE in case of trying to write an already existing event to the database, or models.Event.UNKONW_PROBLEM in case of any failures with recording the event in the database. :param event_models: a list of models.Event objects. """ problem_events = [] for event_model in event_models: traits = [] if event_model.traits: for trait in event_model.traits: traits.append({'trait_name': trait.name, 'trait_type': trait.dtype, 'trait_value': trait.value}) try: self.db.event.insert( {'_id': event_model.message_id, 'event_type': event_model.event_type, 'timestamp': event_model.generated, 'traits': traits}) except pymongo.errors.DuplicateKeyError as ex: LOG.exception(_("Failed to record duplicated event: %s") % ex) problem_events.append((ev_models.Event.DUPLICATE, event_model)) except Exception as ex: LOG.exception(_("Failed to record event: %s") % ex) problem_events.append((ev_models.Event.UNKNOWN_PROBLEM, event_model)) return problem_events def get_events(self, event_filter): """Return an iter of models.Event objects. :param event_filter: storage.EventFilter object, consists of filters for events that are stored in database. 
""" q = pymongo_utils.make_events_query_from_filter(event_filter) for event in self.db.event.find(q): traits = [] for trait in event['traits']: traits.append( ev_models.Trait(name=trait['trait_name'], dtype=int(trait['trait_type']), value=trait['trait_value'])) yield ev_models.Event(message_id=event['_id'], event_type=event['event_type'], generated=event['timestamp'], traits=traits) def get_event_types(self): """Return all event types as an iter of strings.""" event_types = set() events = self.db.event.find() for event in events: event_type = event['event_type'] if event_type not in event_types: event_types.add(event_type) yield event_type def get_trait_types(self, event_type): """Return a dictionary containing the name and data type of the trait. Only trait types for the provided event_type are returned. :param event_type: the type of the Event. """ trait_names = set() events = self.db.event.find({'event_type': event_type}) for event in events: for trait in event['traits']: trait_name = trait['trait_name'] if trait_name not in trait_names: # Here we check that our method return only unique # trait types. Method will return only one trait type. It # is proposed that certain trait name could have only one # trait type. trait_names.add(trait_name) yield {'name': trait_name, 'data_type': trait['trait_type']} def get_traits(self, event_type, trait_name=None): """Return all trait instances associated with an event_type. If trait_type is specified, only return instances of that trait type. :param event_type: the type of the Event to filter by :param trait_name: the name of the Trait to filter by """ if not trait_name: events = self.db.event.find({'event_type': event_type}) else: # We choose events that simultaneously have event_type and certain # trait_name, and retrieve events contains only mentioned traits. events = self.db.event.find({'$and': [{'event_type': event_type}, {'traits.trait_name': trait_name}]}, {'traits': {'$elemMatch': {'trait_name': trait_name}} }) for event in events: for trait in event['traits']: yield ev_models.Trait(name=trait['trait_name'], dtype=trait['trait_type'], value=trait['trait_value']) def query_samples(self, filter_expr=None, orderby=None, limit=None): if limit == 0: return [] query_filter = {} orderby_filter = [("timestamp", pymongo.DESCENDING)] transformer = pymongo_utils.QueryTransformer() if orderby is not None: orderby_filter = transformer.transform_orderby(orderby) if filter_expr is not None: query_filter = transformer.transform_filter(filter_expr) return self._retrieve_samples(query_filter, orderby_filter, limit) def _retrieve_samples(self, query, orderby, limit): if limit is not None: samples = self.db.meter.find(query, limit=limit, sort=orderby) else: samples = self.db.meter.find(query, sort=orderby) for s in samples: # Remove the ObjectId generated by the database when # the sample was inserted. It is an implementation # detail that should not leak outside of the driver. del s['_id'] # Backward compatibility for samples without units s['counter_unit'] = s.get('counter_unit', '') # Tolerate absence of recorded_at in older datapoints s['recorded_at'] = s.get('recorded_at') yield models.Sample(**s)
class Connection(base.Connection):
    """Base event Connection class for MongoDB and DB2 drivers."""

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       COMMON_AVAILABLE_CAPABILITIES)

    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def record_events(self, event_models):
        """Write the events to the database.

        Return a list of events of type models.Event.DUPLICATE in case of
        trying to write an already existing event to the database, or
        models.Event.UNKNOWN_PROBLEM in case of any other failure while
        recording the event in the database.

        :param event_models: a list of models.Event objects.
        """
        problem_events = []
        for event_model in event_models:
            traits = []
            if event_model.traits:
                for trait in event_model.traits:
                    traits.append({'trait_name': trait.name,
                                   'trait_type': trait.dtype,
                                   'trait_value': trait.value})
            try:
                self.db.event.insert(
                    {'_id': event_model.message_id,
                     'event_type': event_model.event_type,
                     'timestamp': event_model.generated,
                     'traits': traits})
            except pymongo.errors.DuplicateKeyError as ex:
                LOG.exception(_("Failed to record duplicated event: %s") % ex)
                problem_events.append((models.Event.DUPLICATE,
                                       event_model))
            except Exception as ex:
                LOG.exception(_("Failed to record event: %s") % ex)
                problem_events.append((models.Event.UNKNOWN_PROBLEM,
                                       event_model))
        return problem_events

    def get_events(self, event_filter):
        """Return an iterable of models.Event objects.

        :param event_filter: storage.EventFilter object, consists of filters
                             for events that are stored in database.
        """
        q = pymongo_utils.make_events_query_from_filter(event_filter)
        for event in self.db.event.find(q):
            traits = []
            for trait in event['traits']:
                traits.append(
                    models.Trait(name=trait['trait_name'],
                                 dtype=int(trait['trait_type']),
                                 value=trait['trait_value']))
            yield models.Event(message_id=event['_id'],
                               event_type=event['event_type'],
                               generated=event['timestamp'],
                               traits=traits)

    def get_event_types(self):
        """Return all event types as an iterable of strings."""
        event_types = set()
        events = self.db.event.find()
        for event in events:
            event_type = event['event_type']
            if event_type not in event_types:
                event_types.add(event_type)
                yield event_type

    def get_trait_types(self, event_type):
        """Return dictionaries containing the name and data type of traits.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event.
        """
        trait_names = set()
        events = self.db.event.find({'event_type': event_type})
        for event in events:
            for trait in event['traits']:
                trait_name = trait['trait_name']
                if trait_name not in trait_names:
                    # Yield each trait name only once; it is assumed that
                    # a given trait name maps to exactly one trait type.
                    trait_names.add(trait_name)
                    yield {'name': trait_name,
                           'data_type': trait['trait_type']}

    def get_traits(self, event_type, trait_name=None):
        """Return all trait instances associated with an event_type.

        If trait_name is specified, only return instances of that trait.

        :param event_type: the type of the Event to filter by
        :param trait_name: the name of the Trait to filter by
        """
        if not trait_name:
            events = self.db.event.find({'event_type': event_type})
        else:
            # Select events that have both the event_type and the requested
            # trait_name, and project only the matching traits.
            events = self.db.event.find(
                {'$and': [{'event_type': event_type},
                          {'traits.trait_name': trait_name}]},
                {'traits': {'$elemMatch': {'trait_name': trait_name}}})
        for event in events:
            for trait in event['traits']:
                yield models.Trait(name=trait['trait_name'],
                                   dtype=trait['trait_type'],
                                   value=trait['trait_value'])
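# NOTE(editor): minimal sketch, not driver code, of the filter/projection
# pair that get_traits() above passes to find() when trait_name is given;
# the event type and trait name below are hypothetical values. In a
# projection, $elemMatch keeps only the first array element matching the
# condition, which suffices here under the assumption that a trait name
# occurs at most once per event document.
def _example_trait_query(event_type='compute.instance.create.end',
                         trait_name='instance_id'):
    # Match events of the given type that carry the requested trait.
    spec = {'$and': [{'event_type': event_type},
                     {'traits.trait_name': trait_name}]}
    # Trim the returned 'traits' array down to the matching element.
    projection = {'traits': {'$elemMatch': {'trait_name': trait_name}}}
    # Usage would be: db.event.find(spec, projection)
    return spec, projection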