class Connection(pymongo_base.Connection):
    """Put the event data into a MongoDB database."""

    CONNECTION_POOL = pymongo_utils.ConnectionPool()

    def __init__(self, url):
        # NOTE(jd) Use our own connection pooling on top of the Pymongo one.
        # We need that otherwise we overflow the MongoDB instance with new
        # connections since we instantiate a Pymongo client each time someone
        # requires a new storage connection.
        self.conn = self.CONNECTION_POOL.connect(url)

        # Require MongoDB 2.4 to use $setOnInsert
        if self.conn.server_info()['versionArray'] < [2, 4]:
            raise storage.StorageBadVersion("Need at least MongoDB 2.4")

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options['database'])
        if connection_options.get('username'):
            self.db.authenticate(connection_options['username'],
                                 connection_options['password'])

        # NOTE(jd) Upgrading is just about creating indexes, so let's do this
        # on connection to be sure at least the TTL is correctly updated if
        # needed.
        self.upgrade()

    def clear(self):
        self.conn.drop_database(self.db)
        # Connection will be reopened automatically if needed
        self.conn.close()
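The pymongo_utils.ConnectionPool referenced above is not shown in these excerpts. As a rough illustration of the idea described in the NOTE(jd) comment, a URL-keyed cache of clients could look like the following sketch; the class and attribute names are illustrative, not the actual implementation.

import pymongo


class IllustrativeConnectionPool(object):
    """Sketch of a URL-keyed MongoClient cache (not the real pool)."""

    def __init__(self):
        self._clients = {}

    def connect(self, url):
        # Reuse the client already opened for this URL instead of creating
        # a new MongoClient (and a new set of sockets) for every storage
        # connection request.
        client = self._clients.get(url)
        if client is None:
            client = pymongo.MongoClient(url)
            self._clients[url] = client
        return client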
class Connection(pymongo_base.Connection):
    """Put the event data into a MongoDB database."""

    CONNECTION_POOL = pymongo_utils.ConnectionPool()

    def __init__(self, url):
        # NOTE(jd) Use our own connection pooling on top of the Pymongo one.
        # We need that otherwise we overflow the MongoDB instance with new
        # connections since we instantiate a Pymongo client each time someone
        # requires a new storage connection.
        self.conn = self.CONNECTION_POOL.connect(url)

        # Require MongoDB 2.4 to use $setOnInsert
        if self.conn.server_info()['versionArray'] < [2, 4]:
            raise storage.StorageBadVersion("Need at least MongoDB 2.4")

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options['database'])
        if connection_options.get('username'):
            self.db.authenticate(connection_options['username'],
                                 connection_options['password'])

        # NOTE(jd) Upgrading is just about creating indexes, so let's do this
        # on connection to be sure at least the TTL is correctly updated if
        # needed.
        self.upgrade()

    def upgrade(self):
        # create collection if not present
        if 'event' not in self.db.conn.collection_names():
            self.db.conn.create_collection('event')
        # Establish indexes
        # NOTE(idegtiarov): These indexes cover get_events, get_event_types,
        # and get_trait_types requests based on event_type and timestamp
        # fields.
        self.db.event.create_index(
            [('event_type', pymongo.ASCENDING),
             ('timestamp', pymongo.ASCENDING)],
            name='event_type_idx'
        )
        ttl = cfg.CONF.database.event_time_to_live
        impl_mongodb.Connection.update_ttl(ttl, 'event_ttl', 'timestamp',
                                           self.db.event)

    def clear(self):
        self.conn.drop_database(self.db.name)
        # Connection will be reopened automatically if needed
        self.conn.close()

    @staticmethod
    def clear_expired_event_data(ttl):
        """Clear expired data from the backend storage system.

        Clearing occurs according to the time-to-live.

        :param ttl: Number of seconds to keep records for.
        """
        LOG.debug("Clearing expired event data is based on native "
                  "MongoDB time to live feature and going in background.")
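update_ttl() is defined on the metering driver shown later; the behaviour it relies on is MongoDB's native TTL index. A minimal standalone example of such an index follows; the connection URL, database name, and 1-hour TTL are assumed values.

import pymongo

client = pymongo.MongoClient('mongodb://localhost:27017')  # assumed URL
event = client.ceilometer.event  # assumed database name

# Documents are removed by MongoDB's TTL monitor (which runs periodically,
# roughly once a minute) once their 'timestamp' value is older than
# expireAfterSeconds; no explicit purge job is needed.
event.create_index([('timestamp', pymongo.ASCENDING)],
                   expireAfterSeconds=3600,
                   name='event_ttl')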
class Connection(pymongo_base.Connection):
    """Put the alarm data into a MongoDB database."""

    CONNECTION_POOL = pymongo_utils.ConnectionPool()

    def __init__(self, url):
        # NOTE(jd) Use our own connection pooling on top of the Pymongo one.
        # We need that otherwise we overflow the MongoDB instance with new
        # connections since we instantiate a Pymongo client each time someone
        # requires a new storage connection.
        self.conn = self.CONNECTION_POOL.connect(url)

        # Require MongoDB 2.4 to use $setOnInsert
        if self.conn.server_info()['versionArray'] < [2, 4]:
            raise storage.StorageBadVersion("Need at least MongoDB 2.4")

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options['database'])
        if connection_options.get('username'):
            self.db.authenticate(connection_options['username'],
                                 connection_options['password'])

        # NOTE(jd) Upgrading is just about creating indexes, so let's do this
        # on connection to be sure at least the TTL is correctly updated if
        # needed.
        self.upgrade()

    def upgrade(self):
        super(Connection, self).upgrade()
        # Establish indexes
        ttl = cfg.CONF.database.alarm_history_time_to_live
        impl_mongodb.Connection.update_ttl(
            ttl, 'alarm_history_ttl', 'timestamp', self.db.alarm_history)

    def clear(self):
        self.conn.drop_database(self.db.name)
        # Connection will be reopened automatically if needed
        self.conn.close()

    def clear_expired_alarm_history_data(self, alarm_history_ttl):
        """Clear expired alarm history data from the backend storage system.

        Clearing occurs according to the time-to-live.

        :param alarm_history_ttl: Number of seconds to keep alarm history
                                  records for.
        """
        LOG.debug("Clearing expired alarm history data is based on native "
                  "MongoDB time to live feature and going in background.")
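The "Require MongoDB 2.4" check in these constructors exists because of $setOnInsert, which only writes its fields when the upsert actually inserts a new document. A small hedged example of that behaviour; the connection URL, database name, and field values are made up for illustration.

import datetime

import pymongo

db = pymongo.MongoClient('mongodb://localhost:27017').ceilometer  # assumed
now = datetime.datetime.utcnow()

# On the first upsert the resource document is created with both sample
# timestamps; later upserts still apply $set but leave the $setOnInsert
# fields untouched because the document already exists.
db.resource.update_one(
    {'_id': 'example-resource-id'},
    {'$set': {'project_id': 'example-project-id'},
     '$setOnInsert': {'first_sample_timestamp': now,
                      'last_sample_timestamp': now}},
    upsert=True)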
class Connection(pymongo_base.Connection):
    """The db2 event storage for Ceilometer."""

    CONNECTION_POOL = pymongo_utils.ConnectionPool()

    def __init__(self, url):
        # Since we are using pymongo, even though we are connecting to DB2
        # we still have to make sure that the scheme used to distinguish the
        # db2 driver from the mongodb driver is replaced, so that pymongo
        # will not raise an exception about the scheme.
        url = url.replace('db2:', 'mongodb:', 1)
        self.conn = self.CONNECTION_POOL.connect(url)

        # Require MongoDB 2.2 to use aggregate(). Since mongodb is used as
        # the backend for tests, the following code makes sure the tests
        # won't try aggregate() on an older mongodb. For db2, versionArray
        # is not part of server_info, so no exception is raised when a real
        # db2 is used as the backend.
        server_info = self.conn.server_info()
        if server_info.get('sysInfo'):
            self._using_mongodb = True
        else:
            self._using_mongodb = False

        if self._using_mongodb and server_info.get('versionArray') < [2, 2]:
            raise storage.StorageBadVersion("Need at least MongoDB 2.2")

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options['database'])
        if connection_options.get('username'):
            self.db.authenticate(connection_options['username'],
                                 connection_options['password'])

        self.upgrade()

    def upgrade(self):
        # create collection if not present
        if 'event' not in self.db.conn.collection_names():
            self.db.conn.create_collection('event')

    def clear(self):
        # The drop_database command does nothing on a db2 database since it
        # has not been implemented there. However, calling this method is
        # important to remove all the empty databases created during test
        # runs, since the tests run against mongodb on Jenkins.
        self.conn.drop_database(self.db.name)
        self.conn.close()
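The scheme rewrite above is a one-line string substitution; a hedged example with a made-up URL shows the effect.

def normalize_db2_url(url):
    # pymongo only accepts the 'mongodb' scheme, so the 'db2' scheme that
    # selects this driver is rewritten before connecting.
    return url.replace('db2:', 'mongodb:', 1)


# Example value only; the credentials and host are illustrative.
assert (normalize_db2_url('db2://user:pass@localhost:27017/ceilodb2')
        == 'mongodb://user:pass@localhost:27017/ceilodb2')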
def test_mongodb_connect_raises_after_custom_number_of_attempts(self): retry_interval = 13 max_retries = 37 self.CONF.set_override('retry_interval', retry_interval, group='database') self.CONF.set_override('max_retries', max_retries, group='database') # PyMongo is being used to connect even to DB2, but it only # accepts URLs with the 'mongodb' scheme. This replacement is # usually done in the DB2 connection implementation, but since # we don't call that, we have to do it here. self.CONF.set_override('connection', self.db_manager.url.replace( 'db2:', 'mongodb:', 1), group='database') pool = pymongo_utils.ConnectionPool() with contextlib.nested( mock.patch( 'pymongo.MongoClient', side_effect=pymongo.errors.ConnectionFailure('foo')), mock.patch.object(pymongo_utils.LOG, 'error'), mock.patch.object(pymongo_utils.LOG, 'warn'), mock.patch.object(pymongo_utils.time, 'sleep')) as (MockMongo, MockLOGerror, MockLOGwarn, Mocksleep): self.assertRaises(pymongo.errors.ConnectionFailure, pool.connect, self.CONF.database.connection) Mocksleep.assert_has_calls( [mock.call(retry_interval) for i in range(max_retries)]) MockLOGwarn.assert_any_call( _('Unable to connect to the database server: %(errmsg)s.' ' Trying again in %(retry_interval)d seconds.') % { 'errmsg': 'foo', 'retry_interval': retry_interval }) MockLOGerror.assert_called_with( _('Unable to connect to the database after ' '%(retries)d retries. Giving up.') % {'retries': max_retries})
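The test above exercises the retry behaviour of pymongo_utils.ConnectionPool, whose implementation is not included in these excerpts. A minimal sketch of a loop with the same observable behaviour (sleep between attempts, re-raise after max_retries failures) is shown below; the function name and structure are assumptions, not the real code.

import time

import pymongo


def connect_with_retries(url, max_retries, retry_interval):
    # Illustrative only: try max_retries + 1 times in total, sleeping
    # retry_interval seconds between attempts, and re-raise the last
    # ConnectionFailure once the retries are exhausted.
    for attempt in range(max_retries + 1):
        try:
            return pymongo.MongoClient(url)
        except pymongo.errors.ConnectionFailure:
            if attempt == max_retries:
                raise
            time.sleep(retry_interval)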
class Connection(pymongo_base.Connection): """The db2 storage for Ceilometer Collections:: - meter - the raw incoming data - resource - the metadata for resources - { _id: uuid of resource, metadata: metadata dictionaries user_id: uuid project_id: uuid meter: [ array of {counter_name: string, counter_type: string, counter_unit: string} ] } """ CAPABILITIES = utils.update_nested(pymongo_base.Connection.CAPABILITIES, AVAILABLE_CAPABILITIES) CONNECTION_POOL = pymongo_utils.ConnectionPool() GROUP = { '_id': '$counter_name', 'unit': { '$min': '$counter_unit' }, 'min': { '$min': '$counter_volume' }, 'max': { '$max': '$counter_volume' }, 'sum': { '$sum': '$counter_volume' }, 'count': { '$sum': 1 }, 'duration_start': { '$min': '$timestamp' }, 'duration_end': { '$max': '$timestamp' }, } PROJECT = { '_id': 0, 'unit': 1, 'min': 1, 'max': 1, 'sum': 1, 'count': 1, 'avg': { '$divide': ['$sum', '$count'] }, 'duration_start': 1, 'duration_end': 1, } SORT_OPERATION_MAP = {'desc': pymongo.DESCENDING, 'asc': pymongo.ASCENDING} SECONDS_IN_A_DAY = 86400 def __init__(self, url): # Since we are using pymongo, even though we are connecting to DB2 # we still have to make sure that the scheme which used to distinguish # db2 driver from mongodb driver be replaced so that pymongo will not # produce an exception on the scheme. url = url.replace('db2:', 'mongodb:', 1) self.conn = self.CONNECTION_POOL.connect(url) # Require MongoDB 2.2 to use aggregate(), since we are using mongodb # as backend for test, the following code is necessary to make sure # that the test wont try aggregate on older mongodb during the test. # For db2, the versionArray won't be part of the server_info, so there # will not be exception when real db2 gets used as backend. server_info = self.conn.server_info() if server_info.get('sysInfo'): self._using_mongodb = True else: self._using_mongodb = False if self._using_mongodb and server_info.get('versionArray') < [2, 2]: raise storage.StorageBadVersion("Need at least MongoDB 2.2") connection_options = pymongo.uri_parser.parse_uri(url) self.db = getattr(self.conn, connection_options['database']) if connection_options.get('username'): self.db.authenticate(connection_options['username'], connection_options['password']) self.upgrade() @classmethod def _build_sort_instructions(cls, sort_keys=None, sort_dir='desc'): """Returns a sort_instruction. Sort instructions are used in the query to determine what attributes to sort on and what direction to use. :param q: The query dict passed in. :param sort_keys: array of attributes by which results be sorted. :param sort_dir: direction in which results be sorted (asc, desc). :return: sort parameters """ sort_keys = sort_keys or [] sort_instructions = [] _sort_dir = cls.SORT_OPERATION_MAP.get(sort_dir, cls.SORT_OPERATION_MAP['desc']) for _sort_key in sort_keys: _instruction = (_sort_key, _sort_dir) sort_instructions.append(_instruction) return sort_instructions def upgrade(self, version=None): # Establish indexes # # We need variations for user_id vs. project_id because of the # way the indexes are stored in b-trees. The user_id and # project_id values are usually mutually exclusive in the # queries, so the database won't take advantage of an index # including both. 
if self.db.resource.index_information() == {}: resource_id = str(bson.objectid.ObjectId()) self.db.resource.insert({ '_id': resource_id, 'no_key': resource_id }) meter_id = str(bson.objectid.ObjectId()) timestamp = timeutils.utcnow() self.db.meter.insert({ '_id': meter_id, 'no_key': meter_id, 'timestamp': timestamp }) self.db.resource.ensure_index([('user_id', pymongo.ASCENDING), ('project_id', pymongo.ASCENDING), ('source', pymongo.ASCENDING)], name='resource_idx') self.db.meter.ensure_index([('resource_id', pymongo.ASCENDING), ('user_id', pymongo.ASCENDING), ('project_id', pymongo.ASCENDING), ('counter_name', pymongo.ASCENDING), ('timestamp', pymongo.ASCENDING), ('source', pymongo.ASCENDING)], name='meter_idx') self.db.meter.ensure_index([('timestamp', pymongo.DESCENDING)], name='timestamp_idx') self.db.resource.remove({'_id': resource_id}) self.db.meter.remove({'_id': meter_id}) # remove API v1 related table self.db.user.drop() self.db.project.drop() def clear(self): # db2 does not support drop_database, remove all collections for col in ['resource', 'meter']: self.db[col].drop() # drop_database command does nothing on db2 database since this has # not been implemented. However calling this method is important for # removal of all the empty dbs created during the test runs since # test run is against mongodb on Jenkins self.conn.drop_database(self.db) self.conn.close() def record_metering_data(self, data): """Write the data to the backend storage system. :param data: a dictionary such as returned by ceilometer.meter.meter_message_from_counter """ # Record the updated resource metadata self.db.resource.update( {'_id': data['resource_id']}, { '$set': { 'project_id': data['project_id'], 'user_id': data['user_id'] or 'null', 'metadata': data['resource_metadata'], 'source': data['source'], }, '$addToSet': { 'meter': { 'counter_name': data['counter_name'], 'counter_type': data['counter_type'], 'counter_unit': data['counter_unit'], }, }, }, upsert=True, ) # Record the raw data for the meter. Use a copy so we do not # modify a data structure owned by our caller (the driver adds # a new key '_id'). record = copy.copy(data) record['recorded_at'] = timeutils.utcnow() # Make sure that the data does have field _id which db2 wont add # automatically. if record.get('_id') is None: record['_id'] = str(bson.objectid.ObjectId()) self.db.meter.insert(record) def get_resources(self, user=None, project=None, source=None, start_timestamp=None, start_timestamp_op=None, end_timestamp=None, end_timestamp_op=None, metaquery=None, resource=None, pagination=None): """Return an iterable of models.Resource instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param source: Optional source filter. :param start_timestamp: Optional modified timestamp start range. :param start_timestamp_op: Optional start time operator, like gt, ge. :param end_timestamp: Optional modified timestamp end range. :param end_timestamp_op: Optional end time operator, like lt, le. :param metaquery: Optional dict with metadata to match on. :param resource: Optional resource filter. :param pagination: Optional pagination query. 
""" if pagination: raise ceilometer.NotImplementedError('Pagination not implemented') metaquery = metaquery or {} q = {} if user is not None: q['user_id'] = user if project is not None: q['project_id'] = project if source is not None: q['source'] = source if resource is not None: q['resource_id'] = resource # Add resource_ prefix so it matches the field in the db q.update( dict(('resource_' + k, v) for (k, v) in six.iteritems(metaquery))) if start_timestamp or end_timestamp: # Look for resources matching the above criteria and with # samples in the time range we care about, then change the # resource query to return just those resources by id. ts_range = pymongo_utils.make_timestamp_range( start_timestamp, end_timestamp, start_timestamp_op, end_timestamp_op) if ts_range: q['timestamp'] = ts_range sort_keys = base._handle_sort_key('resource', 'timestamp') sort_keys.insert(0, 'resource_id') sort_instructions = self._build_sort_instructions(sort_keys=sort_keys, sort_dir='desc') resource = lambda x: x['resource_id'] meters = self.db.meter.find(q, sort=sort_instructions) for resource_id, r_meters in itertools.groupby(meters, key=resource): # Because we have to know first/last timestamp, and we need a full # list of references to the resource's meters, we need a tuple # here. r_meters = tuple(r_meters) latest_meter = r_meters[0] last_ts = latest_meter['timestamp'] first_ts = r_meters[-1]['timestamp'] yield models.Resource(resource_id=latest_meter['resource_id'], project_id=latest_meter['project_id'], first_sample_timestamp=first_ts, last_sample_timestamp=last_ts, source=latest_meter['source'], user_id=latest_meter['user_id'], metadata=latest_meter['resource_metadata']) def get_meter_statistics(self, sample_filter, period=None, groupby=None, aggregate=None): """Return an iterable of models.Statistics instance. Items are containing meter statistics described by the query parameters. The filter must have a meter value set. 
""" if (groupby and set(groupby) - set(['user_id', 'project_id', 'resource_id', 'source'])): raise ceilometer.NotImplementedError( "Unable to group by these fields") if aggregate: raise ceilometer.NotImplementedError( 'Selectable aggregates not implemented') q = pymongo_utils.make_query_from_filter(sample_filter) if period: if sample_filter.start: period_start = sample_filter.start else: period_start = self.db.meter.find(limit=1, sort=[('timestamp', pymongo.ASCENDING) ])[0]['timestamp'] if groupby: sort_keys = ['counter_name'] + groupby + ['timestamp'] else: sort_keys = ['counter_name', 'timestamp'] sort_instructions = self._build_sort_instructions(sort_keys=sort_keys, sort_dir='asc') meters = self.db.meter.find(q, sort=sort_instructions) def _group_key(meter): # the method to define a key for groupby call key = {} for y in sort_keys: if y == 'timestamp' and period: key[y] = ( timeutils.delta_seconds(period_start, meter[y]) // period) elif y != 'timestamp': key[y] = meter[y] return key def _to_offset(periods): return { 'days': (periods * period) // self.SECONDS_IN_A_DAY, 'seconds': (periods * period) % self.SECONDS_IN_A_DAY } for key, grouped_meters in itertools.groupby(meters, key=_group_key): stat = models.Statistics(unit=None, min=sys.maxint, max=-sys.maxint, avg=0, sum=0, count=0, period=0, period_start=0, period_end=0, duration=0, duration_start=0, duration_end=0, groupby=None) for meter in grouped_meters: stat.unit = meter.get('counter_unit', '') m_volume = meter.get('counter_volume') if stat.min > m_volume: stat.min = m_volume if stat.max < m_volume: stat.max = m_volume stat.sum += m_volume stat.count += 1 if stat.duration_start == 0: stat.duration_start = meter['timestamp'] stat.duration_end = meter['timestamp'] if groupby and not stat.groupby: stat.groupby = {} for group_key in groupby: stat.groupby[group_key] = meter[group_key] stat.duration = timeutils.delta_seconds(stat.duration_start, stat.duration_end) stat.avg = stat.sum / stat.count if period: stat.period = period periods = key.get('timestamp') stat.period_start = ( period_start + datetime.timedelta(**(_to_offset(periods)))) stat.period_end = ( period_start + datetime.timedelta(**(_to_offset(periods + 1)))) else: stat.period_start = stat.duration_start stat.period_end = stat.duration_end yield stat
class Connection(pymongo_base.Connection): """Put the data into a MongoDB database Collections:: - meter - the raw incoming data - resource - the metadata for resources - { _id: uuid of resource, metadata: metadata dictionaries user_id: uuid project_id: uuid meter: [ array of {counter_name: string, counter_type: string, counter_unit: string} ] } """ CAPABILITIES = utils.update_nested(pymongo_base.Connection.CAPABILITIES, AVAILABLE_CAPABILITIES) CONNECTION_POOL = pymongo_utils.ConnectionPool() STANDARD_AGGREGATES = dict([(a.name, a) for a in [ pymongo_utils.SUM_AGGREGATION, pymongo_utils.AVG_AGGREGATION, pymongo_utils.MIN_AGGREGATION, pymongo_utils.MAX_AGGREGATION, pymongo_utils.COUNT_AGGREGATION, ]]) AGGREGATES = dict([(a.name, a) for a in [ pymongo_utils.SUM_AGGREGATION, pymongo_utils.AVG_AGGREGATION, pymongo_utils.MIN_AGGREGATION, pymongo_utils.MAX_AGGREGATION, pymongo_utils.COUNT_AGGREGATION, pymongo_utils.STDDEV_AGGREGATION, pymongo_utils.CARDINALITY_AGGREGATION, ]]) SORT_OPERATION_MAPPING = { 'desc': (pymongo.DESCENDING, '$lt'), 'asc': (pymongo.ASCENDING, '$gt') } MAP_RESOURCES = bson.code.Code(""" function () { emit(this.resource_id, {user_id: this.user_id, project_id: this.project_id, source: this.source, first_timestamp: this.timestamp, last_timestamp: this.timestamp, metadata: this.resource_metadata}) }""") REDUCE_RESOURCES = bson.code.Code(""" function (key, values) { var merge = {user_id: values[0].user_id, project_id: values[0].project_id, source: values[0].source, first_timestamp: values[0].first_timestamp, last_timestamp: values[0].last_timestamp, metadata: values[0].metadata} values.forEach(function(value) { if (merge.first_timestamp - value.first_timestamp > 0) { merge.first_timestamp = value.first_timestamp; merge.user_id = value.user_id; merge.project_id = value.project_id; merge.source = value.source; } else if (merge.last_timestamp - value.last_timestamp <= 0) { merge.last_timestamp = value.last_timestamp; merge.metadata = value.metadata; } }); return merge; }""") _GENESIS = datetime.datetime(year=datetime.MINYEAR, month=1, day=1) _APOCALYPSE = datetime.datetime(year=datetime.MAXYEAR, month=12, day=31, hour=23, minute=59, second=59) def __init__(self, url): # NOTE(jd) Use our own connection pooling on top of the Pymongo one. # We need that otherwise we overflow the MongoDB instance with new # connection since we instantiate a Pymongo client each time someone # requires a new storage connection. self.conn = self.CONNECTION_POOL.connect(url) self.version = self.conn.server_info()['versionArray'] # Require MongoDB 2.4 to use $setOnInsert if self.version < pymongo_utils.MINIMUM_COMPATIBLE_MONGODB_VERSION: raise storage.StorageBadVersion( "Need at least MongoDB %s" % pymongo_utils.MINIMUM_COMPATIBLE_MONGODB_VERSION) connection_options = pymongo.uri_parser.parse_uri(url) self.db = getattr(self.conn, connection_options['database']) if connection_options.get('username'): self.db.authenticate(connection_options['username'], connection_options['password']) # NOTE(jd) Upgrading is just about creating index, so let's do this # on connection to be sure at least the TTL is correctly updated if # needed. self.upgrade() @staticmethod def update_ttl(ttl, ttl_index_name, index_field, coll): """Update or create time_to_live indexes. :param ttl: time to live in seconds. :param ttl_index_name: name of the index we want to update or create. :param index_field: field with the index that we need to update. :param coll: collection which indexes need to be updated. 
""" indexes = coll.index_information() if ttl <= 0: if ttl_index_name in indexes: coll.drop_index(ttl_index_name) return if ttl_index_name in indexes: return coll.database.command('collMod', coll.name, index={ 'keyPattern': { index_field: pymongo.ASCENDING }, 'expireAfterSeconds': ttl }) coll.create_index([(index_field, pymongo.ASCENDING)], expireAfterSeconds=ttl, name=ttl_index_name) def upgrade(self): # Establish indexes # # We need variations for user_id vs. project_id because of the # way the indexes are stored in b-trees. The user_id and # project_id values are usually mutually exclusive in the # queries, so the database won't take advantage of an index # including both. # create collection if not present if 'resource' not in self.db.conn.collection_names(): self.db.conn.create_collection('resource') if 'meter' not in self.db.conn.collection_names(): self.db.conn.create_collection('meter') name_qualifier = dict(user_id='', project_id='project_') background = dict(user_id=False, project_id=True) for primary in ['user_id', 'project_id']: name = 'meter_%sidx' % name_qualifier[primary] self.db.meter.create_index([ ('resource_id', pymongo.ASCENDING), (primary, pymongo.ASCENDING), ('counter_name', pymongo.ASCENDING), ('timestamp', pymongo.ASCENDING), ], name=name, background=background[primary]) self.db.meter.create_index([('timestamp', pymongo.DESCENDING)], name='timestamp_idx') # NOTE(ityaptin) This index covers get_resource requests sorting # and MongoDB uses part of this compound index for different # queries based on any of user_id, project_id, last_sample_timestamp # fields self.db.resource.create_index( [('user_id', pymongo.DESCENDING), ('project_id', pymongo.DESCENDING), ('last_sample_timestamp', pymongo.DESCENDING)], name='resource_user_project_timestamp', ) self.db.resource.create_index( [('last_sample_timestamp', pymongo.DESCENDING)], name='last_sample_timestamp_idx') # update or create time_to_live index ttl = cfg.CONF.database.metering_time_to_live self.update_ttl(ttl, 'meter_ttl', 'timestamp', self.db.meter) self.update_ttl(ttl, 'resource_ttl', 'last_sample_timestamp', self.db.resource) def clear(self): self.conn.drop_database(self.db.name) # Connection will be reopened automatically if needed self.conn.close() def record_metering_data(self, data): """Write the data to the backend storage system. 
:param data: a dictionary such as returned by ceilometer.meter.meter_message_from_counter """ # Record the updated resource metadata - we use $setOnInsert to # unconditionally insert sample timestamps and resource metadata # (in the update case, this must be conditional on the sample not # being out-of-order) data = copy.deepcopy(data) data['resource_metadata'] = pymongo_utils.improve_keys( data.pop('resource_metadata')) resource = self.db.resource.find_one_and_update( {'_id': data['resource_id']}, { '$set': { 'project_id': data['project_id'], 'user_id': data['user_id'], 'source': data['source'], }, '$setOnInsert': { 'metadata': data['resource_metadata'], 'first_sample_timestamp': data['timestamp'], 'last_sample_timestamp': data['timestamp'], }, '$addToSet': { 'meter': { 'counter_name': data['counter_name'], 'counter_type': data['counter_type'], 'counter_unit': data['counter_unit'], }, }, }, upsert=True, return_document=pymongo.ReturnDocument.AFTER, ) # only update last sample timestamp if actually later (the usual # in-order case) last_sample_timestamp = resource.get('last_sample_timestamp') if (last_sample_timestamp is None or last_sample_timestamp <= data['timestamp']): self.db.resource.update_one({'_id': data['resource_id']}, { '$set': { 'metadata': data['resource_metadata'], 'last_sample_timestamp': data['timestamp'] } }) # only update first sample timestamp if actually earlier (the unusual # out-of-order case) # NOTE: a null first sample timestamp is not updated as this indicates # a pre-existing resource document dating from before we started # recording these timestamps in the resource collection first_sample_timestamp = resource.get('first_sample_timestamp') if (first_sample_timestamp is not None and first_sample_timestamp > data['timestamp']): self.db.resource.update_one( {'_id': data['resource_id']}, {'$set': { 'first_sample_timestamp': data['timestamp'] }}) # Record the raw data for the meter. Use a copy so we do not # modify a data structure owned by our caller (the driver adds # a new key '_id'). record = copy.copy(data) record['recorded_at'] = timeutils.utcnow() self.db.meter.insert_one(record) def clear_expired_metering_data(self, ttl): """Clear expired data from the backend storage system. Clearing occurs with native MongoDB time-to-live feature. """ LOG.debug("Clearing expired metering data is based on native " "MongoDB time to live feature and going in background.") @classmethod def _build_sort_instructions(cls, sort_keys=None, sort_dir='desc'): """Returns a sort_instruction and paging operator. Sort instructions are used in the query to determine what attributes to sort on and what direction to use. :param q: The query dict passed in. :param sort_keys: array of attributes by which results be sorted. :param sort_dir: direction in which results be sorted (asc, desc). :return: sort instructions and paging operator """ sort_keys = sort_keys or [] sort_instructions = [] _sort_dir, operation = cls.SORT_OPERATION_MAPPING.get( sort_dir, cls.SORT_OPERATION_MAPPING['desc']) for _sort_key in sort_keys: _instruction = (_sort_key, _sort_dir) sort_instructions.append(_instruction) return sort_instructions, operation def _get_time_constrained_resources(self, query, start_timestamp, start_timestamp_op, end_timestamp, end_timestamp_op, metaquery, resource, limit): """Return an iterable of models.Resource instances Items are constrained by sample timestamp. :param query: project/user/source query :param start_timestamp: modified timestamp start range. 
:param start_timestamp_op: start time operator, like gt, ge. :param end_timestamp: modified timestamp end range. :param end_timestamp_op: end time operator, like lt, le. :param metaquery: dict with metadata to match on. :param resource: resource filter. """ if resource is not None: query['resource_id'] = resource # Add resource_ prefix so it matches the field in the db query.update( dict(('resource_' + k, v) for (k, v) in six.iteritems(metaquery))) # FIXME(dhellmann): This may not perform very well, # but doing any better will require changing the database # schema and that will need more thought than I have time # to put into it today. # Look for resources matching the above criteria and with # samples in the time range we care about, then change the # resource query to return just those resources by id. ts_range = pymongo_utils.make_timestamp_range(start_timestamp, end_timestamp, start_timestamp_op, end_timestamp_op) if ts_range: query['timestamp'] = ts_range sort_keys = base._handle_sort_key('resource') sort_instructions = self._build_sort_instructions(sort_keys)[0] # use a unique collection name for the results collection, # as result post-sorting (as oppposed to reduce pre-sorting) # is not possible on an inline M-R out = 'resource_list_%s' % uuid.uuid4() self.db.meter.map_reduce(self.MAP_RESOURCES, self.REDUCE_RESOURCES, out=out, sort={'resource_id': 1}, query=query) try: if limit is not None: results = self.db[out].find(sort=sort_instructions, limit=limit) else: results = self.db[out].find(sort=sort_instructions) for r in results: resource = r['value'] yield models.Resource( resource_id=r['_id'], user_id=resource['user_id'], project_id=resource['project_id'], first_sample_timestamp=resource['first_timestamp'], last_sample_timestamp=resource['last_timestamp'], source=resource['source'], metadata=pymongo_utils.unquote_keys(resource['metadata'])) finally: self.db[out].drop() def _get_floating_resources(self, query, metaquery, resource, limit): """Return an iterable of models.Resource instances Items are unconstrained by timestamp. :param query: project/user/source query :param metaquery: dict with metadata to match on. :param resource: resource filter. """ if resource is not None: query['_id'] = resource query.update(dict((k, v) for (k, v) in six.iteritems(metaquery))) keys = base._handle_sort_key('resource') sort_keys = [ 'last_sample_timestamp' if i == 'timestamp' else i for i in keys ] sort_instructions = self._build_sort_instructions(sort_keys)[0] if limit is not None: results = self.db.resource.find(query, sort=sort_instructions, limit=limit) else: results = self.db.resource.find(query, sort=sort_instructions) for r in results: yield models.Resource( resource_id=r['_id'], user_id=r['user_id'], project_id=r['project_id'], first_sample_timestamp=r.get('first_sample_timestamp', self._GENESIS), last_sample_timestamp=r.get('last_sample_timestamp', self._APOCALYPSE), source=r['source'], metadata=pymongo_utils.unquote_keys(r['metadata'])) def get_resources(self, user=None, project=None, source=None, start_timestamp=None, start_timestamp_op=None, end_timestamp=None, end_timestamp_op=None, metaquery=None, resource=None, limit=None): """Return an iterable of models.Resource instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param source: Optional source filter. :param start_timestamp: Optional modified timestamp start range. :param start_timestamp_op: Optional start time operator, like gt, ge. 
:param end_timestamp: Optional modified timestamp end range. :param end_timestamp_op: Optional end time operator, like lt, le. :param metaquery: Optional dict with metadata to match on. :param resource: Optional resource filter. :param limit: Maximum number of results to return. """ if limit == 0: return metaquery = pymongo_utils.improve_keys(metaquery, metaquery=True) or {} query = {} if user is not None: query['user_id'] = user if project is not None: query['project_id'] = project if source is not None: query['source'] = source if start_timestamp or end_timestamp: return self._get_time_constrained_resources( query, start_timestamp, start_timestamp_op, end_timestamp, end_timestamp_op, metaquery, resource, limit) else: return self._get_floating_resources(query, metaquery, resource, limit) @staticmethod def _make_period_dict(period, first_ts): """Create a period field for _id of grouped fields. :param period: Period duration in seconds :param first_ts: First timestamp for first period :return: """ if period >= 0: period_unique_dict = { "period_start": { "$divide": [{ "$subtract": [{ "$subtract": ["$timestamp", first_ts] }, { "$mod": [{ "$subtract": ["$timestamp", first_ts] }, period * 1000] }] }, period * 1000] } } else: # Note(ityaptin) Hack for older MongoDB versions (2.4.+ and older). # Since 2.6+ we could use $literal operator period_unique_dict = {"$period_start": {"$add": [0, 0]}} return period_unique_dict def get_meter_statistics(self, sample_filter, period=None, groupby=None, aggregate=None): """Return an iterable of models.Statistics instance. Items are containing meter statistics described by the query parameters. The filter must have a meter value set. """ if (groupby and set(groupby) - set([ 'user_id', 'project_id', 'resource_id', 'source', 'resource_metadata.instance_type' ])): raise ceilometer.NotImplementedError( "Unable to group by these fields") q = pymongo_utils.make_query_from_filter(sample_filter) group_stage = {} project_stage = { "unit": "$_id.unit", "name": "$_id.name", "first_timestamp": "$first_timestamp", "last_timestamp": "$last_timestamp", "period_start": "$_id.period_start", } # Add timestamps to $group stage group_stage.update({ "first_timestamp": { "$min": "$timestamp" }, "last_timestamp": { "$max": "$timestamp" } }) # Define a _id field for grouped documents unique_group_field = {"name": "$counter_name", "unit": "$counter_unit"} # Define a first timestamp for periods if sample_filter.start_timestamp: first_timestamp = sample_filter.start_timestamp else: first_timestamp_cursor = self.db.meter.find(limit=1, sort=[ ('timestamp', pymongo.ASCENDING) ]) if first_timestamp_cursor.count(): first_timestamp = first_timestamp_cursor[0]['timestamp'] else: first_timestamp = utils.EPOCH_TIME # Add a start_period field to unique identifier of grouped documents if period: period_dict = self._make_period_dict(period, first_timestamp) unique_group_field.update(period_dict) # Add a groupby fields to unique identifier of grouped documents if groupby: unique_group_field.update( dict((field.replace(".", "/"), "$%s" % field) for field in groupby)) group_stage.update({"_id": unique_group_field}) self._compile_aggregate_stages(aggregate, group_stage, project_stage) # Aggregation stages list. It's work one by one and uses documents # from previous stages. 
aggregation_query = [{ '$match': q }, { "$sort": { "timestamp": 1 } }, { "$group": group_stage }, { "$sort": { "_id.period_start": 1 } }, { "$project": project_stage }] # results is dict in pymongo<=2.6.3 and CommandCursor in >=3.0 results = self.db.meter.aggregate(aggregation_query, **self._make_aggregation_params()) return [ self._stats_result_to_model(point, groupby, aggregate, period, first_timestamp) for point in self._get_results(results) ] def _stats_result_aggregates(self, result, aggregate): stats_args = {} for attr, func in Connection.STANDARD_AGGREGATES.items(): if attr in result: stats_args.update( func.finalize(result, version_array=self.version)) if aggregate: stats_args['aggregate'] = {} for agr in aggregate: stats_args['aggregate'].update( Connection.AGGREGATES[agr.func].finalize( result, agr.param, self.version)) return stats_args def _stats_result_to_model(self, result, groupby, aggregate, period, first_timestamp): if period is None: period = 0 first_timestamp = pymongo_utils.from_unix_timestamp(first_timestamp) stats_args = self._stats_result_aggregates(result, aggregate) stats_args['unit'] = result['unit'] stats_args['duration'] = (result["last_timestamp"] - result["first_timestamp"]).total_seconds() stats_args['duration_start'] = result['first_timestamp'] stats_args['duration_end'] = result['last_timestamp'] stats_args['period'] = period start = result.get("period_start", 0) * period stats_args['period_start'] = (first_timestamp + datetime.timedelta(seconds=start)) stats_args['period_end'] = (first_timestamp + datetime.timedelta(seconds=start + period) if period else result['last_timestamp']) stats_args['groupby'] = (dict( (g, result['_id'].get(g.replace(".", "/"))) for g in groupby) if groupby else None) return models.Statistics(**stats_args) def _compile_aggregate_stages(self, aggregate, group_stage, project_stage): if not aggregate: for aggregation in Connection.STANDARD_AGGREGATES.values(): group_stage.update( aggregation.group(version_array=self.version)) project_stage.update( aggregation.project(version_array=self.version)) else: for description in aggregate: aggregation = Connection.AGGREGATES.get(description.func) if aggregation: if not aggregation.validate(description.param): raise storage.StorageBadAggregate( 'Bad aggregate: %s.%s' % (description.func, description.param)) group_stage.update( aggregation.group(description.param, version_array=self.version)) project_stage.update( aggregation.project(description.param, version_array=self.version)) @staticmethod def _get_results(results): if isinstance(results, dict): return results.get('result', []) else: return results def _make_aggregation_params(self): if self.version >= pymongo_utils.COMPLETE_AGGREGATE_COMPATIBLE_VERSION: return {"allowDiskUse": True} return {}
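For reference, here is a standalone pipeline of the same shape as the one assembled above ($match, $sort, $group, $sort, $project), computing per-meter statistics. The meter name, connection URL, and database name are assumptions; field names follow the meter collection schema used throughout this driver.

import pymongo

db = pymongo.MongoClient('mongodb://localhost:27017').ceilometer  # assumed

pipeline = [
    {'$match': {'counter_name': 'cpu_util'}},      # example meter
    {'$sort': {'timestamp': 1}},
    {'$group': {
        '_id': {'name': '$counter_name', 'unit': '$counter_unit'},
        'first_timestamp': {'$min': '$timestamp'},
        'last_timestamp': {'$max': '$timestamp'},
        'sum': {'$sum': '$counter_volume'},
        'count': {'$sum': 1},
    }},
    {'$sort': {'_id.period_start': 1}},
    {'$project': {
        '_id': 0,
        'unit': '$_id.unit',
        'name': '$_id.name',
        'first_timestamp': '$first_timestamp',
        'last_timestamp': '$last_timestamp',
        'sum': '$sum',
        'count': '$count',
        'avg': {'$divide': ['$sum', '$count']},
    }},
]
# As noted above, the result is a dict in pymongo<=2.6.3 and a
# CommandCursor in pymongo>=3.0.
results = db.meter.aggregate(pipeline)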
class Connection(pymongo_base.Connection): """Put the data into a MongoDB database Collections:: - meter - the raw incoming data - resource - the metadata for resources - { _id: uuid of resource, metadata: metadata dictionaries user_id: uuid project_id: uuid meter: [ array of {counter_name: string, counter_type: string, counter_unit: string} ] } """ CAPABILITIES = utils.update_nested(pymongo_base.Connection.CAPABILITIES, AVAILABLE_CAPABILITIES) CONNECTION_POOL = pymongo_utils.ConnectionPool() REDUCE_GROUP_CLEAN = bson.code.Code(""" function ( curr, result ) { if (result.resources.indexOf(curr.resource_id) < 0) result.resources.push(curr.resource_id); } """) STANDARD_AGGREGATES = dict( emit_initial=dict(sum='', count='', avg='', min='', max=''), emit_body=dict(sum='sum: this.counter_volume,', count='count: NumberInt(1),', avg='acount: NumberInt(1), asum: this.counter_volume,', min='min: this.counter_volume,', max='max: this.counter_volume,'), reduce_initial=dict(sum='', count='', avg='', min='', max=''), reduce_body=dict(sum='sum: values[0].sum,', count='count: values[0].count,', avg='acount: values[0].acount, asum: values[0].asum,', min='min: values[0].min,', max='max: values[0].max,'), reduce_computation=dict( sum='res.sum += values[i].sum;', count='res.count = NumberInt(res.count + values[i].count);', avg=('res.acount = NumberInt(res.acount + values[i].acount);' 'res.asum += values[i].asum;'), min='if ( values[i].min < res.min ) {res.min = values[i].min;}', max='if ( values[i].max > res.max ) {res.max = values[i].max;}'), finalize=dict(sum='', count='', avg='value.avg = value.asum / value.acount;', min='', max=''), ) UNPARAMETERIZED_AGGREGATES = dict( emit_initial=dict(stddev=('')), emit_body=dict(stddev='sdsum: this.counter_volume,' 'sdcount: 1,' 'weighted_distances: 0,' 'stddev: 0,'), reduce_initial=dict(stddev=''), reduce_body=dict(stddev='sdsum: values[0].sdsum,' 'sdcount: values[0].sdcount,' 'weighted_distances: values[0].weighted_distances,' 'stddev: values[0].stddev,'), reduce_computation=dict(stddev=( 'var deviance = (res.sdsum / res.sdcount) - values[i].sdsum;' 'var weight = res.sdcount / ++res.sdcount;' 'res.weighted_distances += (Math.pow(deviance, 2) * weight);' 'res.sdsum += values[i].sdsum;')), finalize=dict( stddev=('value.stddev = Math.sqrt(value.weighted_distances /' ' value.sdcount);')), ) PARAMETERIZED_AGGREGATES = dict( validate=dict(cardinality=lambda p: p in ['resource_id', 'user_id', 'project_id', 'source']), emit_initial=dict(cardinality=( 'aggregate["cardinality/%(aggregate_param)s"] = 1;' 'var distinct_%(aggregate_param)s = {};' 'distinct_%(aggregate_param)s[this["%(aggregate_param)s"]]' ' = true;')), emit_body=dict(cardinality=( 'distinct_%(aggregate_param)s : distinct_%(aggregate_param)s,' '%(aggregate_param)s : this["%(aggregate_param)s"],')), reduce_initial=dict(cardinality=''), reduce_body=dict(cardinality=( 'aggregate : values[0].aggregate,' 'distinct_%(aggregate_param)s:' ' values[0].distinct_%(aggregate_param)s,' '%(aggregate_param)s : values[0]["%(aggregate_param)s"],')), reduce_computation=dict(cardinality=( 'if (!(values[i]["%(aggregate_param)s"] in' ' res.distinct_%(aggregate_param)s)) {' ' res.distinct_%(aggregate_param)s[values[i]' ' ["%(aggregate_param)s"]] = true;' ' res.aggregate["cardinality/%(aggregate_param)s"] += 1;}')), finalize=dict(cardinality=''), ) EMIT_STATS_COMMON = """ var aggregate = {}; %(aggregate_initial_placeholder)s emit(%(key_val)s, { unit: this.counter_unit, aggregate : aggregate, %(aggregate_body_placeholder)s groupby : 
%(groupby_val)s, duration_start : this.timestamp, duration_end : this.timestamp, period_start : %(period_start_val)s, period_end : %(period_end_val)s} ) """ MAP_STATS_PERIOD_VAR = """ var period = %(period)d * 1000; var period_first = %(period_first)d * 1000; var period_start = period_first + (Math.floor(new Date(this.timestamp.getTime() - period_first) / period) * period); """ MAP_STATS_GROUPBY_VAR = """ var groupby_fields = %(groupby_fields)s; var groupby = {}; var groupby_key = {}; for ( var i=0; i<groupby_fields.length; i++ ) { groupby[groupby_fields[i]] = this[groupby_fields[i]] groupby_key[groupby_fields[i]] = this[groupby_fields[i]] } """ PARAMS_MAP_STATS = { 'key_val': '\'statistics\'', 'groupby_val': 'null', 'period_start_val': 'this.timestamp', 'period_end_val': 'this.timestamp', 'aggregate_initial_placeholder': '%(aggregate_initial_val)s', 'aggregate_body_placeholder': '%(aggregate_body_val)s' } MAP_STATS = bson.code.Code("function () {" + EMIT_STATS_COMMON % PARAMS_MAP_STATS + "}") PARAMS_MAP_STATS_PERIOD = { 'key_val': 'period_start', 'groupby_val': 'null', 'period_start_val': 'new Date(period_start)', 'period_end_val': 'new Date(period_start + period)', 'aggregate_initial_placeholder': '%(aggregate_initial_val)s', 'aggregate_body_placeholder': '%(aggregate_body_val)s' } MAP_STATS_PERIOD = bson.code.Code("function () {" + MAP_STATS_PERIOD_VAR + EMIT_STATS_COMMON % PARAMS_MAP_STATS_PERIOD + "}") PARAMS_MAP_STATS_GROUPBY = { 'key_val': 'groupby_key', 'groupby_val': 'groupby', 'period_start_val': 'this.timestamp', 'period_end_val': 'this.timestamp', 'aggregate_initial_placeholder': '%(aggregate_initial_val)s', 'aggregate_body_placeholder': '%(aggregate_body_val)s' } MAP_STATS_GROUPBY = bson.code.Code("function () {" + MAP_STATS_GROUPBY_VAR + EMIT_STATS_COMMON % PARAMS_MAP_STATS_GROUPBY + "}") PARAMS_MAP_STATS_PERIOD_GROUPBY = { 'key_val': 'groupby_key', 'groupby_val': 'groupby', 'period_start_val': 'new Date(period_start)', 'period_end_val': 'new Date(period_start + period)', 'aggregate_initial_placeholder': '%(aggregate_initial_val)s', 'aggregate_body_placeholder': '%(aggregate_body_val)s' } MAP_STATS_PERIOD_GROUPBY = bson.code.Code( "function () {" + MAP_STATS_PERIOD_VAR + MAP_STATS_GROUPBY_VAR + " groupby_key['period_start'] = period_start\n" + EMIT_STATS_COMMON % PARAMS_MAP_STATS_PERIOD_GROUPBY + "}") REDUCE_STATS = bson.code.Code(""" function (key, values) { %(aggregate_initial_val)s var res = { unit: values[0].unit, aggregate: values[0].aggregate, %(aggregate_body_val)s groupby: values[0].groupby, period_start: values[0].period_start, period_end: values[0].period_end, duration_start: values[0].duration_start, duration_end: values[0].duration_end }; for ( var i=1; i<values.length; i++ ) { %(aggregate_computation_val)s if ( values[i].duration_start < res.duration_start ) res.duration_start = values[i].duration_start; if ( values[i].duration_end > res.duration_end ) res.duration_end = values[i].duration_end; } return res; } """) FINALIZE_STATS = bson.code.Code(""" function (key, value) { %(aggregate_val)s value.duration = (value.duration_end - value.duration_start) / 1000; value.period = NumberInt((value.period_end - value.period_start) / 1000); return value; }""") SORT_OPERATION_MAPPING = { 'desc': (pymongo.DESCENDING, '$lt'), 'asc': (pymongo.ASCENDING, '$gt') } MAP_RESOURCES = bson.code.Code(""" function () { emit(this.resource_id, {user_id: this.user_id, project_id: this.project_id, source: this.source, first_timestamp: this.timestamp, last_timestamp: this.timestamp, 
metadata: this.resource_metadata}) }""") REDUCE_RESOURCES = bson.code.Code(""" function (key, values) { var merge = {user_id: values[0].user_id, project_id: values[0].project_id, source: values[0].source, first_timestamp: values[0].first_timestamp, last_timestamp: values[0].last_timestamp, metadata: values[0].metadata} values.forEach(function(value) { if (merge.first_timestamp - value.first_timestamp > 0) { merge.first_timestamp = value.first_timestamp; merge.user_id = value.user_id; merge.project_id = value.project_id; merge.source = value.source; } else if (merge.last_timestamp - value.last_timestamp <= 0) { merge.last_timestamp = value.last_timestamp; merge.metadata = value.metadata; } }); return merge; }""") _GENESIS = datetime.datetime(year=datetime.MINYEAR, month=1, day=1) _APOCALYPSE = datetime.datetime(year=datetime.MAXYEAR, month=12, day=31, hour=23, minute=59, second=59) def __init__(self, url): # NOTE(jd) Use our own connection pooling on top of the Pymongo one. # We need that otherwise we overflow the MongoDB instance with new # connection since we instanciate a Pymongo client each time someone # requires a new storage connection. self.conn = self.CONNECTION_POOL.connect(url) # Require MongoDB 2.4 to use $setOnInsert if self.conn.server_info()['versionArray'] < [2, 4]: raise storage.StorageBadVersion("Need at least MongoDB 2.4") connection_options = pymongo.uri_parser.parse_uri(url) self.db = getattr(self.conn, connection_options['database']) if connection_options.get('username'): self.db.authenticate(connection_options['username'], connection_options['password']) # NOTE(jd) Upgrading is just about creating index, so let's do this # on connection to be sure at least the TTL is correcly updated if # needed. self.upgrade() def upgrade(self): # Establish indexes # # We need variations for user_id vs. project_id because of the # way the indexes are stored in b-trees. The user_id and # project_id values are usually mutually exclusive in the # queries, so the database won't take advantage of an index # including both. 
name_qualifier = dict(user_id='', project_id='project_') background = dict(user_id=False, project_id=True) for primary in ['user_id', 'project_id']: name = 'resource_%sidx' % name_qualifier[primary] self.db.resource.ensure_index([ (primary, pymongo.ASCENDING), ('source', pymongo.ASCENDING), ], name=name, background=background[primary]) name = 'meter_%sidx' % name_qualifier[primary] self.db.meter.ensure_index([ ('resource_id', pymongo.ASCENDING), (primary, pymongo.ASCENDING), ('counter_name', pymongo.ASCENDING), ('timestamp', pymongo.ASCENDING), ('source', pymongo.ASCENDING), ], name=name, background=background[primary]) self.db.resource.ensure_index( [('last_sample_timestamp', pymongo.DESCENDING)], name='last_sample_timestamp_idx', sparse=True) self.db.meter.ensure_index([('timestamp', pymongo.DESCENDING)], name='timestamp_idx') # remove API v1 related table self.db.user.drop() self.db.project.drop() indexes = self.db.meter.index_information() ttl = cfg.CONF.database.time_to_live if ttl <= 0: if 'meter_ttl' in indexes: self.db.meter.drop_index('meter_ttl') return if 'meter_ttl' in indexes: # NOTE(sileht): manually check expireAfterSeconds because # ensure_index doesn't update index options if the index already # exists if ttl == indexes['meter_ttl'].get('expireAfterSeconds', -1): return self.db.meter.drop_index('meter_ttl') self.db.meter.create_index([('timestamp', pymongo.ASCENDING)], expireAfterSeconds=ttl, name='meter_ttl') def clear(self): self.conn.drop_database(self.db) # Connection will be reopened automatically if needed self.conn.close() def record_metering_data(self, data): """Write the data to the backend storage system. :param data: a dictionary such as returned by ceilometer.meter.meter_message_from_counter """ # Record the updated resource metadata - we use $setOnInsert to # unconditionally insert sample timestamps and resource metadata # (in the update case, this must be conditional on the sample not # being out-of-order) resource = self.db.resource.find_and_modify( {'_id': data['resource_id']}, { '$set': { 'project_id': data['project_id'], 'user_id': data['user_id'], 'source': data['source'], }, '$setOnInsert': { 'metadata': data['resource_metadata'], 'first_sample_timestamp': data['timestamp'], 'last_sample_timestamp': data['timestamp'], }, '$addToSet': { 'meter': { 'counter_name': data['counter_name'], 'counter_type': data['counter_type'], 'counter_unit': data['counter_unit'], }, }, }, upsert=True, new=True, ) # only update last sample timestamp if actually later (the usual # in-order case) last_sample_timestamp = resource.get('last_sample_timestamp') if (last_sample_timestamp is None or last_sample_timestamp <= data['timestamp']): self.db.resource.update({'_id': data['resource_id']}, { '$set': { 'metadata': data['resource_metadata'], 'last_sample_timestamp': data['timestamp'] } }) # only update first sample timestamp if actually earlier (the unusual # out-of-order case) # NOTE: a null first sample timestamp is not updated as this indicates # a pre-existing resource document dating from before we started # recording these timestamps in the resource collection first_sample_timestamp = resource.get('first_sample_timestamp') if (first_sample_timestamp is not None and first_sample_timestamp > data['timestamp']): self.db.resource.update( {'_id': data['resource_id']}, {'$set': { 'first_sample_timestamp': data['timestamp'] }}) # Record the raw data for the meter. Use a copy so we do not # modify a data structure owned by our caller (the driver adds # a new key '_id'). 
        record = copy.copy(data)
        record['recorded_at'] = timeutils.utcnow()
        self.db.meter.insert(record)

    def clear_expired_metering_data(self, ttl):
        """Clear expired data from the backend storage system.

        Clearing occurs according to the time-to-live.

        :param ttl: Number of seconds to keep records for.
        """
        results = self.db.meter.group(key={},
                                      condition={},
                                      reduce=self.REDUCE_GROUP_CLEAN,
                                      initial={'resources': []})[0]
        self.db.resource.remove({'_id': {'$nin': results['resources']}})

    @staticmethod
    def _get_marker(db_collection, marker_pairs):
        """Return the marker document according to the attribute-value pairs.

        :param db_collection: Database collection to query.
        :param marker_pairs: Attribute-value pairs filter.
        """
        if db_collection is None:
            return
        if not marker_pairs:
            return
        ret = db_collection.find(marker_pairs, limit=2)

        if ret.count() == 0:
            raise base.NoResultFound
        elif ret.count() > 1:
            raise base.MultipleResultsFound
        else:
            _ret = ret.__getitem__(0)
            return _ret

    @classmethod
    def _recurse_sort_keys(cls, sort_keys, marker, flag):
        _first = sort_keys[0]
        value = marker[_first]
        if len(sort_keys) == 1:
            return {_first: {flag: value}}
        else:
            criteria_equ = {_first: {'eq': value}}
            criteria_cmp = cls._recurse_sort_keys(sort_keys[1:], marker, flag)
        return dict(criteria_equ, **criteria_cmp)

    @classmethod
    def _build_paginate_query(cls, marker, sort_keys=None, sort_dir='desc'):
        """Returns a query with sorting / pagination.

        Pagination works by requiring sort_key and sort_dir.
        We use the last item in the previous page as the 'marker' for
        pagination, so we return values that follow the passed marker
        in the order.

        :param q: The query dict passed in.
        :param marker: the last item of the previous page; we return the next
                       results after this item.
        :param sort_keys: array of attributes by which results be sorted.
        :param sort_dir: direction in which results be sorted (asc, desc).
        :return: sort parameters, query to use
        """
        all_sort = []
        sort_keys = sort_keys or []
        all_sort, _op = cls._build_sort_instructions(sort_keys, sort_dir)

        if marker is not None:
            sort_criteria_list = []

            for i in range(len(sort_keys)):
                # NOTE(fengqian): Generate the query criteria recursively.
                # sort_keys=[k1, k2, k3], marker_value=[v1, v2, v3]
                # sort_flags = ['$lt', '$gt', 'lt'].
                # The query criteria should be
                # {'k3': {'$lt': 'v3'}, 'k2': {'eq': 'v2'}, 'k1':
                #     {'eq': 'v1'}},
                # {'k2': {'$gt': 'v2'}, 'k1': {'eq': 'v1'}},
                # {'k1': {'$lt': 'v1'}} with 'OR' operation.
                # Each recursion generates one of the three items.
                sort_criteria_list.append(
                    cls._recurse_sort_keys(sort_keys[:(len(sort_keys) - i)],
                                           marker, _op))

            metaquery = {"$or": sort_criteria_list}
        else:
            metaquery = {}

        return all_sort, metaquery

    @classmethod
    def _build_sort_instructions(cls, sort_keys=None, sort_dir='desc'):
        """Returns a sort_instruction and paging operator.

        Sort instructions are used in the query to determine what attributes
        to sort on and what direction to use.

        :param q: The query dict passed in.
        :param sort_keys: array of attributes by which results be sorted.
        :param sort_dir: direction in which results be sorted (asc, desc).
:return: sort instructions and paging operator """ sort_keys = sort_keys or [] sort_instructions = [] _sort_dir, operation = cls.SORT_OPERATION_MAPPING.get( sort_dir, cls.SORT_OPERATION_MAPPING['desc']) for _sort_key in sort_keys: _instruction = (_sort_key, _sort_dir) sort_instructions.append(_instruction) return sort_instructions, operation @classmethod def paginate_query(cls, q, db_collection, limit=None, marker=None, sort_keys=None, sort_dir='desc'): """Returns a query result with sorting / pagination. Pagination works by requiring sort_key and sort_dir. We use the last item in previous page as the 'marker' for pagination. So we return values that follow the passed marker in the order. :param q: the query dict passed in. :param db_collection: Database collection that be query. :param limit: maximum number of items to return. :param marker: the last item of the previous page; we return the next results after this item. :param sort_keys: array of attributes by which results be sorted. :param sort_dir: direction in which results be sorted (asc, desc). :return: The query with sorting/pagination added. """ sort_keys = sort_keys or [] all_sort, query = cls._build_paginate_query(marker, sort_keys, sort_dir) q.update(query) # NOTE(Fengqian): MongoDB collection.find can not handle limit # when it equals None, it will raise TypeError, so we treat # None as 0 for the value of limit. if limit is None: limit = 0 return db_collection.find(q, limit=limit, sort=all_sort) def _get_time_constrained_resources(self, query, start_timestamp, start_timestamp_op, end_timestamp, end_timestamp_op, metaquery, resource): """Return an iterable of models.Resource instances Items are constrained by sample timestamp. :param query: project/user/source query :param start_timestamp: modified timestamp start range. :param start_timestamp_op: start time operator, like gt, ge. :param end_timestamp: modified timestamp end range. :param end_timestamp_op: end time operator, like lt, le. :param metaquery: dict with metadata to match on. :param resource: resource filter. """ if resource is not None: query['resource_id'] = resource # Add resource_ prefix so it matches the field in the db query.update( dict(('resource_' + k, v) for (k, v) in six.iteritems(metaquery))) # FIXME(dhellmann): This may not perform very well, # but doing any better will require changing the database # schema and that will need more thought than I have time # to put into it today. # Look for resources matching the above criteria and with # samples in the time range we care about, then change the # resource query to return just those resources by id. 
ts_range = pymongo_utils.make_timestamp_range(start_timestamp, end_timestamp, start_timestamp_op, end_timestamp_op) if ts_range: query['timestamp'] = ts_range sort_keys = base._handle_sort_key('resource') sort_instructions = self._build_sort_instructions(sort_keys)[0] # use a unique collection name for the results collection, # as result post-sorting (as oppposed to reduce pre-sorting) # is not possible on an inline M-R out = 'resource_list_%s' % uuid.uuid4() self.db.meter.map_reduce(self.MAP_RESOURCES, self.REDUCE_RESOURCES, out=out, sort={'resource_id': 1}, query=query) try: for r in self.db[out].find(sort=sort_instructions): resource = r['value'] yield models.Resource( resource_id=r['_id'], user_id=resource['user_id'], project_id=resource['project_id'], first_sample_timestamp=resource['first_timestamp'], last_sample_timestamp=resource['last_timestamp'], source=resource['source'], metadata=resource['metadata']) finally: self.db[out].drop() def _get_floating_resources(self, query, metaquery, resource): """Return an iterable of models.Resource instances Items are unconstrained by timestamp. :param query: project/user/source query :param metaquery: dict with metadata to match on. :param resource: resource filter. """ if resource is not None: query['_id'] = resource query.update(dict((k, v) for (k, v) in six.iteritems(metaquery))) keys = base._handle_sort_key('resource') sort_keys = [ 'last_sample_timestamp' if i == 'timestamp' else i for i in keys ] sort_instructions = self._build_sort_instructions(sort_keys)[0] for r in self.db.resource.find(query, sort=sort_instructions): yield models.Resource( resource_id=r['_id'], user_id=r['user_id'], project_id=r['project_id'], first_sample_timestamp=r.get('first_sample_timestamp', self._GENESIS), last_sample_timestamp=r.get('last_sample_timestamp', self._APOCALYPSE), source=r['source'], metadata=r['metadata']) def get_resources(self, user=None, project=None, source=None, start_timestamp=None, start_timestamp_op=None, end_timestamp=None, end_timestamp_op=None, metaquery=None, resource=None, pagination=None): """Return an iterable of models.Resource instances :param user: Optional ID for user that owns the resource. :param project: Optional ID for project that owns the resource. :param source: Optional source filter. :param start_timestamp: Optional modified timestamp start range. :param start_timestamp_op: Optional start time operator, like gt, ge. :param end_timestamp: Optional modified timestamp end range. :param end_timestamp_op: Optional end time operator, like lt, le. :param metaquery: Optional dict with metadata to match on. :param resource: Optional resource filter. :param pagination: Optional pagination query. 
""" if pagination: raise NotImplementedError('Pagination not implemented') metaquery = metaquery or {} query = {} if user is not None: query['user_id'] = user if project is not None: query['project_id'] = project if source is not None: query['source'] = source if start_timestamp or end_timestamp: return self._get_time_constrained_resources( query, start_timestamp, start_timestamp_op, end_timestamp, end_timestamp_op, metaquery, resource) else: return self._get_floating_resources(query, metaquery, resource) def _aggregate_param(self, fragment_key, aggregate): fragment_map = self.STANDARD_AGGREGATES[fragment_key] if not aggregate: return ''.join([f for f in fragment_map.values()]) fragments = '' for a in aggregate: if a.func in self.STANDARD_AGGREGATES[fragment_key]: fragment_map = self.STANDARD_AGGREGATES[fragment_key] fragments += fragment_map[a.func] elif a.func in self.UNPARAMETERIZED_AGGREGATES[fragment_key]: fragment_map = self.UNPARAMETERIZED_AGGREGATES[fragment_key] fragments += fragment_map[a.func] elif a.func in self.PARAMETERIZED_AGGREGATES[fragment_key]: fragment_map = self.PARAMETERIZED_AGGREGATES[fragment_key] v = self.PARAMETERIZED_AGGREGATES['validate'].get(a.func) if not (v and v(a.param)): raise storage.StorageBadAggregate('Bad aggregate: %s.%s' % (a.func, a.param)) params = dict(aggregate_param=a.param) fragments += (fragment_map[a.func] % params) else: raise NotImplementedError('Selectable aggregate function %s' ' is not supported' % a.func) return fragments def get_meter_statistics(self, sample_filter, period=None, groupby=None, aggregate=None): """Return an iterable of models.Statistics instance. Items are containing meter statistics described by the query parameters. The filter must have a meter value set. """ if (groupby and set(groupby) - set(['user_id', 'project_id', 'resource_id', 'source'])): raise NotImplementedError("Unable to group by these fields") q = pymongo_utils.make_query_from_filter(sample_filter) if period: if sample_filter.start: period_start = sample_filter.start else: period_start = self.db.meter.find(limit=1, sort=[('timestamp', pymongo.ASCENDING) ])[0]['timestamp'] period_start = int(calendar.timegm(period_start.utctimetuple())) map_params = { 'period': period, 'period_first': period_start, 'groupby_fields': json.dumps(groupby) } if groupby: map_fragment = self.MAP_STATS_PERIOD_GROUPBY else: map_fragment = self.MAP_STATS_PERIOD else: if groupby: map_params = {'groupby_fields': json.dumps(groupby)} map_fragment = self.MAP_STATS_GROUPBY else: map_params = dict() map_fragment = self.MAP_STATS sub = self._aggregate_param map_params['aggregate_initial_val'] = sub('emit_initial', aggregate) map_params['aggregate_body_val'] = sub('emit_body', aggregate) map_stats = map_fragment % map_params reduce_params = dict(aggregate_initial_val=sub('reduce_initial', aggregate), aggregate_body_val=sub('reduce_body', aggregate), aggregate_computation_val=sub( 'reduce_computation', aggregate)) reduce_stats = self.REDUCE_STATS % reduce_params finalize_params = dict(aggregate_val=sub('finalize', aggregate)) finalize_stats = self.FINALIZE_STATS % finalize_params results = self.db.meter.map_reduce( map_stats, reduce_stats, {'inline': 1}, finalize=finalize_stats, query=q, ) # FIXME(terriyu) Fix get_meter_statistics() so we don't use sorted() # to return the results return sorted( (self._stats_result_to_model(r['value'], groupby, aggregate) for r in results['results']), key=operator.attrgetter('period_start')) @staticmethod def _stats_result_aggregates(result, aggregate): 
stats_args = {} for attr in ['count', 'min', 'max', 'sum', 'avg']: if attr in result: stats_args[attr] = result[attr] if aggregate: stats_args['aggregate'] = {} for a in aggregate: ak = '%s%s' % (a.func, '/%s' % a.param if a.param else '') if ak in result: stats_args['aggregate'][ak] = result[ak] elif 'aggregate' in result: stats_args['aggregate'][ak] = result['aggregate'].get(ak) return stats_args @staticmethod def _stats_result_to_model(result, groupby, aggregate): stats_args = Connection._stats_result_aggregates(result, aggregate) stats_args['unit'] = result['unit'] stats_args['duration'] = result['duration'] stats_args['duration_start'] = result['duration_start'] stats_args['duration_end'] = result['duration_end'] stats_args['period'] = result['period'] stats_args['period_start'] = result['period_start'] stats_args['period_end'] = result['period_end'] stats_args['groupby'] = (dict( (g, result['groupby'][g]) for g in groupby) if groupby else None) return models.Statistics(**stats_args)
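As an illustration of how the finalized map-reduce output feeds _stats_result_to_model above, a 'value' document might look like the following; the numbers and timestamps are made up, and the field names follow REDUCE_STATS/FINALIZE_STATS.

import datetime

result_value = {
    'unit': 'GB',
    'count': 3, 'min': 1.0, 'max': 4.0, 'sum': 7.0, 'avg': 7.0 / 3,
    'duration': 120.0,
    'duration_start': datetime.datetime(2015, 1, 1, 0, 0, 0),
    'duration_end': datetime.datetime(2015, 1, 1, 0, 2, 0),
    'period': 0,
    'period_start': datetime.datetime(2015, 1, 1, 0, 0, 0),
    'period_end': datetime.datetime(2015, 1, 1, 0, 0, 0),
    'groupby': None,
}

# With no groupby and no selectable aggregates, this yields a Statistics
# model whose avg/min/max/sum/count come straight from the document.
stats = Connection._stats_result_to_model(result_value,
                                          groupby=None,
                                          aggregate=None)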