Example #1
    def __init__(self, conf):
        url = conf.database.connection

        # NOTE(jd) Use our own connection pooling on top of the Pymongo one.
        # We need it, otherwise we overflow the MongoDB instance with new
        # connections, since we instantiate a Pymongo client each time someone
        # requires a new storage connection.
        self.conn = self.CONNECTION_POOL.connect(url)

        # Require MongoDB 2.4 to use $setOnInsert
        if self.conn.server_info()['versionArray'] < [2, 4]:
            raise storage.StorageBadVersion("Need at least MongoDB 2.4")

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options['database'])
        if connection_options.get('username'):
            self.db.authenticate(connection_options['username'],
                                 connection_options['password'])

        self.CAPABILITIES = utils.update_nested(self.DEFAULT_CAPABILITIES,
                                                AVAILABLE_CAPABILITIES)

        # NOTE(jd) Upgrading is just about creating indexes, so let's do this
        # on connection to be sure at least the TTL is correctly updated if
        # needed.
        self.upgrade()
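
The NOTE(jd) comment above explains that the driver layers its own connection pooling on top of Pymongo so that a new client is not instantiated for every storage connection. A minimal, hypothetical sketch of such a URL-keyed pool (an illustration, not the actual Ceilometer CONNECTION_POOL implementation) could look like this:

import pymongo


class ConnectionPool(object):
    """Cache one MongoClient per connection URL (illustrative sketch)."""

    def __init__(self):
        self._clients = {}

    def connect(self, url):
        # Reuse the existing client for this URL instead of creating a new
        # pymongo client for every storage connection request.
        client = self._clients.get(url)
        if client is None:
            client = pymongo.MongoClient(url)
            self._clients[url] = client
        return client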
Example #2
 def get_capabilities(self):
     """Return an dictionary representing the capabilities of this driver.
     """
     available = {
         'meters': {'query': {'simple': True,
                              'metadata': True}},
         'resources': {'query': {'simple': True,
                                 'metadata': True}},
         'samples': {'query': {'simple': True,
                               'metadata': True,
                               'complex': True}},
         'statistics': {'groupby': True,
                        'query': {'simple': True,
                                  'metadata': True},
                        'aggregation': {'standard': True,
                                        'selectable': {
                                            'max': True,
                                            'min': True,
                                            'sum': True,
                                            'avg': True,
                                            'count': True,
                                            'stddev': True,
                                            'cardinality': True}}
                        },
         'alarms': {'query': {'simple': True,
                              'complex': True},
                    'history': {'query': {'simple': True,
                                          'complex': True}}},
     }
     return utils.update_nested(self.DEFAULT_CAPABILITIES, available)
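
get_capabilities() merges the driver-specific capability flags over the defaults with utils.update_nested. A recursive dictionary merge along these lines would produce that behaviour (an illustrative sketch, not necessarily the exact Ceilometer helper):

import copy


def update_nested(original, updates):
    """Return a copy of 'original' with 'updates' merged in recursively."""
    result = copy.deepcopy(original)
    for key, value in updates.items():
        if isinstance(value, dict) and isinstance(result.get(key), dict):
            # Descend into nested dicts instead of replacing them wholesale.
            result[key] = update_nested(result[key], value)
        else:
            result[key] = value
    return result

With a merge like this, only the flags listed in the driver's available dict change, while any defaults that are not mentioned stay intact.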
Example #3
    def __init__(self, conf):
        url = conf.database.connection

        # Since we are using pymongo, even though we are connecting to DB2
        # we still have to replace the scheme that distinguishes the db2
        # driver from the mongodb driver, so that pymongo will not raise an
        # exception on the unknown scheme.
        url = url.replace("db2:", "mongodb:", 1)
        self.conn = self.CONNECTION_POOL.connect(url)

        # Require MongoDB 2.2 to use aggregate(). Since we use mongodb as the
        # backend for tests, the following code makes sure the tests won't try
        # to aggregate on an older mongodb. For db2, versionArray will not be
        # part of server_info, so no exception is raised when a real db2
        # backend is used.
        server_info = self.conn.server_info()
        if server_info.get("sysInfo"):
            self._using_mongodb = True
        else:
            self._using_mongodb = False

        if self._using_mongodb and server_info.get("versionArray") < [2, 2]:
            raise storage.StorageBadVersion("Need at least MongoDB 2.2")

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options["database"])
        if connection_options.get("username"):
            self.db.authenticate(connection_options["username"], connection_options["password"])

        self.CAPABILITIES = utils.update_nested(self.DEFAULT_CAPABILITIES, AVAILABLE_CAPABILITIES)

        self.upgrade()
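
The DB2 constructor above relies on two small Python details: str.replace with a count of 1 only rewrites the first occurrence of the scheme, and versionArray is a list that is compared lexicographically. For illustration (the URL below is hypothetical):

# Only the first occurrence of 'db2:' is replaced, so the rest of the URL
# is left untouched.
url = "db2://user:pass@host:27017/ceilometer"
assert url.replace("db2:", "mongodb:", 1) == (
    "mongodb://user:pass@host:27017/ceilometer")

# versionArray is compared element by element, so the minimum-version
# check works like a normal version comparison.
assert [2, 0, 4] < [2, 2]
assert not ([2, 4, 9] < [2, 2])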
Example #4
 def get_capabilities(self):
     """Return an dictionary representing the capabilities of this driver.
     """
     available = {
         'meters': {'query': {'simple': True,
                              'metadata': True}},
         'resources': {'query': {'simple': True,
                                 'metadata': True}},
         'samples': {'query': {'simple': True,
                               'metadata': True}},
         'statistics': {'query': {'simple': True,
                                  'metadata': True},
                        'aggregation': {'standard': True}},
     }
     return utils.update_nested(self.DEFAULT_CAPABILITIES, available)
Example #5
    def __init__(self, conf):
        """Hbase Connection Initialization."""
        opts = self._parse_connection_url(conf.database.connection)

        if opts['host'] == '__test__':
            url = os.environ.get('CEILOMETER_TEST_HBASE_URL')
            if url:
                # Reparse URL, but from the env variable now
                opts = self._parse_connection_url(url)
                self.conn_pool = self._get_connection_pool(opts)
            else:
                # This is an in-memory connection used for unit tests
                if Connection._memory_instance is None:
                    LOG.debug(_('Creating a new in-memory HBase '
                              'Connection object'))
                    Connection._memory_instance = MConnectionPool()
                self.conn_pool = Connection._memory_instance
        else:
            self.conn_pool = self._get_connection_pool(opts)

        self.CAPABILITIES = utils.update_nested(self.DEFAULT_CAPABILITIES,
                                                AVAILABLE_CAPABILITIES)
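
The '__test__' host is a test hook: the driver then reads CEILOMETER_TEST_HBASE_URL and only falls back to the in-memory MConnectionPool when no real HBase endpoint is provided. For example (the URL value below is illustrative only, not from the original source):

import os

# Point the tests at a real HBase Thrift endpoint instead of the
# in-memory stub; the value here is only an example.
os.environ['CEILOMETER_TEST_HBASE_URL'] = 'hbase://localhost:9090'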
Example #6
class Connection(base.Connection):
    """Put the data into a HBase database

    Collections:

    - meter (actually describes a sample):

      - row-key: consists of reversed timestamp, meter and a message signature
        for purposes of uniqueness
      - Column Families:

        f: contains the following qualifiers:

          - counter_name: <name of counter>
          - counter_type: <type of counter>
          - counter_unit: <unit of counter>
          - counter_volume: <volume of counter>
          - message: <raw incoming data>
          - message_id: <id of message>
          - message_signature: <signature of message>
          - resource_metadata: raw metadata for corresponding resource
            of the meter
          - project_id: <id of project>
          - resource_id: <id of resource>
          - user_id: <id of user>
          - recorded_at: <datetime when sample has been recorded (utc.now)>
          - flattened metadata with prefix r_metadata. e.g.::

             f:r_metadata.display_name or f:r_metadata.tag

          - rts: <reversed timestamp of entry>
          - timestamp: <meter's timestamp (came from message)>
          - source for meter with prefix 's'

    - resource:

      - row_key: uuid of resource
      - Column Families:

        f: contains the following qualifiers:

          - resource_metadata: raw metadata for corresponding resource
          - project_id: <id of project>
          - resource_id: <id of resource>
          - user_id: <id of user>
          - flattened metadata with prefix r_metadata. e.g.::

             f:r_metadata.display_name or f:r_metadata.tag

          - sources for all corresponding meters with prefix 's'
          - all meters for this resource in format:

            .. code-block:: python

              "%s+%s+%s!%s!%s" % (rts, source, counter_name, counter_type,
              counter_unit)

    - events:

      - row_key: timestamp of event's generation + uuid of event
        in format: "%s+%s" % (ts, Event.message_id)
      - Column Families:

        f: contains the following qualifiers:

          - event_type: description of event's type
          - timestamp: time stamp of event generation
          - all traits for this event in format:

            .. code-block:: python

              "%s+%s" % (trait_name, trait_type)
    """

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )
    _memory_instance = None

    RESOURCE_TABLE = "resource"
    METER_TABLE = "meter"
    EVENT_TABLE = "event"

    def __init__(self, url):
        """Hbase Connection Initialization."""
        opts = self._parse_connection_url(url)

        if opts['host'] == '__test__':
            url = os.environ.get('CEILOMETER_TEST_HBASE_URL')
            if url:
                # Reparse URL, but from the env variable now
                opts = self._parse_connection_url(url)
                self.conn_pool = self._get_connection_pool(opts)
            else:
                # This is an in-memory connection used for unit tests
                if Connection._memory_instance is None:
                    LOG.debug(
                        _('Creating a new in-memory HBase '
                          'Connection object'))
                    Connection._memory_instance = (
                        hbase_inmemory.MConnectionPool())
                self.conn_pool = Connection._memory_instance
        else:
            self.conn_pool = self._get_connection_pool(opts)

    def upgrade(self):
        with self.conn_pool.connection() as conn:
            conn.create_table(self.RESOURCE_TABLE, {'f': dict(max_versions=1)})
            conn.create_table(self.METER_TABLE, {'f': dict(max_versions=1)})
            conn.create_table(self.EVENT_TABLE, {'f': dict(max_versions=1)})

    def clear(self):
        LOG.debug(_('Dropping HBase schema...'))
        with self.conn_pool.connection() as conn:
            for table in [
                    self.RESOURCE_TABLE, self.METER_TABLE, self.EVENT_TABLE
            ]:
                try:
                    conn.disable_table(table)
                except Exception:
                    LOG.debug(_('Cannot disable table but ignoring error'))
                try:
                    conn.delete_table(table)
                except Exception:
                    LOG.debug(_('Cannot delete table but ignoring error'))

    @staticmethod
    def _get_connection_pool(conf):
        """Return a connection pool to the database.

        .. note::

          The tests use a subclass to override this and return an
          in-memory connection pool.
        """
        LOG.debug(
            _('connecting to HBase on %(host)s:%(port)s') %
            ({
                'host': conf['host'],
                'port': conf['port']
            }))
        return happybase.ConnectionPool(size=100,
                                        host=conf['host'],
                                        port=conf['port'],
                                        table_prefix=conf['table_prefix'])

    @staticmethod
    def _parse_connection_url(url):
        """Parse connection parameters from a database url.

        .. note::

          HBase Thrift does not support authentication and there is no
          database name, so we are not looking for these in the url.
        """
        opts = {}
        result = network_utils.urlsplit(url)
        opts['table_prefix'] = urlparse.parse_qs(result.query).get(
            'table_prefix', [None])[0]
        opts['dbtype'] = result.scheme
        if ':' in result.netloc:
            opts['host'], port = result.netloc.split(':')
        else:
            opts['host'] = result.netloc
            port = 9090
        opts['port'] = port and int(port) or 9090
        return opts
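
    # Illustrative (hypothetical) usage of _parse_connection_url: a URL such
    # as 'hbase://hbase-host:9090?table_prefix=ceilo' would be parsed into
    # {'table_prefix': 'ceilo', 'dbtype': 'hbase',
    #  'host': 'hbase-host', 'port': 9090}.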

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
          ceilometer.meter.meter_message_from_counter
        """
        with self.conn_pool.connection() as conn:
            resource_table = conn.table(self.RESOURCE_TABLE)
            meter_table = conn.table(self.METER_TABLE)

            resource_metadata = data.get('resource_metadata', {})
            # Determine the name of the new meter
            rts = hbase_utils.timestamp(data['timestamp'])
            new_meter = hbase_utils.format_meter_reference(
                data['counter_name'], data['counter_type'],
                data['counter_unit'], rts, data['source'])

            # TODO(nprivalova): try not to store resource_id
            resource = hbase_utils.serialize_entry(
                **{
                    'source': data['source'],
                    'meter': {
                        new_meter: data['timestamp']
                    },
                    'resource_metadata': resource_metadata,
                    'resource_id': data['resource_id'],
                    'project_id': data['project_id'],
                    'user_id': data['user_id']
                })
            # Here we put the entry into HBase with our own timestamp. This is
            # needed when samples arrive out of order: by using the sample's
            # own timestamp the newest data automatically ends up 'on top',
            # which keeps the metadata up to date (the metadata from the
            # newest samples is considered the actual one).
            ts = int(time.mktime(data['timestamp'].timetuple()) * 1000)
            resource_table.put(data['resource_id'], resource, ts)

            # Rowkey consists of reversed timestamp, meter and a
            # message signature for purposes of uniqueness
            row = "%s_%d_%s" % (data['counter_name'], rts,
                                data['message_signature'])
            record = hbase_utils.serialize_entry(
                data, **{
                    'source': data['source'],
                    'rts': rts,
                    'message': data,
                    'recorded_at': timeutils.utcnow()
                })
            meter_table.put(row, record)

    def get_resources(self,
                      user=None,
                      project=None,
                      source=None,
                      start_timestamp=None,
                      start_timestamp_op=None,
                      end_timestamp=None,
                      end_timestamp_op=None,
                      metaquery=None,
                      resource=None,
                      pagination=None):
        """Return an iterable of models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like ge, gt.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise NotImplementedError('Pagination not implemented')

        q = hbase_utils.make_query(metaquery=metaquery,
                                   user_id=user,
                                   project_id=project,
                                   resource_id=resource,
                                   source=source)
        q = hbase_utils.make_meter_query_for_resource(start_timestamp,
                                                      start_timestamp_op,
                                                      end_timestamp,
                                                      end_timestamp_op, source,
                                                      q)
        with self.conn_pool.connection() as conn:
            resource_table = conn.table(self.RESOURCE_TABLE)
            LOG.debug(_("Query Resource table: %s") % q)
            for resource_id, data in resource_table.scan(filter=q):
                f_res, sources, meters, md = hbase_utils.deserialize_entry(
                    data)
                # Unfortunately happybase doesn't keep the result ordered as
                # returned by HBase, so we need to find the min and max
                # manually.
                first_ts = min(meters, key=operator.itemgetter(1))[1]
                last_ts = max(meters, key=operator.itemgetter(1))[1]
                source = meters[0][0].split('+')[1]
                # If we use a QualifierFilter, HBase returns only the
                # qualifiers that were filtered on, not the whole entry.
                # That's why we need to ask for the additional qualifiers
                # manually.
                if 'project_id' not in f_res and 'user_id' not in f_res:
                    row = resource_table.row(resource_id,
                                             columns=[
                                                 'f:project_id', 'f:user_id',
                                                 'f:resource_metadata'
                                             ])
                    f_res, _s, _m, md = hbase_utils.deserialize_entry(row)
                yield models.Resource(resource_id=resource_id,
                                      first_sample_timestamp=first_ts,
                                      last_sample_timestamp=last_ts,
                                      project_id=f_res['project_id'],
                                      source=source,
                                      user_id=f_res['user_id'],
                                      metadata=md)

    def get_meters(self,
                   user=None,
                   project=None,
                   resource=None,
                   source=None,
                   metaquery=None,
                   pagination=None):
        """Return an iterable of models.Meter instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional resource filter.
        :param source: Optional source filter.
        :param metaquery: Optional dict with metadata to match on.
        :param pagination: Optional pagination query.
        """

        metaquery = metaquery or {}

        if pagination:
            raise NotImplementedError(_('Pagination not implemented'))
        with self.conn_pool.connection() as conn:
            resource_table = conn.table(self.RESOURCE_TABLE)
            q = hbase_utils.make_query(metaquery=metaquery,
                                       user_id=user,
                                       project_id=project,
                                       resource_id=resource,
                                       source=source)
            LOG.debug(_("Query Resource table: %s") % q)

            gen = resource_table.scan(filter=q)
            # We need a result set to be sure that the user doesn't receive
            # the same meter several times. Please see bug
            # https://bugs.launchpad.net/ceilometer/+bug/1301371
            result = set()
            for ignored, data in gen:
                flatten_result, s, meters, md = hbase_utils.deserialize_entry(
                    data)
                for m in meters:
                    _m_rts, m_source, m_raw = m[0].split("+")
                    name, type, unit = m_raw.split('!')
                    meter_dict = {
                        'name': name,
                        'type': type,
                        'unit': unit,
                        'resource_id': flatten_result['resource_id'],
                        'project_id': flatten_result['project_id'],
                        'user_id': flatten_result['user_id']
                    }
                    frozen_meter = frozenset(meter_dict.items())
                    if frozen_meter in result:
                        continue
                    result.add(frozen_meter)
                    meter_dict.update(
                        {'source': m_source if m_source else None})

                    yield models.Meter(**meter_dict)

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of models.Sample instances.

        :param sample_filter: Filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return
        with self.conn_pool.connection() as conn:
            meter_table = conn.table(self.METER_TABLE)
            q, start, stop, columns = (
                hbase_utils.make_sample_query_from_filter(sample_filter,
                                                          require_meter=False))
            LOG.debug(_("Query Meter Table: %s") % q)
            gen = meter_table.scan(filter=q,
                                   row_start=start,
                                   row_stop=stop,
                                   limit=limit)
            for ignored, meter in gen:
                d_meter = hbase_utils.deserialize_entry(meter)[0]
                d_meter['message']['recorded_at'] = d_meter['recorded_at']
                yield models.Sample(**d_meter['message'])

    @staticmethod
    def _update_meter_stats(stat, meter):
        """Do the stats calculation on a requested time bucket in stats dict

        :param stat: Statistics instance where aggregated stats are kept
        :param meter: meter record as returned from HBase
        """
        vol = meter['counter_volume']
        ts = meter['timestamp']
        stat.unit = meter['counter_unit']
        stat.min = min(vol, stat.min or vol)
        stat.max = max(vol, stat.max)
        stat.sum = vol + (stat.sum or 0)
        stat.count += 1
        stat.avg = (stat.sum / float(stat.count))
        stat.duration_start = min(ts, stat.duration_start or ts)
        stat.duration_end = max(ts, stat.duration_end or ts)
        stat.duration = (timeutils.delta_seconds(stat.duration_start,
                                                 stat.duration_end))

    def get_meter_statistics(self,
                             sample_filter,
                             period=None,
                             groupby=None,
                             aggregate=None):
        """Return an iterable of models.Statistics instances.

        Items are containing meter statistics described by the query
        parameters. The filter must have a meter value set.

        .. note::

          Due to HBase limitations the aggregations are implemented
          in the driver itself, therefore this method will be quite slow
          because of all the Thrift traffic it is going to create.
        """
        if groupby:
            raise NotImplementedError("Group by not implemented.")

        if aggregate:
            raise NotImplementedError('Selectable aggregates not implemented')

        with self.conn_pool.connection() as conn:
            meter_table = conn.table(self.METER_TABLE)
            q, start, stop, columns = (
                hbase_utils.make_sample_query_from_filter(sample_filter))
            # These fields are used when calculating the statistics
            columns.extend(
                ['f:timestamp', 'f:counter_volume', 'f:counter_unit'])
            meters = map(
                hbase_utils.deserialize_entry,
                list(meter for (ignored, meter) in meter_table.scan(
                    filter=q, row_start=start, row_stop=stop,
                    columns=columns)))

        if sample_filter.start:
            start_time = sample_filter.start
        elif meters:
            start_time = meters[-1][0]['timestamp']
        else:
            start_time = None

        if sample_filter.end:
            end_time = sample_filter.end
        elif meters:
            end_time = meters[0][0]['timestamp']
        else:
            end_time = None

        results = []

        if not period:
            period = 0
            period_start = start_time
            period_end = end_time

        # As our HBase meters are stored as newest-first, we need to iterate
        # in the reverse order
        for meter in meters[::-1]:
            ts = meter[0]['timestamp']
            if period:
                offset = int(
                    timeutils.delta_seconds(start_time, ts) / period) * period
                period_start = start_time + datetime.timedelta(0, offset)
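                # For example (illustrative values): with start_time at
                # 10:00:00 and period=300, a sample at 10:07:20 gives
                # delta_seconds=440, offset=int(440 / 300) * 300 = 300, so
                # period_start becomes 10:05:00 and the sample lands in the
                # 10:05:00-10:10:00 bucket.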

            if not results or not results[-1].period_start == period_start:
                if period:
                    period_end = period_start + datetime.timedelta(0, period)
                results.append(
                    models.Statistics(unit='',
                                      count=0,
                                      min=0,
                                      max=0,
                                      avg=0,
                                      sum=0,
                                      period=period,
                                      period_start=period_start,
                                      period_end=period_end,
                                      duration=None,
                                      duration_start=None,
                                      duration_end=None,
                                      groupby=None))
            self._update_meter_stats(results[-1], meter[0])
        return results

    def record_events(self, event_models):
        """Write the events to Hbase.

        :param event_models: a list of models.Event objects.
        :return problem_events: a list of events that could not be saved in a
          (reason, event) tuple. From the reasons that are enumerated in
          storage.models.Event only the UNKNOWN_PROBLEM is applicable here.
        """
        problem_events = []

        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            for event_model in event_models:
                # The row key consists of the timestamp and the message_id
                # from models.Event, so that events are stored sorted by
                # timestamp in the database.
                ts = event_model.generated
                row = "%d_%s" % (hbase_utils.timestamp(
                    ts, reverse=False), event_model.message_id)
                event_type = event_model.event_type
                traits = {}
                if event_model.traits:
                    for trait in event_model.traits:
                        key = "%s+%d" % (trait.name, trait.dtype)
                        traits[key] = trait.value
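                        # e.g. (illustrative) a trait named 'instance_type'
                        # with dtype 1 is stored under the qualifier
                        # 'instance_type+1'; get_events() later splits it
                        # back apart with rsplit('+', 1).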
                record = hbase_utils.serialize_entry(traits,
                                                     event_type=event_type,
                                                     timestamp=ts)
                try:
                    events_table.put(row, record)
                except Exception as ex:
                    LOG.debug(_("Failed to record event: %s") % ex)
                    problem_events.append(
                        (models.Event.UNKNOWN_PROBLEM, event_model))
        return problem_events

    def get_events(self, event_filter):
        """Return an iter of models.Event objects.

        :param event_filter: storage.EventFilter object, consists of filters
          for events that are stored in database.
        """
        q, start, stop = hbase_utils.make_events_query_from_filter(
            event_filter)
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)

            gen = events_table.scan(filter=q, row_start=start, row_stop=stop)

        for event_id, data in gen:
            traits = []
            events_dict = hbase_utils.deserialize_entry(data)[0]
            for key, value in events_dict.items():
                if (not key.startswith('event_type')
                        and not key.startswith('timestamp')):
                    trait_name, trait_dtype = key.rsplit('+', 1)
                    traits.append(
                        models.Trait(name=trait_name,
                                     dtype=int(trait_dtype),
                                     value=value))
            ts, mess = event_id.split('_', 1)

            yield models.Event(message_id=mess,
                               event_type=events_dict['event_type'],
                               generated=events_dict['timestamp'],
                               traits=sorted(traits,
                                             key=operator.attrgetter('dtype')))

    def get_event_types(self):
        """Return all event types as an iterable of strings."""
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            gen = events_table.scan()

        event_types = set()
        for event_id, data in gen:
            events_dict = hbase_utils.deserialize_entry(data)[0]
            for key, value in events_dict.items():
                if key.startswith('event_type'):
                    if value not in event_types:
                        event_types.add(value)
                        yield value

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event
        """

        q = hbase_utils.make_query(event_type=event_type)
        trait_names = set()
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            gen = events_table.scan(filter=q)
        for event_id, data in gen:
            events_dict = hbase_utils.deserialize_entry(data)[0]
            for key, value in events_dict.items():
                if (not key.startswith('event_type')
                        and not key.startswith('timestamp')):
                    trait_name, trait_type = key.rsplit('+', 1)
                    if trait_name not in trait_names:
                        # Here we make sure the method returns only unique
                        # trait types: for example, if the same trait type is
                        # found in different events with the same event_type,
                        # only one trait type is returned. It is assumed that
                        # a given trait name can have only one trait type.
                        trait_names.add(trait_name)
                        data_type = models.Trait.type_names[int(trait_type)]
                        yield {'name': trait_name, 'data_type': data_type}

    def get_traits(self, event_type, trait_type=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait type.
        :param event_type: the type of the Event to filter by
        :param trait_type: the name of the Trait to filter by
        """
        q = hbase_utils.make_query(event_type=event_type,
                                   trait_type=trait_type)
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            gen = events_table.scan(filter=q)
        for event_id, data in gen:
            events_dict = hbase_utils.deserialize_entry(data)[0]
            for key, value in events_dict.items():
                if (not key.startswith('event_type')
                        and not key.startswith('timestamp')):
                    trait_name, trait_type = key.rsplit('+', 1)
                    yield models.Trait(name=trait_name,
                                       dtype=int(trait_type),
                                       value=value)
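
Several row keys above start with a reversed timestamp (the rts value from hbase_utils.timestamp) so that, with HBase's lexicographic row ordering, the newest entries come first. A minimal sketch of the idea, assuming it is implemented as a large constant minus the epoch time in milliseconds (the actual helper may differ):

import calendar
import datetime

_MAX_TS = 0x7fffffffffffffff


def reversed_timestamp(dt):
    """Map later datetimes to smaller numbers (illustrative sketch)."""
    return _MAX_TS - calendar.timegm(dt.timetuple()) * 1000


# Newer samples yield smaller values and therefore sort first.
assert (reversed_timestamp(datetime.datetime(2014, 1, 2)) <
        reversed_timestamp(datetime.datetime(2014, 1, 1)))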
Example #7
class Connection(base.Connection):
    """Base Connection class for MongoDB and DB2 drivers."""
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       COMMON_AVAILABLE_CAPABILITIES)

    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def get_meters(self,
                   user=None,
                   project=None,
                   resource=None,
                   source=None,
                   metaquery=None,
                   limit=None):
        """Return an iterable of models.Meter instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional resource filter.
        :param source: Optional source filter.
        :param metaquery: Optional dict with metadata to match on.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return

        metaquery = pymongo_utils.improve_keys(metaquery, metaquery=True) or {}

        q = {}
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if resource is not None:
            q['_id'] = resource
        if source is not None:
            q['source'] = source
        q.update(metaquery)

        count = 0
        for r in self.db.resource.find(q):
            for r_meter in r['meter']:
                if limit and count >= limit:
                    return
                else:
                    count += 1
                yield models.Meter(
                    name=r_meter['counter_name'],
                    type=r_meter['counter_type'],
                    # Return an empty string if 'counter_unit' is missing,
                    # for backward compatibility.
                    unit=r_meter.get('counter_unit', ''),
                    resource_id=r['_id'],
                    project_id=r['project_id'],
                    source=r['source'],
                    user_id=r['user_id'],
                )

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of model.Sample instances.

        :param sample_filter: Filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return []
        q = pymongo_utils.make_query_from_filter(sample_filter,
                                                 require_meter=False)

        return self._retrieve_samples(q, [("timestamp", pymongo.DESCENDING)],
                                      limit)

    def query_samples(self, filter_expr=None, orderby=None, limit=None):
        if limit == 0:
            return []
        query_filter = {}
        orderby_filter = [("timestamp", pymongo.DESCENDING)]
        transformer = pymongo_utils.QueryTransformer()
        if orderby is not None:
            orderby_filter = transformer.transform_orderby(orderby)
        if filter_expr is not None:
            query_filter = transformer.transform_filter(filter_expr)

        return self._retrieve_samples(query_filter, orderby_filter, limit)

    def _retrieve_samples(self, query, orderby, limit):
        if limit is not None:
            samples = self.db.meter.find(query, limit=limit, sort=orderby)
        else:
            samples = self.db.meter.find(query, sort=orderby)

        for s in samples:
            # Remove the ObjectId generated by the database when
            # the sample was inserted. It is an implementation
            # detail that should not leak outside of the driver.
            del s['_id']
            # Backward compatibility for samples without units
            s['counter_unit'] = s.get('counter_unit', '')
            # Tolerate absence of recorded_at in older datapoints
            s['recorded_at'] = s.get('recorded_at')
            # Check samples for metadata and "unquote" key if initially it
            # was started with '$'.
            if s.get('resource_metadata'):
                s['resource_metadata'] = pymongo_utils.unquote_keys(
                    s.get('resource_metadata'))
            yield models.Sample(**s)
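
_retrieve_samples() above "unquotes" resource_metadata keys because MongoDB does not accept field names that start with '$' (or, in the versions this driver targets, contain '.'), so such keys have to be escaped on write and restored on read. A minimal round-trip sketch of that idea (not the actual pymongo_utils.improve_keys/unquote_keys implementation):

def quote_key(key):
    # Escape '%' first so the quoting stays reversible.
    return (key.replace('%', '%25')
               .replace('$', '%24')
               .replace('.', '%2E'))


def unquote_key(key):
    return (key.replace('%2E', '.')
               .replace('%24', '$')
               .replace('%25', '%'))


assert unquote_key(quote_key('$nested.metadata')) == '$nested.metadata'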
Example #8
class Connection(base.Connection):
    """HBase connection.
    """
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)

    _memory_instance = None

    PROJECT_TABLE = "project"
    USER_TABLE = "user"
    RESOURCE_TABLE = "resource"
    METER_TABLE = "meter"
    ALARM_TABLE = "alarm"
    ALARM_HISTORY_TABLE = "alarm_h"

    def __init__(self, conf):
        """Hbase Connection Initialization."""
        opts = self._parse_connection_url(conf.database.connection)

        if opts['host'] == '__test__':
            url = os.environ.get('CEILOMETER_TEST_HBASE_URL')
            if url:
                # Reparse URL, but from the env variable now
                opts = self._parse_connection_url(url)
                self.conn_pool = self._get_connection_pool(opts)
            else:
                # This is an in-memory connection used for unit tests
                if Connection._memory_instance is None:
                    LOG.debug(
                        _('Creating a new in-memory HBase '
                          'Connection object'))
                    Connection._memory_instance = MConnectionPool()
                self.conn_pool = Connection._memory_instance
        else:
            self.conn_pool = self._get_connection_pool(opts)

    def upgrade(self):
        with self.conn_pool.connection() as conn:
            conn.create_table(self.PROJECT_TABLE, {'f': dict()})
            conn.create_table(self.USER_TABLE, {'f': dict()})
            conn.create_table(self.RESOURCE_TABLE, {'f': dict()})
            conn.create_table(self.METER_TABLE, {'f': dict()})
            conn.create_table(self.ALARM_TABLE, {'f': dict()})
            conn.create_table(self.ALARM_HISTORY_TABLE, {'f': dict()})

    def clear(self):
        LOG.debug(_('Dropping HBase schema...'))
        with self.conn_pool.connection() as conn:
            for table in [
                    self.PROJECT_TABLE, self.USER_TABLE, self.RESOURCE_TABLE,
                    self.METER_TABLE, self.ALARM_TABLE,
                    self.ALARM_HISTORY_TABLE
            ]:

                try:
                    conn.disable_table(table)
                except Exception:
                    LOG.debug(_('Cannot disable table but ignoring error'))
                try:
                    conn.delete_table(table)
                except Exception:
                    LOG.debug(_('Cannot delete table but ignoring error'))

    @staticmethod
    def _get_connection_pool(conf):
        """Return a connection pool to the database.

        .. note::

          The tests use a subclass to override this and return an
          in-memory connection pool.
        """
        LOG.debug(
            _('connecting to HBase on %(host)s:%(port)s') %
            ({
                'host': conf['host'],
                'port': conf['port']
            }))
        return happybase.ConnectionPool(size=100,
                                        host=conf['host'],
                                        port=conf['port'],
                                        table_prefix=conf['table_prefix'])

    @staticmethod
    def _parse_connection_url(url):
        """Parse connection parameters from a database url.

        .. note::

          HBase Thrift does not support authentication and there is no
          database name, so we are not looking for these in the url.
        """
        opts = {}
        result = network_utils.urlsplit(url)
        opts['table_prefix'] = urlparse.parse_qs(result.query).get(
            'table_prefix', [None])[0]
        opts['dbtype'] = result.scheme
        if ':' in result.netloc:
            opts['host'], port = result.netloc.split(':')
        else:
            opts['host'] = result.netloc
            port = 9090
        opts['port'] = port and int(port) or 9090
        return opts

    def update_alarm(self, alarm):
        """Create an alarm.
        :param alarm: The alarm to create. It is Alarm object, so we need to
        call as_dict()
        """
        _id = alarm.alarm_id
        alarm_to_store = serialize_entry(alarm.as_dict())
        with self.conn_pool.connection() as conn:
            alarm_table = conn.table(self.ALARM_TABLE)
            alarm_table.put(_id, alarm_to_store)
            stored_alarm = deserialize_entry(alarm_table.row(_id))[0]
        return models.Alarm(**stored_alarm)

    create_alarm = update_alarm

    def delete_alarm(self, alarm_id):
        with self.conn_pool.connection() as conn:
            alarm_table = conn.table(self.ALARM_TABLE)
            alarm_table.delete(alarm_id)

    def get_alarms(self,
                   name=None,
                   user=None,
                   project=None,
                   enabled=None,
                   alarm_id=None,
                   pagination=None):

        if pagination:
            raise NotImplementedError('Pagination not implemented')

        q = make_query(alarm_id=alarm_id,
                       name=name,
                       enabled=enabled,
                       user_id=user,
                       project_id=project)

        with self.conn_pool.connection() as conn:
            alarm_table = conn.table(self.ALARM_TABLE)
            gen = alarm_table.scan(filter=q)
            for ignored, data in gen:
                stored_alarm = deserialize_entry(data)[0]
                yield models.Alarm(**stored_alarm)

    def get_alarm_changes(self,
                          alarm_id,
                          on_behalf_of,
                          user=None,
                          project=None,
                          type=None,
                          start_timestamp=None,
                          start_timestamp_op=None,
                          end_timestamp=None,
                          end_timestamp_op=None):
        q = make_query(alarm_id=alarm_id,
                       on_behalf_of=on_behalf_of,
                       type=type,
                       user_id=user,
                       project_id=project)
        start_row, end_row = make_timestamp_query(_make_general_rowkey_scan,
                                                  start=start_timestamp,
                                                  start_op=start_timestamp_op,
                                                  end=end_timestamp,
                                                  end_op=end_timestamp_op,
                                                  bounds_only=True,
                                                  some_id=alarm_id)
        with self.conn_pool.connection() as conn:
            alarm_history_table = conn.table(self.ALARM_HISTORY_TABLE)
            gen = alarm_history_table.scan(filter=q,
                                           row_start=start_row,
                                           row_stop=end_row)
            for ignored, data in gen:
                stored_entry = deserialize_entry(data)[0]
                yield models.AlarmChange(**stored_entry)

    def record_alarm_change(self, alarm_change):
        """Record alarm change event.
        """
        alarm_change_dict = serialize_entry(alarm_change)
        ts = alarm_change.get('timestamp') or datetime.datetime.now()
        rts = reverse_timestamp(ts)
        with self.conn_pool.connection() as conn:
            alarm_history_table = conn.table(self.ALARM_HISTORY_TABLE)
            alarm_history_table.put(
                alarm_change.get('alarm_id') + "_" + str(rts),
                alarm_change_dict)

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        with self.conn_pool.connection() as conn:
            project_table = conn.table(self.PROJECT_TABLE)
            user_table = conn.table(self.USER_TABLE)
            resource_table = conn.table(self.RESOURCE_TABLE)
            meter_table = conn.table(self.METER_TABLE)

            # Make sure we know about the user and project
            if data['user_id']:
                self._update_sources(user_table, data['user_id'],
                                     data['source'])
            self._update_sources(project_table, data['project_id'],
                                 data['source'])

            # Get metadata from user's data
            resource_metadata = data.get('resource_metadata', {})
            # Determine the name of the new meter
            new_meter = _format_meter_reference(data['counter_name'],
                                                data['counter_type'],
                                                data['counter_unit'])
            flatten_result, sources, meters, metadata = \
                deserialize_entry(resource_table.row(data['resource_id']))

            # Update if resource has new information
            if (data['source']
                    not in sources) or (new_meter not in meters) or (
                        metadata != resource_metadata):
                resource_table.put(
                    data['resource_id'],
                    serialize_entry(
                        **{
                            'sources': [data['source']],
                            'meters': [new_meter],
                            'metadata': resource_metadata,
                            'resource_id': data['resource_id'],
                            'project_id': data['project_id'],
                            'user_id': data['user_id']
                        }))

            # Rowkey consists of reversed timestamp, meter and an md5 of
            # user+resource+project for purposes of uniqueness
            m = hashlib.md5()
            m.update(
                "%s%s%s" %
                (data['user_id'], data['resource_id'], data['project_id']))

            # We use reverse timestamps in rowkeys because rowkeys are sorted
            # alphabetically, so the newest entries come first.
            rts = reverse_timestamp(data['timestamp'])
            row = "%s_%d_%s" % (data['counter_name'], rts, m.hexdigest())
            record = serialize_entry(
                data, **{
                    'metadata': resource_metadata,
                    'rts': rts,
                    'message': data,
                    'recorded_at': timeutils.utcnow()
                })
            meter_table.put(row, record)

    def _update_sources(self, table, id, source):
        user, sources, _, _ = deserialize_entry(table.row(id))
        if source not in sources:
            sources.append(source)
            table.put(id, serialize_entry(user, **{'sources': sources}))

    def get_users(self, source=None):
        """Return an iterable of user id strings.

        :param source: Optional source filter.
        """
        with self.conn_pool.connection() as conn:
            user_table = conn.table(self.USER_TABLE)
            LOG.debug(_("source: %s") % source)
            scan_args = {}
            if source:
                scan_args['columns'] = ['f:s_%s' % source]
            return sorted(key for key, ignored in user_table.scan(**scan_args))

    def get_projects(self, source=None):
        """Return an iterable of project id strings.

        :param source: Optional source filter.
        """
        with self.conn_pool.connection() as conn:
            project_table = conn.table(self.PROJECT_TABLE)
            LOG.debug(_("source: %s") % source)
            scan_args = {}
            if source:
                scan_args['columns'] = ['f:s_%s' % source]
            return (key for key, ignored in project_table.scan(**scan_args))

    def get_resources(self,
                      user=None,
                      project=None,
                      source=None,
                      start_timestamp=None,
                      start_timestamp_op=None,
                      end_timestamp=None,
                      end_timestamp_op=None,
                      metaquery={},
                      resource=None,
                      pagination=None):
        """Return an iterable of models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like ge, gt.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise NotImplementedError('Pagination not implemented')

        sample_filter = storage.SampleFilter(
            user=user,
            project=project,
            start=start_timestamp,
            start_timestamp_op=start_timestamp_op,
            end=end_timestamp,
            end_timestamp_op=end_timestamp_op,
            resource=resource,
            source=source,
            metaquery=metaquery)
        q, start_row, stop_row = make_sample_query_from_filter(
            sample_filter, require_meter=False)

        with self.conn_pool.connection() as conn:
            meter_table = conn.table(self.METER_TABLE)
            LOG.debug(_("Query Meter table: %s") % q)
            meters = meter_table.scan(filter=q,
                                      row_start=start_row,
                                      row_stop=stop_row)
            d_meters = []
            for i, m in meters:
                d_meters.append(deserialize_entry(m))

            # We have to sort on resource_id before we can group by it.
            # According to the itertools documentation a new group is
            # generated when the value of the key function changes
            # (it breaks there).
            meters = sorted(d_meters, key=_resource_id_from_record_tuple)
            for resource_id, r_meters in itertools.groupby(
                    meters, key=_resource_id_from_record_tuple):
                # We need deserialized entry(data[0]) and metadata(data[3])
                meter_rows = [(data[0], data[3]) for data in sorted(
                    r_meters, key=_timestamp_from_record_tuple)]
                latest_data = meter_rows[-1]
                min_ts = meter_rows[0][0]['timestamp']
                max_ts = latest_data[0]['timestamp']
                yield models.Resource(
                    resource_id=resource_id,
                    first_sample_timestamp=min_ts,
                    last_sample_timestamp=max_ts,
                    project_id=latest_data[0]['project_id'],
                    source=latest_data[0]['source'],
                    user_id=latest_data[0]['user_id'],
                    metadata=latest_data[1],
                )

    def get_meters(self,
                   user=None,
                   project=None,
                   resource=None,
                   source=None,
                   metaquery={},
                   pagination=None):
        """Return an iterable of models.Meter instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional resource filter.
        :param source: Optional source filter.
        :param metaquery: Optional dict with metadata to match on.
        :param pagination: Optional pagination query.
        """

        if pagination:
            raise NotImplementedError(_('Pagination not implemented'))
        with self.conn_pool.connection() as conn:
            resource_table = conn.table(self.RESOURCE_TABLE)
            q = make_query(metaquery=metaquery,
                           user_id=user,
                           project_id=project,
                           resource_id=resource,
                           source=source)
            LOG.debug(_("Query Resource table: %s") % q)

            gen = resource_table.scan(filter=q)

            for ignored, data in gen:
                flatten_result, s, m, md = deserialize_entry(data)
                if not m:
                    continue
                # The entry may contain only one "meter" and "source". That's
                # why only the first list element is used in this method.
                name, type, unit = m[0].split("!")
                yield models.Meter(
                    name=name,
                    type=type,
                    unit=unit,
                    resource_id=flatten_result['resource_id'],
                    project_id=flatten_result['project_id'],
                    source=s[0] if s else None,
                    user_id=flatten_result['user_id'],
                )

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of models.Sample instances.

        :param sample_filter: Filter.
        :param limit: Maximum number of results to return.
        """
        with self.conn_pool.connection() as conn:
            meter_table = conn.table(self.METER_TABLE)

            q, start, stop = make_sample_query_from_filter(sample_filter,
                                                           require_meter=False)
            LOG.debug(_("Query Meter Table: %s") % q)
            gen = meter_table.scan(filter=q, row_start=start, row_stop=stop)
            for ignored, meter in gen:
                if limit is not None:
                    if limit == 0:
                        break
                    else:
                        limit -= 1
                d_meter = deserialize_entry(meter)[0]
                d_meter['message']['recorded_at'] = d_meter['recorded_at']
                yield models.Sample(**d_meter['message'])

    @staticmethod
    def _update_meter_stats(stat, meter):
        """Do the stats calculation on a requested time bucket in stats dict

        :param stat: Statistics instance where aggregated stats are kept
        :param meter: meter record as returned from HBase
        """
        vol = meter['counter_volume']
        ts = meter['timestamp']
        stat.unit = meter['counter_unit']
        stat.min = min(vol, stat.min or vol)
        stat.max = max(vol, stat.max)
        stat.sum = vol + (stat.sum or 0)
        stat.count += 1
        stat.avg = (stat.sum / float(stat.count))
        stat.duration_start = min(ts, stat.duration_start or ts)
        stat.duration_end = max(ts, stat.duration_end or ts)
        stat.duration = \
            timeutils.delta_seconds(stat.duration_start,
                                    stat.duration_end)

    def get_meter_statistics(self,
                             sample_filter,
                             period=None,
                             groupby=None,
                             aggregate=None):
        """Return an iterable of models.Statistics instances containing meter
        statistics described by the query parameters.

        The filter must have a meter value set.

        .. note::

           Due to HBase limitations the aggregations are implemented
           in the driver itself, therefore this method will be quite slow
           because of all the Thrift traffic it is going to create.

        """
        if groupby:
            raise NotImplementedError("Group by not implemented.")

        if aggregate:
            raise NotImplementedError('Selectable aggregates not implemented')

        with self.conn_pool.connection() as conn:
            meter_table = conn.table(self.METER_TABLE)
            q, start, stop = make_sample_query_from_filter(sample_filter)
            meters = map(
                deserialize_entry,
                list(meter for (ignored, meter) in meter_table.scan(
                    filter=q, row_start=start, row_stop=stop)))

        if sample_filter.start:
            start_time = sample_filter.start
        elif meters:
            start_time = meters[-1][0]['timestamp']
        else:
            start_time = None

        if sample_filter.end:
            end_time = sample_filter.end
        elif meters:
            end_time = meters[0][0]['timestamp']
        else:
            end_time = None

        results = []

        if not period:
            period = 0
            period_start = start_time
            period_end = end_time

        # As our HBase meters are stored as newest-first, we need to iterate
        # in the reverse order
        for meter in meters[::-1]:
            ts = meter[0]['timestamp']
            if period:
                offset = int(
                    timeutils.delta_seconds(start_time, ts) / period) * period
                period_start = start_time + datetime.timedelta(0, offset)

            if not results or results[-1].period_start != period_start:
                if period:
                    period_end = period_start + datetime.timedelta(0, period)
                results.append(
                    models.Statistics(unit='',
                                      count=0,
                                      min=0,
                                      max=0,
                                      avg=0,
                                      sum=0,
                                      period=period,
                                      period_start=period_start,
                                      period_end=period_end,
                                      duration=None,
                                      duration_start=None,
                                      duration_end=None,
                                      groupby=None))
            self._update_meter_stats(results[-1], meter[0])
        return results
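
The period bucketing in the loop above assigns each sample to a fixed-width
bucket anchored at the query start time. A minimal standalone sketch of that
arithmetic (using plain timedelta instead of timeutils.delta_seconds, with
made-up timestamps):

import datetime

start_time = datetime.datetime(2015, 1, 1, 0, 0, 0)
ts = datetime.datetime(2015, 1, 1, 0, 7, 30)   # sample timestamp
period = 300                                   # 5-minute buckets

offset = int((ts - start_time).total_seconds() / period) * period
period_start = start_time + datetime.timedelta(0, offset)
period_end = period_start + datetime.timedelta(0, period)
# period_start is 00:05:00 and period_end is 00:10:00 for this sample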
Example #9
class Connection(pymongo_base.Connection):
    """Put the data into a MongoDB database

    Collections::

        - meter
          - the raw incoming data
        - resource
          - the metadata for resources
          - { _id: uuid of resource,
              metadata: metadata dictionaries
              user_id: uuid
              project_id: uuid
              meter: [ array of {counter_name: string, counter_type: string,
                                 counter_unit: string} ]
            }
    """

    CAPABILITIES = utils.update_nested(pymongo_base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    CONNECTION_POOL = pymongo_utils.ConnectionPool()

    REDUCE_GROUP_CLEAN = bson.code.Code("""
    function ( curr, result ) {
        if (result.resources.indexOf(curr.resource_id) < 0)
            result.resources.push(curr.resource_id);
    }
    """)

    STANDARD_AGGREGATES = dict(
        emit_initial=dict(
            sum='',
            count='',
            avg='',
            min='',
            max=''
        ),
        emit_body=dict(
            sum='sum: this.counter_volume,',
            count='count: NumberInt(1),',
            avg='acount: NumberInt(1), asum: this.counter_volume,',
            min='min: this.counter_volume,',
            max='max: this.counter_volume,'
        ),
        reduce_initial=dict(
            sum='',
            count='',
            avg='',
            min='',
            max=''
        ),
        reduce_body=dict(
            sum='sum: values[0].sum,',
            count='count: values[0].count,',
            avg='acount: values[0].acount, asum: values[0].asum,',
            min='min: values[0].min,',
            max='max: values[0].max,'
        ),
        reduce_computation=dict(
            sum='res.sum += values[i].sum;',
            count='res.count = NumberInt(res.count + values[i].count);',
            avg=('res.acount = NumberInt(res.acount + values[i].acount);'
                 'res.asum += values[i].asum;'),
            min='if ( values[i].min < res.min ) {res.min = values[i].min;}',
            max='if ( values[i].max > res.max ) {res.max = values[i].max;}'
        ),
        finalize=dict(
            sum='',
            count='',
            avg='value.avg = value.asum / value.acount;',
            min='',
            max=''
        ),
    )

    UNPARAMETERIZED_AGGREGATES = dict(
        emit_initial=dict(
            stddev=(
                ''
            )
        ),
        emit_body=dict(
            stddev='sdsum: this.counter_volume,'
                   'sdcount: 1,'
                   'weighted_distances: 0,'
                   'stddev: 0,'
        ),
        reduce_initial=dict(
            stddev=''
        ),
        reduce_body=dict(
            stddev='sdsum: values[0].sdsum,'
                   'sdcount: values[0].sdcount,'
                   'weighted_distances: values[0].weighted_distances,'
                   'stddev: values[0].stddev,'
        ),
        reduce_computation=dict(
            stddev=(
                'var deviance = (res.sdsum / res.sdcount) - values[i].sdsum;'
                'var weight = res.sdcount / ++res.sdcount;'
                'res.weighted_distances += (Math.pow(deviance, 2) * weight);'
                'res.sdsum += values[i].sdsum;'
            )
        ),
        finalize=dict(
            stddev=(
                'value.stddev = Math.sqrt(value.weighted_distances /'
                '  value.sdcount);'
            )
        ),
    )

    PARAMETERIZED_AGGREGATES = dict(
        validate=dict(
            cardinality=lambda p: p in ['resource_id', 'user_id', 'project_id',
                                        'source']
        ),
        emit_initial=dict(
            cardinality=(
                'aggregate["cardinality/%(aggregate_param)s"] = 1;'
                'var distinct_%(aggregate_param)s = {};'
                'distinct_%(aggregate_param)s[this["%(aggregate_param)s"]]'
                '   = true;'
            )
        ),
        emit_body=dict(
            cardinality=(
                'distinct_%(aggregate_param)s : distinct_%(aggregate_param)s,'
                '%(aggregate_param)s : this["%(aggregate_param)s"],'
            )
        ),
        reduce_initial=dict(
            cardinality=''
        ),
        reduce_body=dict(
            cardinality=(
                'aggregate : values[0].aggregate,'
                'distinct_%(aggregate_param)s:'
                '  values[0].distinct_%(aggregate_param)s,'
                '%(aggregate_param)s : values[0]["%(aggregate_param)s"],'
            )
        ),
        reduce_computation=dict(
            cardinality=(
                'if (!(values[i]["%(aggregate_param)s"] in'
                '      res.distinct_%(aggregate_param)s)) {'
                '  res.distinct_%(aggregate_param)s[values[i]'
                '    ["%(aggregate_param)s"]] = true;'
                '  res.aggregate["cardinality/%(aggregate_param)s"] += 1;}'
            )
        ),
        finalize=dict(
            cardinality=''
        ),
    )

    EMIT_STATS_COMMON = """
        var aggregate = {};
        %(aggregate_initial_placeholder)s
        emit(%(key_val)s, { unit: this.counter_unit,
                            aggregate : aggregate,
                            %(aggregate_body_placeholder)s
                            groupby : %(groupby_val)s,
                            duration_start : this.timestamp,
                            duration_end : this.timestamp,
                            period_start : %(period_start_val)s,
                            period_end : %(period_end_val)s} )
    """

    MAP_STATS_PERIOD_VAR = """
        var period = %(period)d * 1000;
        var period_first = %(period_first)d * 1000;
        var period_start = period_first
                           + (Math.floor(new Date(this.timestamp.getTime()
                                         - period_first) / period)
                              * period);
    """

    MAP_STATS_GROUPBY_VAR = """
        var groupby_fields = %(groupby_fields)s;
        var groupby = {};
        var groupby_key = {};

        for ( var i=0; i<groupby_fields.length; i++ ) {
            groupby[groupby_fields[i]] = this[groupby_fields[i]]
            groupby_key[groupby_fields[i]] = this[groupby_fields[i]]
        }
    """

    PARAMS_MAP_STATS = {
        'key_val': '\'statistics\'',
        'groupby_val': 'null',
        'period_start_val': 'this.timestamp',
        'period_end_val': 'this.timestamp',
        'aggregate_initial_placeholder': '%(aggregate_initial_val)s',
        'aggregate_body_placeholder': '%(aggregate_body_val)s'
    }

    MAP_STATS = bson.code.Code("function () {" +
                               EMIT_STATS_COMMON % PARAMS_MAP_STATS +
                               "}")

    PARAMS_MAP_STATS_PERIOD = {
        'key_val': 'period_start',
        'groupby_val': 'null',
        'period_start_val': 'new Date(period_start)',
        'period_end_val': 'new Date(period_start + period)',
        'aggregate_initial_placeholder': '%(aggregate_initial_val)s',
        'aggregate_body_placeholder': '%(aggregate_body_val)s'
    }

    MAP_STATS_PERIOD = bson.code.Code(
        "function () {" +
        MAP_STATS_PERIOD_VAR +
        EMIT_STATS_COMMON % PARAMS_MAP_STATS_PERIOD +
        "}")

    PARAMS_MAP_STATS_GROUPBY = {
        'key_val': 'groupby_key',
        'groupby_val': 'groupby',
        'period_start_val': 'this.timestamp',
        'period_end_val': 'this.timestamp',
        'aggregate_initial_placeholder': '%(aggregate_initial_val)s',
        'aggregate_body_placeholder': '%(aggregate_body_val)s'
    }

    MAP_STATS_GROUPBY = bson.code.Code(
        "function () {" +
        MAP_STATS_GROUPBY_VAR +
        EMIT_STATS_COMMON % PARAMS_MAP_STATS_GROUPBY +
        "}")

    PARAMS_MAP_STATS_PERIOD_GROUPBY = {
        'key_val': 'groupby_key',
        'groupby_val': 'groupby',
        'period_start_val': 'new Date(period_start)',
        'period_end_val': 'new Date(period_start + period)',
        'aggregate_initial_placeholder': '%(aggregate_initial_val)s',
        'aggregate_body_placeholder': '%(aggregate_body_val)s'
    }

    MAP_STATS_PERIOD_GROUPBY = bson.code.Code(
        "function () {" +
        MAP_STATS_PERIOD_VAR +
        MAP_STATS_GROUPBY_VAR +
        "    groupby_key['period_start'] = period_start\n" +
        EMIT_STATS_COMMON % PARAMS_MAP_STATS_PERIOD_GROUPBY +
        "}")

    REDUCE_STATS = bson.code.Code("""
    function (key, values) {
        %(aggregate_initial_val)s
        var res = { unit: values[0].unit,
                    aggregate: values[0].aggregate,
                    %(aggregate_body_val)s
                    groupby: values[0].groupby,
                    period_start: values[0].period_start,
                    period_end: values[0].period_end,
                    duration_start: values[0].duration_start,
                    duration_end: values[0].duration_end };
        for ( var i=1; i<values.length; i++ ) {
            %(aggregate_computation_val)s
            if ( values[i].duration_start < res.duration_start )
               res.duration_start = values[i].duration_start;
            if ( values[i].duration_end > res.duration_end )
               res.duration_end = values[i].duration_end;
        }
        return res;
    }
    """)

    FINALIZE_STATS = bson.code.Code("""
    function (key, value) {
        %(aggregate_val)s
        value.duration = (value.duration_end - value.duration_start) / 1000;
        value.period = NumberInt((value.period_end - value.period_start)
                                  / 1000);
        return value;
    }""")

    SORT_OPERATION_MAPPING = {'desc': (pymongo.DESCENDING, '$lt'),
                              'asc': (pymongo.ASCENDING, '$gt')}

    MAP_RESOURCES = bson.code.Code("""
    function () {
        emit(this.resource_id,
             {user_id: this.user_id,
              project_id: this.project_id,
              source: this.source,
              first_timestamp: this.timestamp,
              last_timestamp: this.timestamp,
              metadata: this.resource_metadata})
    }""")

    REDUCE_RESOURCES = bson.code.Code("""
    function (key, values) {
        var merge = {user_id: values[0].user_id,
                     project_id: values[0].project_id,
                     source: values[0].source,
                     first_timestamp: values[0].first_timestamp,
                     last_timestamp: values[0].last_timestamp,
                     metadata: values[0].metadata}
        values.forEach(function(value) {
            if (merge.first_timestamp - value.first_timestamp > 0) {
                merge.first_timestamp = value.first_timestamp;
                merge.user_id = value.user_id;
                merge.project_id = value.project_id;
                merge.source = value.source;
            } else if (merge.last_timestamp - value.last_timestamp <= 0) {
                merge.last_timestamp = value.last_timestamp;
                merge.metadata = value.metadata;
            }
        });
        return merge;
      }""")

    _GENESIS = datetime.datetime(year=datetime.MINYEAR, month=1, day=1)
    _APOCALYPSE = datetime.datetime(year=datetime.MAXYEAR, month=12, day=31,
                                    hour=23, minute=59, second=59)

    def __init__(self, url):

        # NOTE(jd) Use our own connection pooling on top of the Pymongo one.
        # We need that otherwise we overflow the MongoDB instance with new
        # connection since we instanciate a Pymongo client each time someone
        # requires a new storage connection.
        self.conn = self.CONNECTION_POOL.connect(url)

        # Require MongoDB 2.4 to use $setOnInsert
        if self.conn.server_info()['versionArray'] < [2, 4]:
            raise storage.StorageBadVersion("Need at least MongoDB 2.4")

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options['database'])
        if connection_options.get('username'):
            self.db.authenticate(connection_options['username'],
                                 connection_options['password'])

        # NOTE(jd) Upgrading is just about creating index, so let's do this
        # on connection to be sure at least the TTL is correcly updated if
        # needed.
        self.upgrade()

    def upgrade(self):
        # Establish indexes
        #
        # We need variations for user_id vs. project_id because of the
        # way the indexes are stored in b-trees. The user_id and
        # project_id values are usually mutually exclusive in the
        # queries, so the database won't take advantage of an index
        # including both.
        name_qualifier = dict(user_id='', project_id='project_')
        background = dict(user_id=False, project_id=True)
        for primary in ['user_id', 'project_id']:
            name = 'resource_%sidx' % name_qualifier[primary]
            self.db.resource.ensure_index([
                (primary, pymongo.ASCENDING),
                ('source', pymongo.ASCENDING),
            ], name=name, background=background[primary])

            name = 'meter_%sidx' % name_qualifier[primary]
            self.db.meter.ensure_index([
                ('resource_id', pymongo.ASCENDING),
                (primary, pymongo.ASCENDING),
                ('counter_name', pymongo.ASCENDING),
                ('timestamp', pymongo.ASCENDING),
                ('source', pymongo.ASCENDING),
            ], name=name, background=background[primary])

        self.db.resource.ensure_index([('last_sample_timestamp',
                                        pymongo.DESCENDING)],
                                      name='last_sample_timestamp_idx',
                                      sparse=True)
        self.db.meter.ensure_index([('timestamp', pymongo.DESCENDING)],
                                   name='timestamp_idx')
        # remove API v1 related table
        self.db.user.drop()
        self.db.project.drop()

        indexes = self.db.meter.index_information()

        ttl = cfg.CONF.database.time_to_live

        if ttl <= 0:
            if 'meter_ttl' in indexes:
                self.db.meter.drop_index('meter_ttl')
            return

        if 'meter_ttl' in indexes:
            # NOTE(sileht): manually check expireAfterSeconds because
            # ensure_index doesn't update index options if the index already
            # exists
            if ttl == indexes['meter_ttl'].get('expireAfterSeconds', -1):
                return

            self.db.meter.drop_index('meter_ttl')

        self.db.meter.create_index(
            [('timestamp', pymongo.ASCENDING)],
            expireAfterSeconds=ttl,
            name='meter_ttl'
        )

    def clear(self):
        self.conn.drop_database(self.db)
        # Connection will be reopened automatically if needed
        self.conn.close()

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        # Record the updated resource metadata - we use $setOnInsert to
        # unconditionally insert sample timestamps and resource metadata
        # (in the update case, this must be conditional on the sample not
        # being out-of-order)
        resource = self.db.resource.find_and_modify(
            {'_id': data['resource_id']},
            {'$set': {'project_id': data['project_id'],
                      'user_id': data['user_id'],
                      'source': data['source'],
                      },
             '$setOnInsert': {'metadata': data['resource_metadata'],
                              'first_sample_timestamp': data['timestamp'],
                              'last_sample_timestamp': data['timestamp'],
                              },
             '$addToSet': {'meter': {'counter_name': data['counter_name'],
                                     'counter_type': data['counter_type'],
                                     'counter_unit': data['counter_unit'],
                                     },
                           },
             },
            upsert=True,
            new=True,
        )

        # only update last sample timestamp if actually later (the usual
        # in-order case)
        last_sample_timestamp = resource.get('last_sample_timestamp')
        if (last_sample_timestamp is None or
                last_sample_timestamp <= data['timestamp']):
            self.db.resource.update(
                {'_id': data['resource_id']},
                {'$set': {'metadata': data['resource_metadata'],
                          'last_sample_timestamp': data['timestamp']}}
            )

        # only update first sample timestamp if actually earlier (the unusual
        # out-of-order case)
        # NOTE: a null first sample timestamp is not updated as this indicates
        # a pre-existing resource document dating from before we started
        # recording these timestamps in the resource collection
        first_sample_timestamp = resource.get('first_sample_timestamp')
        if (first_sample_timestamp is not None and
                first_sample_timestamp > data['timestamp']):
            self.db.resource.update(
                {'_id': data['resource_id']},
                {'$set': {'first_sample_timestamp': data['timestamp']}}
            )

        # Record the raw data for the meter. Use a copy so we do not
        # modify a data structure owned by our caller (the driver adds
        # a new key '_id').
        record = copy.copy(data)
        record['recorded_at'] = timeutils.utcnow()
        self.db.meter.insert(record)

    def clear_expired_metering_data(self, ttl):
        """Clear expired data from the backend storage system.

        Clearing occurs according to the time-to-live.
        :param ttl: Number of seconds to keep records for.
        """
        results = self.db.meter.group(
            key={},
            condition={},
            reduce=self.REDUCE_GROUP_CLEAN,
            initial={
                'resources': [],
            }
        )[0]

        self.db.resource.remove({'_id': {'$nin': results['resources']}})

    @staticmethod
    def _get_marker(db_collection, marker_pairs):
        """Return the mark document according to the attribute-value pairs.

        :param db_collection: Database collection that be query.
        :param maker_pairs: Attribute-value pairs filter.
        """
        if db_collection is None:
            return
        if not marker_pairs:
            return
        ret = db_collection.find(marker_pairs, limit=2)

        if ret.count() == 0:
            raise base.NoResultFound
        elif ret.count() > 1:
            raise base.MultipleResultsFound
        else:
            return ret[0]

    @classmethod
    def _recurse_sort_keys(cls, sort_keys, marker, flag):
        _first = sort_keys[0]
        value = marker[_first]
        if len(sort_keys) == 1:
            return {_first: {flag: value}}
        else:
            criteria_equ = {_first: {'eq': value}}
            criteria_cmp = cls._recurse_sort_keys(sort_keys[1:], marker, flag)
        return dict(criteria_equ, ** criteria_cmp)
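
    # Illustration (assumed values, not part of the driver): with
    # sort_keys=['user_id', 'timestamp'], marker={'user_id': 'u1',
    # 'timestamp': 't1'} and flag='$lt', the recursion above returns
    # {'user_id': {'eq': 'u1'}, 'timestamp': {'$lt': 't1'}}.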

    @classmethod
    def _build_paginate_query(cls, marker, sort_keys=None, sort_dir='desc'):
        """Returns a query with sorting / pagination.

        Pagination works by requiring sort_key and sort_dir.
        We use the last item in previous page as the 'marker' for pagination.
        So we return values that follow the passed marker in the order.
        :param q: The query dict passed in.
        :param marker: the last item of the previous page; we return the next
                       results after this item.
        :param sort_keys: array of attributes by which results be sorted.
        :param sort_dir: direction in which results be sorted (asc, desc).
        :return: sort parameters, query to use
        """
        all_sort = []
        sort_keys = sort_keys or []
        all_sort, _op = cls._build_sort_instructions(sort_keys, sort_dir)

        if marker is not None:
            sort_criteria_list = []

            for i in range(len(sort_keys)):
                # NOTE(fengqian): Generate the query criteria recursively.
                # With sort_keys=[k1, k2, k3], marker_values=[v1, v2, v3]
                # and _op='$lt' (the 'desc' case), the query criteria are
                # {'k3': {'$lt': 'v3'}, 'k2': {'eq': 'v2'}, 'k1': {'eq': 'v1'}},
                # {'k2': {'$lt': 'v2'}, 'k1': {'eq': 'v1'}},
                # {'k1': {'$lt': 'v1'}},
                # combined with an 'OR' operation.
                # Each recursion generates one of these three criteria dicts.
                sort_criteria_list.append(cls._recurse_sort_keys(
                                          sort_keys[:(len(sort_keys) - i)],
                                          marker, _op))

            metaquery = {"$or": sort_criteria_list}
        else:
            metaquery = {}

        return all_sort, metaquery

    @classmethod
    def _build_sort_instructions(cls, sort_keys=None, sort_dir='desc'):
        """Returns a sort_instruction and paging operator.

        Sort instructions are used in the query to determine what attributes
        to sort on and what direction to use.
        :param q: The query dict passed in.
        :param sort_keys: array of attributes by which results be sorted.
        :param sort_dir: direction in which results be sorted (asc, desc).
        :return: sort instructions and paging operator
        """
        sort_keys = sort_keys or []
        sort_instructions = []
        _sort_dir, operation = cls.SORT_OPERATION_MAPPING.get(
            sort_dir, cls.SORT_OPERATION_MAPPING['desc'])

        for _sort_key in sort_keys:
            _instruction = (_sort_key, _sort_dir)
            sort_instructions.append(_instruction)

        return sort_instructions, operation

    @classmethod
    def paginate_query(cls, q, db_collection, limit=None, marker=None,
                       sort_keys=None, sort_dir='desc'):
        """Returns a query result with sorting / pagination.

        Pagination works by requiring sort_key and sort_dir.
        We use the last item in previous page as the 'marker' for pagination.
        So we return values that follow the passed marker in the order.

        :param q: the query dict passed in.
        :param db_collection: Database collection that be query.
        :param limit: maximum number of items to return.
        :param marker: the last item of the previous page; we return the next
                       results after this item.
        :param sort_keys: array of attributes by which results be sorted.
        :param sort_dir: direction in which results be sorted (asc, desc).

        :return: The query with sorting/pagination added.
        """

        sort_keys = sort_keys or []
        all_sort, query = cls._build_paginate_query(marker,
                                                    sort_keys,
                                                    sort_dir)
        q.update(query)

        # NOTE(Fengqian): MongoDB collection.find cannot handle a limit of
        # None (it raises TypeError), so we treat None as 0, which MongoDB
        # interprets as "no limit".
        if limit is None:
            limit = 0
        return db_collection.find(q, limit=limit, sort=all_sort)

    def _get_time_constrained_resources(self, query,
                                        start_timestamp, start_timestamp_op,
                                        end_timestamp, end_timestamp_op,
                                        metaquery, resource):
        """Return an iterable of models.Resource instances

        Items are constrained by sample timestamp.
        :param query: project/user/source query
        :param start_timestamp: modified timestamp start range.
        :param start_timestamp_op: start time operator, like gt, ge.
        :param end_timestamp: modified timestamp end range.
        :param end_timestamp_op: end time operator, like lt, le.
        :param metaquery: dict with metadata to match on.
        :param resource: resource filter.
        """
        if resource is not None:
            query['resource_id'] = resource

        # Add resource_ prefix so it matches the field in the db
        query.update(dict(('resource_' + k, v)
                          for (k, v) in six.iteritems(metaquery)))

        # FIXME(dhellmann): This may not perform very well,
        # but doing any better will require changing the database
        # schema and that will need more thought than I have time
        # to put into it today.
        # Look for resources matching the above criteria and with
        # samples in the time range we care about, then change the
        # resource query to return just those resources by id.
        ts_range = pymongo_utils.make_timestamp_range(start_timestamp,
                                                      end_timestamp,
                                                      start_timestamp_op,
                                                      end_timestamp_op)
        if ts_range:
            query['timestamp'] = ts_range

        sort_keys = base._handle_sort_key('resource')
        sort_instructions = self._build_sort_instructions(sort_keys)[0]

        # use a unique collection name for the results collection,
        # as result post-sorting (as opposed to reduce pre-sorting)
        # is not possible on an inline M-R
        out = 'resource_list_%s' % uuid.uuid4()
        self.db.meter.map_reduce(self.MAP_RESOURCES,
                                 self.REDUCE_RESOURCES,
                                 out=out,
                                 sort={'resource_id': 1},
                                 query=query)

        try:
            for r in self.db[out].find(sort=sort_instructions):
                resource = r['value']
                yield models.Resource(
                    resource_id=r['_id'],
                    user_id=resource['user_id'],
                    project_id=resource['project_id'],
                    first_sample_timestamp=resource['first_timestamp'],
                    last_sample_timestamp=resource['last_timestamp'],
                    source=resource['source'],
                    metadata=resource['metadata'])
        finally:
            self.db[out].drop()

    def _get_floating_resources(self, query, metaquery, resource):
        """Return an iterable of models.Resource instances

        Items are unconstrained by timestamp.
        :param query: project/user/source query
        :param metaquery: dict with metadata to match on.
        :param resource: resource filter.
        """
        if resource is not None:
            query['_id'] = resource

        query.update(dict((k, v)
                          for (k, v) in six.iteritems(metaquery)))

        keys = base._handle_sort_key('resource')
        sort_keys = ['last_sample_timestamp' if i == 'timestamp' else i
                     for i in keys]
        sort_instructions = self._build_sort_instructions(sort_keys)[0]

        for r in self.db.resource.find(query, sort=sort_instructions):
            yield models.Resource(
                resource_id=r['_id'],
                user_id=r['user_id'],
                project_id=r['project_id'],
                first_sample_timestamp=r.get('first_sample_timestamp',
                                             self._GENESIS),
                last_sample_timestamp=r.get('last_sample_timestamp',
                                            self._APOCALYPSE),
                source=r['source'],
                metadata=r['metadata'])

    def get_resources(self, user=None, project=None, source=None,
                      start_timestamp=None, start_timestamp_op=None,
                      end_timestamp=None, end_timestamp_op=None,
                      metaquery=None, resource=None, pagination=None):
        """Return an iterable of models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like gt, ge.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise ceilometer.NotImplementedError('Pagination not implemented')

        metaquery = metaquery or {}

        query = {}
        if user is not None:
            query['user_id'] = user
        if project is not None:
            query['project_id'] = project
        if source is not None:
            query['source'] = source

        if start_timestamp or end_timestamp:
            return self._get_time_constrained_resources(query,
                                                        start_timestamp,
                                                        start_timestamp_op,
                                                        end_timestamp,
                                                        end_timestamp_op,
                                                        metaquery, resource)
        else:
            return self._get_floating_resources(query, metaquery, resource)

    def _aggregate_param(self, fragment_key, aggregate):
        fragment_map = self.STANDARD_AGGREGATES[fragment_key]

        if not aggregate:
            return ''.join([f for f in fragment_map.values()])

        fragments = ''

        for a in aggregate:
            if a.func in self.STANDARD_AGGREGATES[fragment_key]:
                fragment_map = self.STANDARD_AGGREGATES[fragment_key]
                fragments += fragment_map[a.func]
            elif a.func in self.UNPARAMETERIZED_AGGREGATES[fragment_key]:
                fragment_map = self.UNPARAMETERIZED_AGGREGATES[fragment_key]
                fragments += fragment_map[a.func]
            elif a.func in self.PARAMETERIZED_AGGREGATES[fragment_key]:
                fragment_map = self.PARAMETERIZED_AGGREGATES[fragment_key]
                v = self.PARAMETERIZED_AGGREGATES['validate'].get(a.func)
                if not (v and v(a.param)):
                    raise storage.StorageBadAggregate('Bad aggregate: %s.%s'
                                                      % (a.func, a.param))
                params = dict(aggregate_param=a.param)
                fragments += (fragment_map[a.func] % params)
            else:
                raise ceilometer.NotImplementedError(
                    'Selectable aggregate function %s'
                    ' is not supported' % a.func)

        return fragments

    def get_meter_statistics(self, sample_filter, period=None, groupby=None,
                             aggregate=None):
        """Return an iterable of models.Statistics instance.

        Items are containing meter statistics described by the query
        parameters. The filter must have a meter value set.
        """
        if (groupby and
                set(groupby) - set(['user_id', 'project_id',
                                    'resource_id', 'source'])):
            raise ceilometer.NotImplementedError(
                "Unable to group by these fields")

        q = pymongo_utils.make_query_from_filter(sample_filter)

        if period:
            if sample_filter.start:
                period_start = sample_filter.start
            else:
                period_start = self.db.meter.find(
                    limit=1, sort=[('timestamp',
                                    pymongo.ASCENDING)])[0]['timestamp']
            period_start = int(calendar.timegm(period_start.utctimetuple()))
            map_params = {'period': period,
                          'period_first': period_start,
                          'groupby_fields': json.dumps(groupby)}
            if groupby:
                map_fragment = self.MAP_STATS_PERIOD_GROUPBY
            else:
                map_fragment = self.MAP_STATS_PERIOD
        else:
            if groupby:
                map_params = {'groupby_fields': json.dumps(groupby)}
                map_fragment = self.MAP_STATS_GROUPBY
            else:
                map_params = dict()
                map_fragment = self.MAP_STATS

        sub = self._aggregate_param

        map_params['aggregate_initial_val'] = sub('emit_initial', aggregate)
        map_params['aggregate_body_val'] = sub('emit_body', aggregate)

        map_stats = map_fragment % map_params

        reduce_params = dict(
            aggregate_initial_val=sub('reduce_initial', aggregate),
            aggregate_body_val=sub('reduce_body', aggregate),
            aggregate_computation_val=sub('reduce_computation', aggregate)
        )
        reduce_stats = self.REDUCE_STATS % reduce_params

        finalize_params = dict(aggregate_val=sub('finalize', aggregate))
        finalize_stats = self.FINALIZE_STATS % finalize_params

        results = self.db.meter.map_reduce(
            map_stats,
            reduce_stats,
            {'inline': 1},
            finalize=finalize_stats,
            query=q,
        )

        # FIXME(terriyu) Fix get_meter_statistics() so we don't use sorted()
        # to return the results
        return sorted(
            (self._stats_result_to_model(r['value'], groupby, aggregate)
             for r in results['results']),
            key=operator.attrgetter('period_start'))

    @staticmethod
    def _stats_result_aggregates(result, aggregate):
        stats_args = {}
        for attr in ['count', 'min', 'max', 'sum', 'avg']:
            if attr in result:
                stats_args[attr] = result[attr]

        if aggregate:
            stats_args['aggregate'] = {}
            for a in aggregate:
                ak = '%s%s' % (a.func, '/%s' % a.param if a.param else '')
                if ak in result:
                    stats_args['aggregate'][ak] = result[ak]
                elif 'aggregate' in result:
                    stats_args['aggregate'][ak] = result['aggregate'].get(ak)
        return stats_args

    @staticmethod
    def _stats_result_to_model(result, groupby, aggregate):
        stats_args = Connection._stats_result_aggregates(result, aggregate)
        stats_args['unit'] = result['unit']
        stats_args['duration'] = result['duration']
        stats_args['duration_start'] = result['duration_start']
        stats_args['duration_end'] = result['duration_end']
        stats_args['period'] = result['period']
        stats_args['period_start'] = result['period_start']
        stats_args['period_end'] = result['period_end']
        stats_args['groupby'] = (dict(
            (g, result['groupby'][g]) for g in groupby) if groupby else None)
        return models.Statistics(**stats_args)
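
The statistics map/reduce JavaScript above is assembled in two rounds of
%-substitution: the PARAMS_MAP_STATS* dictionaries fill in the per-variant
values while leaving the aggregate placeholders intact, and the fragments
produced by _aggregate_param() are applied afterwards. A simplified,
self-contained sketch of that composition (shortened templates, not the real
ones):

EMIT_TEMPLATE = ("emit(%(key_val)s, { %(aggregate_body_placeholder)s "
                 "unit: this.counter_unit })")

# First pass: per-variant parameters, keeping the aggregate placeholder.
stage_one = EMIT_TEMPLATE % {
    'key_val': "'statistics'",
    'aggregate_body_placeholder': '%(aggregate_body_val)s',
}

# Second pass: aggregate fragments, as _aggregate_param() would produce them.
map_js = stage_one % {
    'aggregate_body_val': 'sum: this.counter_volume, count: NumberInt(1),',
}
# map_js is now a complete JavaScript emit() call ready for map/reduce.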
Example #10
class Connection(hbase_base.Connection, base.Connection):
    """Put the alarm data into a HBase database

    Collections:

    - alarm:

      - row_key: uuid of alarm
      - Column Families:

        f: contains the raw incoming alarm data

    - alarm_h:

      - row_key: uuid of alarm + ":" + reversed timestamp
      - Column Families:

        f: raw incoming alarm_history data. Timestamp becomes now()
          if not determined
    """

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )
    _memory_instance = None

    ALARM_TABLE = "alarm"
    ALARM_HISTORY_TABLE = "alarm_h"

    def __init__(self, url):
        super(Connection, self).__init__(url)

    def upgrade(self):
        tables = [self.ALARM_HISTORY_TABLE, self.ALARM_TABLE]
        column_families = {'f': dict()}
        with self.conn_pool.connection() as conn:
            hbase_utils.create_tables(conn, tables, column_families)
            hbase_migration.migrate_tables(conn, tables)

    def clear(self):
        LOG.debug(_('Dropping HBase schema...'))
        with self.conn_pool.connection() as conn:
            for table in [self.ALARM_TABLE,
                          self.ALARM_HISTORY_TABLE]:
                try:
                    conn.disable_table(table)
                except Exception:
                    LOG.debug(_('Cannot disable table but ignoring error'))
                try:
                    conn.delete_table(table)
                except Exception:
                    LOG.debug(_('Cannot delete table but ignoring error'))

    def update_alarm(self, alarm):
        """Create an alarm.

        :param alarm: The alarm to create. It is Alarm object, so we need to
          call as_dict()
        """
        _id = alarm.alarm_id
        alarm_to_store = hbase_utils.serialize_entry(alarm.as_dict())
        with self.conn_pool.connection() as conn:
            alarm_table = conn.table(self.ALARM_TABLE)
            alarm_table.put(_id, alarm_to_store)
            stored_alarm = hbase_utils.deserialize_entry(
                alarm_table.row(_id))[0]
        return models.Alarm(**stored_alarm)

    create_alarm = update_alarm

    def delete_alarm(self, alarm_id):
        with self.conn_pool.connection() as conn:
            alarm_table = conn.table(self.ALARM_TABLE)
            alarm_table.delete(alarm_id)

    def get_alarms(self, name=None, user=None, state=None, meter=None,
                   project=None, enabled=None, alarm_id=None, pagination=None,
                   alarm_type=None):

        if pagination:
            raise ceilometer.NotImplementedError('Pagination not implemented')
        if meter:
            raise ceilometer.NotImplementedError(
                'Filter by meter not implemented')

        q = hbase_utils.make_query(alarm_id=alarm_id, name=name,
                                   enabled=enabled, user_id=user,
                                   project_id=project, state=state,
                                   type=alarm_type)

        with self.conn_pool.connection() as conn:
            alarm_table = conn.table(self.ALARM_TABLE)
            gen = alarm_table.scan(filter=q)
            alarms = [hbase_utils.deserialize_entry(data)[0]
                      for ignored, data in gen]
            for alarm in sorted(
                    alarms,
                    key=operator.itemgetter('timestamp'),
                    reverse=True):
                yield models.Alarm(**alarm)

    def get_alarm_changes(self, alarm_id, on_behalf_of,
                          user=None, project=None, alarm_type=None,
                          start_timestamp=None, start_timestamp_op=None,
                          end_timestamp=None, end_timestamp_op=None):
        q = hbase_utils.make_query(alarm_id=alarm_id,
                                   on_behalf_of=on_behalf_of, type=alarm_type,
                                   user_id=user, project_id=project)
        start_row, end_row = hbase_utils.make_timestamp_query(
            hbase_utils.make_general_rowkey_scan,
            start=start_timestamp, start_op=start_timestamp_op,
            end=end_timestamp, end_op=end_timestamp_op, bounds_only=True,
            some_id=alarm_id)
        with self.conn_pool.connection() as conn:
            alarm_history_table = conn.table(self.ALARM_HISTORY_TABLE)
            gen = alarm_history_table.scan(filter=q, row_start=start_row,
                                           row_stop=end_row)
            for ignored, data in gen:
                stored_entry = hbase_utils.deserialize_entry(data)[0]
                yield models.AlarmChange(**stored_entry)

    def record_alarm_change(self, alarm_change):
        """Record alarm change event."""
        alarm_change_dict = hbase_utils.serialize_entry(alarm_change)
        ts = alarm_change.get('timestamp') or datetime.datetime.now()
        rts = hbase_utils.timestamp(ts)
        with self.conn_pool.connection() as conn:
            alarm_history_table = conn.table(self.ALARM_HISTORY_TABLE)
            alarm_history_table.put(
                hbase_utils.prepare_key(alarm_change.get('alarm_id'), rts),
                alarm_change_dict)
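
The alarm history row keys combine the alarm UUID with a reversed timestamp
so that a plain ascending HBase scan returns the newest changes first. A
standalone sketch of the reversed-timestamp idea (illustrative only; the real
hbase_utils.timestamp and prepare_key helpers may differ in detail):

import calendar
import datetime

def reverse_timestamp(dt, max_ts=0x7fffffffffffffff):
    # Larger (newer) timestamps map to smaller keys, inverting scan order.
    millis = calendar.timegm(dt.utctimetuple()) * 1000
    return max_ts - millis

now = datetime.datetime.utcnow()
earlier = now - datetime.timedelta(hours=1)
assert reverse_timestamp(now) < reverse_timestamp(earlier)
row_key = 'alarm-uuid:%d' % reverse_timestamp(now)   # newest sorts first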
Example #11
class Connection(base.Connection):
    """Put the data into a HBase database

    Collections:

    - alarm:

      - row_key: uuid of alarm
      - Column Families:

        f: contains the raw incoming alarm data

    - alarm_h:

      - row_key: uuid of alarm + "_" + reversed timestamp
      - Column Families:

        f: raw incoming alarm_history data. Timestamp becomes now()
          if not determined
    """

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )
    _memory_instance = None

    ALARM_TABLE = "alarm"
    ALARM_HISTORY_TABLE = "alarm_h"

    def __init__(self, url):
        """Hbase Connection Initialization."""
        opts = self._parse_connection_url(url)

        if opts['host'] == '__test__':
            url = os.environ.get('CEILOMETER_TEST_HBASE_URL')
            if url:
                # Reparse URL, but from the env variable now
                opts = self._parse_connection_url(url)
                self.conn_pool = self._get_connection_pool(opts)
            else:
                # This is an in-memory connection pool used for unit tests
                if Connection._memory_instance is None:
                    LOG.debug(
                        _('Creating a new in-memory HBase '
                          'Connection object'))
                    Connection._memory_instance = (
                        hbase_inmemory.MConnectionPool())
                self.conn_pool = Connection._memory_instance
        else:
            self.conn_pool = self._get_connection_pool(opts)

    def upgrade(self):
        with self.conn_pool.connection() as conn:
            conn.create_table(self.ALARM_TABLE, {'f': dict()})
            conn.create_table(self.ALARM_HISTORY_TABLE, {'f': dict()})

    def clear(self):
        LOG.debug(_('Dropping HBase schema...'))
        with self.conn_pool.connection() as conn:
            for table in [self.ALARM_TABLE, self.ALARM_HISTORY_TABLE]:
                try:
                    conn.disable_table(table)
                except Exception:
                    LOG.debug(_('Cannot disable table but ignoring error'))
                try:
                    conn.delete_table(table)
                except Exception:
                    LOG.debug(_('Cannot delete table but ignoring error'))

    @staticmethod
    def _get_connection_pool(conf):
        """Return a connection pool to the database.

        .. note::

          The tests use a subclass to override this and return an
          in-memory connection pool.
        """
        LOG.debug(
            _('connecting to HBase on %(host)s:%(port)s') %
            ({
                'host': conf['host'],
                'port': conf['port']
            }))
        return happybase.ConnectionPool(size=100,
                                        host=conf['host'],
                                        port=conf['port'],
                                        table_prefix=conf['table_prefix'])

    @staticmethod
    def _parse_connection_url(url):
        """Parse connection parameters from a database url.

        .. note::

          HBase Thrift does not support authentication and there is no
          database name, so we are not looking for these in the url.
        """
        opts = {}
        result = netutils.urlsplit(url)
        opts['table_prefix'] = urlparse.parse_qs(result.query).get(
            'table_prefix', [None])[0]
        opts['dbtype'] = result.scheme
        if ':' in result.netloc:
            opts['host'], port = result.netloc.split(':')
        else:
            opts['host'] = result.netloc
            port = 9090
        opts['port'] = port and int(port) or 9090
        return opts

    def update_alarm(self, alarm):
        """Create an alarm.

        :param alarm: The alarm to create. It is Alarm object, so we need to
          call as_dict()
        """
        _id = alarm.alarm_id
        alarm_to_store = hbase_utils.serialize_entry(alarm.as_dict())
        with self.conn_pool.connection() as conn:
            alarm_table = conn.table(self.ALARM_TABLE)
            alarm_table.put(_id, alarm_to_store)
            stored_alarm = hbase_utils.deserialize_entry(
                alarm_table.row(_id))[0]
        return models.Alarm(**stored_alarm)

    create_alarm = update_alarm

    def delete_alarm(self, alarm_id):
        with self.conn_pool.connection() as conn:
            alarm_table = conn.table(self.ALARM_TABLE)
            alarm_table.delete(alarm_id)

    def get_alarms(self,
                   name=None,
                   user=None,
                   state=None,
                   meter=None,
                   project=None,
                   enabled=None,
                   alarm_id=None,
                   pagination=None):

        if pagination:
            raise ceilometer.NotImplementedError('Pagination not implemented')
        if meter:
            raise ceilometer.NotImplementedError(
                'Filter by meter not implemented')

        q = hbase_utils.make_query(alarm_id=alarm_id,
                                   name=name,
                                   enabled=enabled,
                                   user_id=user,
                                   project_id=project,
                                   state=state)

        with self.conn_pool.connection() as conn:
            alarm_table = conn.table(self.ALARM_TABLE)
            gen = alarm_table.scan(filter=q)
            for ignored, data in gen:
                stored_alarm = hbase_utils.deserialize_entry(data)[0]
                yield models.Alarm(**stored_alarm)

    def get_alarm_changes(self,
                          alarm_id,
                          on_behalf_of,
                          user=None,
                          project=None,
                          type=None,
                          start_timestamp=None,
                          start_timestamp_op=None,
                          end_timestamp=None,
                          end_timestamp_op=None):
        q = hbase_utils.make_query(alarm_id=alarm_id,
                                   on_behalf_of=on_behalf_of,
                                   type=type,
                                   user_id=user,
                                   project_id=project)
        start_row, end_row = hbase_utils.make_timestamp_query(
            hbase_utils.make_general_rowkey_scan,
            start=start_timestamp,
            start_op=start_timestamp_op,
            end=end_timestamp,
            end_op=end_timestamp_op,
            bounds_only=True,
            some_id=alarm_id)
        with self.conn_pool.connection() as conn:
            alarm_history_table = conn.table(self.ALARM_HISTORY_TABLE)
            gen = alarm_history_table.scan(filter=q,
                                           row_start=start_row,
                                           row_stop=end_row)
            for ignored, data in gen:
                stored_entry = hbase_utils.deserialize_entry(data)[0]
                yield models.AlarmChange(**stored_entry)

    def record_alarm_change(self, alarm_change):
        """Record alarm change event."""
        alarm_change_dict = hbase_utils.serialize_entry(alarm_change)
        ts = alarm_change.get('timestamp') or datetime.datetime.now()
        rts = hbase_utils.timestamp(ts)
        with self.conn_pool.connection() as conn:
            alarm_history_table = conn.table(self.ALARM_HISTORY_TABLE)
            alarm_history_table.put(
                alarm_change.get('alarm_id') + "_" + str(rts),
                alarm_change_dict)
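
_parse_connection_url() above only needs the scheme, host, port and optional
table_prefix from the storage URL. An equivalent standalone sketch
(hypothetical URL, same urlparse helpers):

from six.moves.urllib import parse as urlparse

url = 'hbase://hbase-host:9090?table_prefix=ceilo'   # hypothetical URL
result = urlparse.urlsplit(url)
host, _, port = result.netloc.partition(':')
opts = {
    'dbtype': result.scheme,                      # 'hbase'
    'host': host,                                 # 'hbase-host'
    'port': int(port) if port else 9090,          # 9090
    'table_prefix': urlparse.parse_qs(result.query).get('table_prefix',
                                                        [None])[0],
}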
Example #12
class Connection(hbase_base.Connection, base.Connection):
    """Put the event data into a HBase database

    Collections:

    - events:

      - row_key: timestamp of event's generation + uuid of event
        in format: "%s:%s" % (ts, Event.message_id)
      - Column Families:

        f: contains the following qualifiers:

          - event_type: description of event's type
          - timestamp: time stamp of event generation
          - all traits for this event in format:

            .. code-block:: python

              "%s:%s" % (trait_name, trait_type)
    """

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )
    _memory_instance = None

    EVENT_TABLE = "event"

    def __init__(self, url):
        super(Connection, self).__init__(url)

    def upgrade(self):
        tables = [self.EVENT_TABLE]
        column_families = {'f': dict(max_versions=1)}
        with self.conn_pool.connection() as conn:
            hbase_utils.create_tables(conn, tables, column_families)

    def clear(self):
        LOG.debug(_('Dropping HBase schema...'))
        with self.conn_pool.connection() as conn:
            for table in [self.EVENT_TABLE]:
                try:
                    conn.disable_table(table)
                except Exception:
                    LOG.debug(_('Cannot disable table but ignoring error'))
                try:
                    conn.delete_table(table)
                except Exception:
                    LOG.debug(_('Cannot delete table but ignoring error'))

    def record_events(self, event_models):
        """Write the events to Hbase.

        :param event_models: a list of models.Event objects.
        :return problem_events: a list of events that could not be saved in a
          (reason, event) tuple. From the reasons that are enumerated in
          storage.models.Event only the UNKNOWN_PROBLEM is applicable here.
        """
        problem_events = []

        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            for event_model in event_models:
                # Row key consists of the timestamp and the message_id from
                # models.Event, so that events are stored sorted by
                # timestamp in the database.
                ts = event_model.generated
                row = hbase_utils.prepare_key(
                    hbase_utils.timestamp(ts, reverse=False),
                    event_model.message_id)
                event_type = event_model.event_type
                traits = {}
                if event_model.traits:
                    for trait in event_model.traits:
                        key = hbase_utils.prepare_key(trait.name, trait.dtype)
                        traits[key] = trait.value
                record = hbase_utils.serialize_entry(traits,
                                                     event_type=event_type,
                                                     timestamp=ts,
                                                     raw=event_model.raw)
                try:
                    events_table.put(row, record)
                except Exception as ex:
                    LOG.debug(_("Failed to record event: %s") % ex)
                    problem_events.append(
                        (models.Event.UNKNOWN_PROBLEM, event_model))
        return problem_events

    def get_events(self, event_filter):
        """Return an iter of models.Event objects.

        :param event_filter: storage.EventFilter object, consists of filters
          for events that are stored in database.
        """
        q, start, stop = hbase_utils.make_events_query_from_filter(
            event_filter)
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)

            gen = events_table.scan(filter=q, row_start=start, row_stop=stop)

        for event_id, data in gen:
            traits = []
            events_dict = hbase_utils.deserialize_entry(data)[0]
            for key, value in events_dict.items():
                if isinstance(key, tuple):
                    trait_name, trait_dtype = key
                    traits.append(
                        models.Trait(name=trait_name,
                                     dtype=int(trait_dtype),
                                     value=value))
            ts, mess = event_id.split(':')

            yield models.Event(message_id=hbase_utils.unquote(mess),
                               event_type=events_dict['event_type'],
                               generated=events_dict['timestamp'],
                               traits=sorted(traits,
                                             key=operator.attrgetter('dtype')),
                               raw=events_dict['raw'])

    def get_event_types(self):
        """Return all event types as an iterable of strings."""
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            gen = events_table.scan()

        event_types = set()
        for event_id, data in gen:
            events_dict = hbase_utils.deserialize_entry(data)[0]
            for key, value in events_dict.items():
                if not isinstance(key, tuple) and key.startswith('event_type'):
                    if value not in event_types:
                        event_types.add(value)
                        yield value

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event
        """

        q = hbase_utils.make_query(event_type=event_type)
        trait_names = set()
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            gen = events_table.scan(filter=q)
        for event_id, data in gen:
            events_dict = hbase_utils.deserialize_entry(data)[0]
            for key, value in events_dict.items():
                if isinstance(key, tuple):
                    trait_name, trait_type = key
                    if trait_name not in trait_names:
                        # Here we ensure that the method returns only unique
                        # trait types: if the same trait type appears in
                        # different events with the same event_type, it is
                        # returned only once. It is assumed that a given
                        # trait name can have only one trait type.
                        trait_names.add(trait_name)
                        data_type = models.Trait.type_names[int(trait_type)]
                        yield {'name': trait_name, 'data_type': data_type}

    def get_traits(self, event_type, trait_type=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait type.
        :param event_type: the type of the Event to filter by
        :param trait_type: the name of the Trait to filter by
        """
        q = hbase_utils.make_query(event_type=event_type,
                                   trait_type=trait_type)
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            gen = events_table.scan(filter=q)
        for event_id, data in gen:
            events_dict = hbase_utils.deserialize_entry(data)[0]
            for key, value in events_dict.items():
                if isinstance(key, tuple):
                    trait_name, trait_type = key
                    yield models.Trait(name=trait_name,
                                       dtype=int(trait_type),
                                       value=value)
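
The HBase methods above all rely on the same row convention: after
hbase_utils.deserialize_entry, trait values live under tuple keys of the
form (trait_name, dtype_index), while plain string keys hold event fields
such as event_type and timestamp. The standalone sketch below is
illustrative only (the values are hypothetical and a namedtuple stands in
for models.Trait), but it shows that mapping in isolation.

# Illustrative sketch, not part of the driver: tuple keys carry traits,
# string keys carry event fields, mirroring the loops above.
import collections

Trait = collections.namedtuple('Trait', ['name', 'dtype', 'value'])

deserialized_row = {
    'event_type': 'compute.instance.create.end',   # plain event field
    ('hostname', '1'): 'compute-01',                # (trait name, dtype)
    ('memory_mb', '2'): 2048,
}

traits = [Trait(name=k[0], dtype=int(k[1]), value=v)
          for k, v in deserialized_row.items()
          if isinstance(k, tuple)]
print(sorted(traits, key=lambda t: t.dtype))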
Ejemplo n.º 13
0
class Connection(base.Connection):
    """Put the data into a SQLAlchemy database.

    Tables::

        - meter
          - meter definition
          - { id: meter id
              name: meter name
              type: meter type
              unit: meter unit
              }
        - resource
          - resource definition
          - { internal_id: resource id
              resource_id: resource uuid
              user_id: user uuid
              project_id: project uuid
              source_id: source id
              resource_metadata: metadata dictionary
              metadata_hash: metadata dictionary hash
              }
        - sample
          - the raw incoming data
          - { id: sample id
              meter_id: meter id            (->meter.id)
              resource_id: resource id      (->resource.internal_id)
              volume: sample volume
              timestamp: datetime
              recorded_at: datetime
              message_signature: message signature
              message_id: message uuid
              }
    """
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def __init__(self, url):
        # Set max_retries to 0, since oslo.db in certain cases may retry the
        # db connection up to max_retries ^ 2 times on failure, and db
        # reconnection is already implemented in the
        # storage.__init__.get_connection_from_config function.
        cfg.CONF.set_override('max_retries', 0, group='database')
        self._engine_facade = db_session.EngineFacade(
            url, **dict(cfg.CONF.database.items()))

    def upgrade(self):
        # NOTE(gordc): to minimise memory, only import migration when needed
        from oslo.db.sqlalchemy import migration
        path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                            'sqlalchemy', 'migrate_repo')
        migration.db_sync(self._engine_facade.get_engine(), path)

    def clear(self):
        engine = self._engine_facade.get_engine()
        for table in reversed(models.Base.metadata.sorted_tables):
            engine.execute(table.delete())
        self._engine_facade._session_maker.close_all()
        engine.dispose()

    @staticmethod
    def _create_meter(conn, name, type, unit):
        # TODO(gordc): implement lru_cache to improve performance
        try:
            meter = models.Meter.__table__
            trans = conn.begin_nested()
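            # NOTE: pysqlite's SAVEPOINT handling is unreliable, so a plain
            # transaction is used on sqlite instead of a nested one (assumed
            # rationale for the dialect check below).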
            if conn.dialect.name == 'sqlite':
                trans = conn.begin()
            with trans:
                meter_row = conn.execute(
                    sa.select([meter.c.id]).where(
                        sa.and_(meter.c.name == name, meter.c.type == type,
                                meter.c.unit == unit))).first()
                meter_id = meter_row[0] if meter_row else None
                if meter_id is None:
                    result = conn.execute(meter.insert(),
                                          name=name,
                                          type=type,
                                          unit=unit)
                    meter_id = result.inserted_primary_key[0]
        except dbexc.DBDuplicateEntry:
            # retry function to pick up duplicate committed object
            meter_id = Connection._create_meter(conn, name, type, unit)

        return meter_id

    @staticmethod
    def _create_resource(conn, res_id, user_id, project_id, source_id, rmeta):
        # TODO(gordc): implement lru_cache to improve performance
        try:
            res = models.Resource.__table__
            m_hash = hashlib.md5(jsonutils.dumps(rmeta,
                                                 sort_keys=True)).hexdigest()
            trans = conn.begin_nested()
            if conn.dialect.name == 'sqlite':
                trans = conn.begin()
            with trans:
                res_row = conn.execute(
                    sa.select([res.c.internal_id]).where(
                        sa.and_(res.c.resource_id == res_id,
                                res.c.user_id == user_id,
                                res.c.project_id == project_id,
                                res.c.source_id == source_id,
                                res.c.metadata_hash == m_hash))).first()
                internal_id = res_row[0] if res_row else None
                if internal_id is None:
                    result = conn.execute(res.insert(),
                                          resource_id=res_id,
                                          user_id=user_id,
                                          project_id=project_id,
                                          source_id=source_id,
                                          resource_metadata=rmeta,
                                          metadata_hash=m_hash)
                    internal_id = result.inserted_primary_key[0]
                    if rmeta and isinstance(rmeta, dict):
                        meta_map = {}
                        for key, v in utils.dict_to_keyval(rmeta):
                            try:
                                _model = sql_utils.META_TYPE_MAP[type(v)]
                                if meta_map.get(_model) is None:
                                    meta_map[_model] = []
                                meta_map[_model].append({
                                    'id': internal_id,
                                    'meta_key': key,
                                    'value': v
                                })
                            except KeyError:
                                LOG.warn(
                                    _("Unknown metadata type. Key (%s) "
                                      "will not be queryable."), key)
                        for _model in meta_map.keys():
                            conn.execute(_model.__table__.insert(),
                                         meta_map[_model])

        except dbexc.DBDuplicateEntry:
            # retry function to pick up duplicate committed object
            internal_id = Connection._create_resource(conn, res_id, user_id,
                                                      project_id, source_id,
                                                      rmeta)

        return internal_id
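
    # Illustrative note (assumption): utils.dict_to_keyval flattens nested
    # resource metadata into dotted key/value pairs, e.g.
    # {'disk': {'size': 10}, 'name': 'vm1'} becomes ('disk.size', 10) and
    # ('name', 'vm1'); sql_utils.META_TYPE_MAP then selects the Meta* table
    # matching each value's Python type.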

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        engine = self._engine_facade.get_engine()
        with engine.begin() as conn:
            # Record the raw data for the sample.
            m_id = self._create_meter(conn, data['counter_name'],
                                      data['counter_type'],
                                      data['counter_unit'])
            res_id = self._create_resource(conn, data['resource_id'],
                                           data['user_id'], data['project_id'],
                                           data['source'],
                                           data['resource_metadata'])
            sample = models.Sample.__table__
            conn.execute(sample.insert(),
                         meter_id=m_id,
                         resource_id=res_id,
                         timestamp=data['timestamp'],
                         volume=data['counter_volume'],
                         message_signature=data['message_signature'],
                         message_id=data['message_id'])

    def clear_expired_metering_data(self, ttl):
        """Clear expired data from the backend storage system.

        Clearing occurs according to the time-to-live.
        :param ttl: Number of seconds to keep records for.
        """

        session = self._engine_facade.get_session()
        with session.begin():
            end = timeutils.utcnow() - datetime.timedelta(seconds=ttl)
            sample_q = (session.query(
                models.Sample).filter(models.Sample.timestamp < end))

            sample_subq = sample_q.subquery()
            for table in [
                    models.MetaText, models.MetaBigInt, models.MetaFloat,
                    models.MetaBool
            ]:
                (session.query(table).join(
                    sample_subq, sample_subq.c.id == table.id).delete())

            rows = sample_q.delete()
            # remove Meter definitions with no matching samples
            (session.query(
                models.Meter).filter(~models.Meter.samples.any()).delete(
                    synchronize_session='fetch'))
            (session.query(
                models.Resource).filter(~models.Resource.samples.any()).delete(
                    synchronize_session='fetch'))
            LOG.info(_("%d samples removed from database"), rows)

    def get_resources(self,
                      user=None,
                      project=None,
                      source=None,
                      start_timestamp=None,
                      start_timestamp_op=None,
                      end_timestamp=None,
                      end_timestamp_op=None,
                      metaquery=None,
                      resource=None,
                      pagination=None):
        """Return an iterable of api_models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like gt, ge.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise ceilometer.NotImplementedError('Pagination not implemented')

        s_filter = storage.SampleFilter(user=user,
                                        project=project,
                                        source=source,
                                        start_timestamp=start_timestamp,
                                        start_timestamp_op=start_timestamp_op,
                                        end_timestamp=end_timestamp,
                                        end_timestamp_op=end_timestamp_op,
                                        metaquery=metaquery,
                                        resource=resource)

        session = self._engine_facade.get_session()
        # get list of resource_ids
        res_q = session.query(distinct(models.Resource.resource_id)).join(
            models.Sample,
            models.Sample.resource_id == models.Resource.internal_id)
        res_q = make_query_from_filter(session,
                                       res_q,
                                       s_filter,
                                       require_meter=False)

        for res_id in res_q.all():
            # get latest Sample
            max_q = (session.query(models.Sample).join(
                models.Resource, models.Resource.internal_id == models.Sample.
                resource_id).filter(models.Resource.resource_id == res_id[0]))
            max_q = make_query_from_filter(session,
                                           max_q,
                                           s_filter,
                                           require_meter=False)
            max_q = max_q.order_by(models.Sample.timestamp.desc(),
                                   models.Sample.id.desc()).limit(1)

            # get the min timestamp value.
            min_q = (session.query(models.Sample.timestamp).join(
                models.Resource, models.Resource.internal_id == models.Sample.
                resource_id).filter(models.Resource.resource_id == res_id[0]))
            min_q = make_query_from_filter(session,
                                           min_q,
                                           s_filter,
                                           require_meter=False)
            min_q = min_q.order_by(models.Sample.timestamp.asc()).limit(1)

            sample = max_q.first()
            if sample:
                yield api_models.Resource(
                    resource_id=sample.resource.resource_id,
                    project_id=sample.resource.project_id,
                    first_sample_timestamp=min_q.first().timestamp,
                    last_sample_timestamp=sample.timestamp,
                    source=sample.resource.source_id,
                    user_id=sample.resource.user_id,
                    metadata=sample.resource.resource_metadata)

    def get_meters(self,
                   user=None,
                   project=None,
                   resource=None,
                   source=None,
                   metaquery=None,
                   pagination=None):
        """Return an iterable of api_models.Meter instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional ID of the resource.
        :param source: Optional source filter.
        :param metaquery: Optional dict with metadata to match on.
        :param pagination: Optional pagination query.
        """

        if pagination:
            raise ceilometer.NotImplementedError('Pagination not implemented')

        s_filter = storage.SampleFilter(user=user,
                                        project=project,
                                        source=source,
                                        metaquery=metaquery,
                                        resource=resource)

        # NOTE(gordc): get latest sample of each meter/resource. we do not
        #              filter here as we want to filter only on latest record.
        session = self._engine_facade.get_session()
        subq = session.query(func.max(models.Sample.id).label('id')).join(
            models.Resource,
            models.Resource.internal_id == models.Sample.resource_id).group_by(
                models.Sample.meter_id, models.Resource.resource_id)
        if resource:
            subq = subq.filter(models.Resource.resource_id == resource)
        subq = subq.subquery()

        # get meter details for samples.
        query_sample = (session.query(
            models.Sample.meter_id, models.Meter.name, models.Meter.type,
            models.Meter.unit, models.Resource.resource_id,
            models.Resource.project_id,
            models.Resource.source_id, models.Resource.user_id).join(
                subq, subq.c.id == models.Sample.id).join(
                    models.Meter,
                    models.Meter.id == models.Sample.meter_id).join(
                        models.Resource, models.Resource.internal_id ==
                        models.Sample.resource_id))
        query_sample = make_query_from_filter(session,
                                              query_sample,
                                              s_filter,
                                              require_meter=False)

        for row in query_sample.all():
            yield api_models.Meter(name=row.name,
                                   type=row.type,
                                   unit=row.unit,
                                   resource_id=row.resource_id,
                                   project_id=row.project_id,
                                   source=row.source_id,
                                   user_id=row.user_id)

    def _retrieve_samples(self, query):
        samples = query.all()

        for s in samples:
            # Remove the id generated by the database when
            # the sample was inserted. It is an implementation
            # detail that should not leak outside of the driver.
            yield api_models.Sample(
                source=s.source_id,
                counter_name=s.counter_name,
                counter_type=s.counter_type,
                counter_unit=s.counter_unit,
                counter_volume=s.counter_volume,
                user_id=s.user_id,
                project_id=s.project_id,
                resource_id=s.resource_id,
                timestamp=s.timestamp,
                recorded_at=s.recorded_at,
                resource_metadata=s.resource_metadata,
                message_id=s.message_id,
                message_signature=s.message_signature,
            )

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of api_models.Samples.

        :param sample_filter: Filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return []

        session = self._engine_facade.get_session()
        query = session.query(
            models.Sample.timestamp, models.Sample.recorded_at,
            models.Sample.message_id, models.Sample.message_signature,
            models.Sample.volume.label('counter_volume'),
            models.Meter.name.label('counter_name'),
            models.Meter.type.label('counter_type'),
            models.Meter.unit.label('counter_unit'), models.Resource.source_id,
            models.Resource.user_id, models.Resource.project_id,
            models.Resource.resource_metadata,
            models.Resource.resource_id).join(
                models.Meter, models.Meter.id == models.Sample.meter_id).join(
                    models.Resource, models.Resource.internal_id ==
                    models.Sample.resource_id).order_by(
                        models.Sample.timestamp.desc())
        query = make_query_from_filter(session,
                                       query,
                                       sample_filter,
                                       require_meter=False)
        if limit:
            query = query.limit(limit)
        return self._retrieve_samples(query)

    def query_samples(self, filter_expr=None, orderby=None, limit=None):
        if limit == 0:
            return []

        session = self._engine_facade.get_session()
        query = session.query(models.FullSample)
        transformer = sql_utils.QueryTransformer(models.FullSample, query)
        if filter_expr is not None:
            transformer.apply_filter(filter_expr)

        transformer.apply_options(orderby, limit)
        return self._retrieve_samples(transformer.get_query())

    @staticmethod
    def _get_aggregate_functions(aggregate):
        if not aggregate:
            return [f for f in STANDARD_AGGREGATES.values()]

        functions = []

        for a in aggregate:
            if a.func in STANDARD_AGGREGATES:
                functions.append(STANDARD_AGGREGATES[a.func])
            elif a.func in UNPARAMETERIZED_AGGREGATES:
                functions.append(UNPARAMETERIZED_AGGREGATES[a.func])
            elif a.func in PARAMETERIZED_AGGREGATES['compute']:
                validate = PARAMETERIZED_AGGREGATES['validate'].get(a.func)
                if not (validate and validate(a.param)):
                    raise storage.StorageBadAggregate('Bad aggregate: %s.%s' %
                                                      (a.func, a.param))
                compute = PARAMETERIZED_AGGREGATES['compute'][a.func]
                functions.append(compute(a.param))
            else:
                raise ceilometer.NotImplementedError(
                    'Selectable aggregate function %s'
                    ' is not supported' % a.func)

        return functions

    def _make_stats_query(self, sample_filter, groupby, aggregate):

        select = [
            func.min(models.Sample.timestamp).label('tsmin'),
            func.max(models.Sample.timestamp).label('tsmax'), models.Meter.unit
        ]
        select.extend(self._get_aggregate_functions(aggregate))

        session = self._engine_facade.get_session()

        if groupby:
            group_attributes = []
            for g in groupby:
                if g != 'resource_metadata.instance_type':
                    group_attributes.append(getattr(models.Resource, g))
                else:
                    group_attributes.append(
                        getattr(
                            models.MetaText,
                            'value').label('resource_metadata.instance_type'))

            select.extend(group_attributes)

        query = (session.query(*select).join(
            models.Meter, models.Meter.id == models.Sample.meter_id).join(
                models.Resource, models.Resource.internal_id ==
                models.Sample.resource_id).group_by(models.Meter.unit))

        if groupby:
            for g in groupby:
                if g == 'resource_metadata.instance_type':
                    query = query.join(
                        models.MetaText,
                        models.Resource.internal_id == models.MetaText.id)
                    query = query.filter(
                        models.MetaText.meta_key == 'instance_type')
            query = query.group_by(*group_attributes)

        return make_query_from_filter(session, query, sample_filter)

    @staticmethod
    def _stats_result_aggregates(result, aggregate):
        stats_args = {}
        if isinstance(result.count, (int, long)):
            stats_args['count'] = result.count
        for attr in ['min', 'max', 'sum', 'avg']:
            if hasattr(result, attr):
                stats_args[attr] = getattr(result, attr)
        if aggregate:
            stats_args['aggregate'] = {}
            for a in aggregate:
                key = '%s%s' % (a.func, '/%s' % a.param if a.param else '')
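                # e.g. func='cardinality', param='resource_id' produces the
                # label 'cardinality/resource_id' (illustrative values).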
                stats_args['aggregate'][key] = getattr(result, key)
        return stats_args

    @staticmethod
    def _stats_result_to_model(result, period, period_start, period_end,
                               groupby, aggregate):
        stats_args = Connection._stats_result_aggregates(result, aggregate)
        stats_args['unit'] = result.unit
        duration = (timeutils.delta_seconds(result.tsmin, result.tsmax)
                    if result.tsmin is not None and result.tsmax is not None
                    else None)
        stats_args['duration'] = duration
        stats_args['duration_start'] = result.tsmin
        stats_args['duration_end'] = result.tsmax
        stats_args['period'] = period
        stats_args['period_start'] = period_start
        stats_args['period_end'] = period_end
        stats_args['groupby'] = (dict(
            (g, getattr(result, g)) for g in groupby) if groupby else None)
        return api_models.Statistics(**stats_args)

    def get_meter_statistics(self,
                             sample_filter,
                             period=None,
                             groupby=None,
                             aggregate=None):
        """Return an iterable of api_models.Statistics instances.

        Items contain meter statistics described by the query parameters.
        The filter must have a meter value set.
        """
        if groupby:
            for group in groupby:
                if group not in [
                        'user_id', 'project_id', 'resource_id',
                        'resource_metadata.instance_type'
                ]:
                    raise ceilometer.NotImplementedError('Unable to group by '
                                                         'these fields')

        if not period:
            for res in self._make_stats_query(sample_filter, groupby,
                                              aggregate):
                if res.count:
                    yield self._stats_result_to_model(res, 0, res.tsmin,
                                                      res.tsmax, groupby,
                                                      aggregate)
            return

        if not (sample_filter.start_timestamp and sample_filter.end_timestamp):
            res = self._make_stats_query(sample_filter, None,
                                         aggregate).first()
            if not res:
                # NOTE(liusheng): 'res' may be None because no sample was
                # found matching the sample filter(s).
                return

        query = self._make_stats_query(sample_filter, groupby, aggregate)
        # HACK(jd) This is an awful method to compute stats by period, but
        # since we're trying to be SQL agnostic we have to write portable
        # code, so here it is, admire! We're going to do one request to get
        # stats by period. We would like to use GROUP BY, but there's no
        # portable way to manipulate timestamp in SQL, so we can't.
        for period_start, period_end in base.iter_period(
                sample_filter.start_timestamp or res.tsmin,
                sample_filter.end_timestamp or res.tsmax, period):
            q = query.filter(models.Sample.timestamp >= period_start)
            q = q.filter(models.Sample.timestamp < period_end)
            for r in q.all():
                if r.count:
                    yield self._stats_result_to_model(
                        result=r,
                        period=int(
                            timeutils.delta_seconds(period_start, period_end)),
                        period_start=period_start,
                        period_end=period_end,
                        groupby=groupby,
                        aggregate=aggregate)
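
When a period is requested, the statistics above are computed per time
window: the same filtered query is re-run for every (period_start,
period_end) slice produced by base.iter_period. The sketch below only
illustrates the assumed slicing semantics (half-open windows of period
seconds); it reimplements that idea standalone and is not the driver's
iter_period.

# Hedged sketch of the period slicing get_meter_statistics() relies on.
import datetime


def iter_period_sketch(start, end, period):
    """Yield (window_start, window_end) pairs of `period` seconds."""
    window_start = start
    step = datetime.timedelta(seconds=period)
    while window_start < end:
        window_end = window_start + step
        yield window_start, window_end
        window_start = window_end


start = datetime.datetime(2015, 1, 1, 0, 0)
end = datetime.datetime(2015, 1, 1, 2, 30)
# Three hourly windows: 00:00-01:00, 01:00-02:00, 02:00-03:00.
print(list(iter_period_sketch(start, end, 3600)))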
Ejemplo n.º 14
0
    def __init__(self, conf, AVAILABLE_CAPABILITIES):
        super(Connection, self).__init__(
            conf,
            utils.update_nested(COMMON_AVAILABLE_CAPABILITIES,
                                AVAILABLE_CAPABILITIES))
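
This constructor merges driver-specific capabilities into the common set
via utils.update_nested. The sketch below shows what such a nested merge is
assumed to do (deep-copy the defaults, then recursively override leaves);
it is an illustration, not the actual utility.

# Hedged sketch of a nested capability merge.
import copy


def update_nested_sketch(original, updates):
    """Return a deep copy of original with updates merged in."""
    merged = copy.deepcopy(original)
    for key, value in updates.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = update_nested_sketch(merged[key], value)
        else:
            merged[key] = value
    return merged


defaults = {'statistics': {'groupby': False, 'query': {'simple': False}}}
available = {'statistics': {'groupby': True, 'query': {'simple': True}}}
print(update_nested_sketch(defaults, available))
# {'statistics': {'groupby': True, 'query': {'simple': True}}}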
Ejemplo n.º 15
0
class Connection(pymongo_base.Connection):
    """The db2 storage for Ceilometer

    Collections::

        - meter
          - the raw incoming data
        - resource
          - the metadata for resources
          - { _id: uuid of resource,
              metadata: metadata dictionaries
              user_id: uuid
              project_id: uuid
              meter: [ array of {counter_name: string, counter_type: string,
                                 counter_unit: string} ]
            }
    """

    CAPABILITIES = utils.update_nested(pymongo_base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    CONNECTION_POOL = pymongo_utils.ConnectionPool()

    GROUP = {
        '_id': '$counter_name',
        'unit': {
            '$min': '$counter_unit'
        },
        'min': {
            '$min': '$counter_volume'
        },
        'max': {
            '$max': '$counter_volume'
        },
        'sum': {
            '$sum': '$counter_volume'
        },
        'count': {
            '$sum': 1
        },
        'duration_start': {
            '$min': '$timestamp'
        },
        'duration_end': {
            '$max': '$timestamp'
        },
    }

    PROJECT = {
        '_id': 0,
        'unit': 1,
        'min': 1,
        'max': 1,
        'sum': 1,
        'count': 1,
        'avg': {
            '$divide': ['$sum', '$count']
        },
        'duration_start': 1,
        'duration_end': 1,
    }

    SORT_OPERATION_MAP = {'desc': pymongo.DESCENDING, 'asc': pymongo.ASCENDING}

    SECONDS_IN_A_DAY = 86400

    def __init__(self, url):

        # Since we are using pymongo, even though we are connecting to DB2
        # we still have to replace the scheme that distinguishes the db2
        # driver from the mongodb driver, so that pymongo will not raise an
        # exception on the scheme.
        url = url.replace('db2:', 'mongodb:', 1)
        self.conn = self.CONNECTION_POOL.connect(url)

        # Require MongoDB 2.2 to use aggregate(). Since mongodb is used as
        # the backend for tests, the following check makes sure the tests
        # won't try aggregate() on an older mongodb. For db2, versionArray
        # is not part of server_info, so no exception is raised when a real
        # db2 is used as the backend.
        server_info = self.conn.server_info()
        self._using_mongodb = bool(server_info.get('sysInfo'))

        if self._using_mongodb and server_info.get('versionArray') < [2, 2]:
            raise storage.StorageBadVersion("Need at least MongoDB 2.2")

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options['database'])
        if connection_options.get('username'):
            self.db.authenticate(connection_options['username'],
                                 connection_options['password'])

        self.upgrade()

    @classmethod
    def _build_sort_instructions(cls, sort_keys=None, sort_dir='desc'):
        """Returns a sort_instruction.

        Sort instructions are used in the query to determine what attributes
        to sort on and what direction to use.
        :param q: The query dict passed in.
        :param sort_keys: array of attributes by which results be sorted.
        :param sort_dir: direction in which results be sorted (asc, desc).
        :return: sort parameters
        """
        sort_keys = sort_keys or []
        sort_instructions = []
        _sort_dir = cls.SORT_OPERATION_MAP.get(sort_dir,
                                               cls.SORT_OPERATION_MAP['desc'])

        for _sort_key in sort_keys:
            _instruction = (_sort_key, _sort_dir)
            sort_instructions.append(_instruction)

        return sort_instructions
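
    # Example (illustrative only):
    #   cls._build_sort_instructions(['resource_id', 'timestamp'], 'asc')
    #   returns [('resource_id', pymongo.ASCENDING),
    #            ('timestamp', pymongo.ASCENDING)]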

    def upgrade(self, version=None):
        # Establish indexes
        #
        # We need variations for user_id vs. project_id because of the
        # way the indexes are stored in b-trees. The user_id and
        # project_id values are usually mutually exclusive in the
        # queries, so the database won't take advantage of an index
        # including both.
        if self.db.resource.index_information() == {}:
            resource_id = str(bson.objectid.ObjectId())
            self.db.resource.insert({
                '_id': resource_id,
                'no_key': resource_id
            })
            meter_id = str(bson.objectid.ObjectId())
            timestamp = timeutils.utcnow()
            self.db.meter.insert({
                '_id': meter_id,
                'no_key': meter_id,
                'timestamp': timestamp
            })

            self.db.resource.ensure_index([('user_id', pymongo.ASCENDING),
                                           ('project_id', pymongo.ASCENDING),
                                           ('source', pymongo.ASCENDING)],
                                          name='resource_idx')

            self.db.meter.ensure_index([('resource_id', pymongo.ASCENDING),
                                        ('user_id', pymongo.ASCENDING),
                                        ('project_id', pymongo.ASCENDING),
                                        ('counter_name', pymongo.ASCENDING),
                                        ('timestamp', pymongo.ASCENDING),
                                        ('source', pymongo.ASCENDING)],
                                       name='meter_idx')

            self.db.meter.ensure_index([('timestamp', pymongo.DESCENDING)],
                                       name='timestamp_idx')

            self.db.resource.remove({'_id': resource_id})
            self.db.meter.remove({'_id': meter_id})

        # remove API v1 related table
        self.db.user.drop()
        self.db.project.drop()

    def clear(self):
        # db2 does not support drop_database, remove all collections
        for col in ['resource', 'meter']:
            self.db[col].drop()
        # The drop_database command does nothing on a db2 database since it
        # has not been implemented there. However, calling it is important
        # to remove all the empty dbs created during test runs, since the
        # tests run against mongodb on Jenkins.
        self.conn.drop_database(self.db.name)
        self.conn.close()

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        # Record the updated resource metadata
        self.db.resource.update(
            {'_id': data['resource_id']},
            {
                '$set': {
                    'project_id': data['project_id'],
                    'user_id': data['user_id'] or 'null',
                    'metadata': data['resource_metadata'],
                    'source': data['source'],
                },
                '$addToSet': {
                    'meter': {
                        'counter_name': data['counter_name'],
                        'counter_type': data['counter_type'],
                        'counter_unit': data['counter_unit'],
                    },
                },
            },
            upsert=True,
        )

        # Record the raw data for the meter. Use a copy so we do not
        # modify a data structure owned by our caller (the driver adds
        # a new key '_id').
        record = copy.copy(data)
        record['recorded_at'] = timeutils.utcnow()
        # Make sure the data has an _id field, which db2 won't add
        # automatically.
        if record.get('_id') is None:
            record['_id'] = str(bson.objectid.ObjectId())
        self.db.meter.insert(record)

    def get_resources(self,
                      user=None,
                      project=None,
                      source=None,
                      start_timestamp=None,
                      start_timestamp_op=None,
                      end_timestamp=None,
                      end_timestamp_op=None,
                      metaquery=None,
                      resource=None,
                      pagination=None):
        """Return an iterable of models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like gt, ge.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise ceilometer.NotImplementedError('Pagination not implemented')

        metaquery = metaquery or {}

        q = {}
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if source is not None:
            q['source'] = source
        if resource is not None:
            q['resource_id'] = resource
        # Add resource_ prefix so it matches the field in the db
        q.update(
            dict(('resource_' + k, v) for (k, v) in six.iteritems(metaquery)))

        if start_timestamp or end_timestamp:
            # Look for resources matching the above criteria and with
            # samples in the time range we care about, then change the
            # resource query to return just those resources by id.
            ts_range = pymongo_utils.make_timestamp_range(
                start_timestamp, end_timestamp, start_timestamp_op,
                end_timestamp_op)
            if ts_range:
                q['timestamp'] = ts_range

        sort_keys = base._handle_sort_key('resource', 'timestamp')
        sort_keys.insert(0, 'resource_id')
        sort_instructions = self._build_sort_instructions(sort_keys=sort_keys,
                                                          sort_dir='desc')
        def _by_resource(x):
            return x['resource_id']

        meters = self.db.meter.find(q, sort=sort_instructions)
        for resource_id, r_meters in itertools.groupby(
                meters, key=_by_resource):
            # Because we have to know first/last timestamp, and we need a full
            # list of references to the resource's meters, we need a tuple
            # here.
            r_meters = tuple(r_meters)
            latest_meter = r_meters[0]
            last_ts = latest_meter['timestamp']
            first_ts = r_meters[-1]['timestamp']

            yield models.Resource(resource_id=latest_meter['resource_id'],
                                  project_id=latest_meter['project_id'],
                                  first_sample_timestamp=first_ts,
                                  last_sample_timestamp=last_ts,
                                  source=latest_meter['source'],
                                  user_id=latest_meter['user_id'],
                                  metadata=latest_meter['resource_metadata'])

    def get_meter_statistics(self,
                             sample_filter,
                             period=None,
                             groupby=None,
                             aggregate=None):
        """Return an iterable of models.Statistics instance.

        Items are containing meter statistics described by the query
        parameters. The filter must have a meter value set.
        """
        if (groupby and set(groupby) -
                set(['user_id', 'project_id', 'resource_id', 'source'])):
            raise ceilometer.NotImplementedError(
                "Unable to group by these fields")

        if aggregate:
            raise ceilometer.NotImplementedError(
                'Selectable aggregates not implemented')

        q = pymongo_utils.make_query_from_filter(sample_filter)

        if period:
            if sample_filter.start:
                period_start = sample_filter.start
            else:
                period_start = self.db.meter.find(limit=1,
                                                  sort=[('timestamp',
                                                         pymongo.ASCENDING)
                                                        ])[0]['timestamp']

        if groupby:
            sort_keys = ['counter_name'] + groupby + ['timestamp']
        else:
            sort_keys = ['counter_name', 'timestamp']

        sort_instructions = self._build_sort_instructions(sort_keys=sort_keys,
                                                          sort_dir='asc')
        meters = self.db.meter.find(q, sort=sort_instructions)

        def _group_key(meter):
            # the method to define a key for groupby call
            key = {}
            for y in sort_keys:
                if y == 'timestamp' and period:
                    key[y] = (
                        timeutils.delta_seconds(period_start, meter[y]) //
                        period)
                elif y != 'timestamp':
                    key[y] = meter[y]
            return key

        def _to_offset(periods):
            return {
                'days': (periods * period) // self.SECONDS_IN_A_DAY,
                'seconds': (periods * period) % self.SECONDS_IN_A_DAY
            }
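        # Worked example (illustrative): with period=3600 and periods=30,
        # the offset is 30 * 3600 = 108000 seconds, i.e.
        # {'days': 1, 'seconds': 21600}.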

        for key, grouped_meters in itertools.groupby(meters, key=_group_key):
            stat = models.Statistics(unit=None,
                                     min=sys.maxint,
                                     max=-sys.maxint,
                                     avg=0,
                                     sum=0,
                                     count=0,
                                     period=0,
                                     period_start=0,
                                     period_end=0,
                                     duration=0,
                                     duration_start=0,
                                     duration_end=0,
                                     groupby=None)

            for meter in grouped_meters:
                stat.unit = meter.get('counter_unit', '')
                m_volume = meter.get('counter_volume')
                if stat.min > m_volume:
                    stat.min = m_volume
                if stat.max < m_volume:
                    stat.max = m_volume
                stat.sum += m_volume
                stat.count += 1
                if stat.duration_start == 0:
                    stat.duration_start = meter['timestamp']
                stat.duration_end = meter['timestamp']
                if groupby and not stat.groupby:
                    stat.groupby = {}
                    for group_key in groupby:
                        stat.groupby[group_key] = meter[group_key]

            stat.duration = timeutils.delta_seconds(stat.duration_start,
                                                    stat.duration_end)
            stat.avg = stat.sum / stat.count
            if period:
                stat.period = period
                periods = key.get('timestamp')
                stat.period_start = (
                    period_start + datetime.timedelta(**(_to_offset(periods))))
                stat.period_end = (
                    period_start +
                    datetime.timedelta(**(_to_offset(periods + 1))))
            else:
                stat.period_start = stat.duration_start
                stat.period_end = stat.duration_end
            yield stat
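
Because this driver cannot rely on server-side aggregation for grouped,
period-bucketed statistics, get_meter_statistics accumulates min, max, sum
and count per group in Python and derives avg at the end. The standalone
illustration below, with made-up volumes, mirrors that accumulation.

# Illustrative accumulation only, not the driver's code.
volumes = [1.0, 4.0, 2.5]

stat = {'min': float('inf'), 'max': float('-inf'), 'sum': 0.0, 'count': 0}
for volume in volumes:
    stat['min'] = min(stat['min'], volume)
    stat['max'] = max(stat['max'], volume)
    stat['sum'] += volume
    stat['count'] += 1
stat['avg'] = stat['sum'] / stat['count']
print(stat)  # min == 1.0, max == 4.0, sum == 7.5, count == 3, avg == 2.5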
Ejemplo n.º 16
0
class Connection(base.Connection):
    """Put the data into a SQLAlchemy database.

    Tables::

        - meter
          - meter definition
          - { id: meter def id
              name: meter name
              type: meter type
              unit: meter unit
              }
        - sample
          - the raw incoming data
          - { id: sample id
              meter_id: meter id            (->meter.id)
              user_id: user uuid
              project_id: project uuid
              resource_id: resource uuid
              source_id: source id
              resource_metadata: metadata dictionaries
              volume: sample volume
              timestamp: datetime
              message_signature: message signature
              message_id: message uuid
              }
    """
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def __init__(self, url):
        # Set max_retries to 0, since oslo.db in certain cases may retry the
        # db connection up to max_retries ^ 2 times on failure, and db
        # reconnection is already implemented in the
        # storage.__init__.get_connection_from_config function.
        options = dict(cfg.CONF.database.items())
        options['max_retries'] = 0
        # oslo.db doesn't support options defined by Ceilometer
        for opt in storage.OPTS:
            options.pop(opt.name, None)
        self._engine_facade = db_session.EngineFacade(url, **options)

    def upgrade(self):
        # NOTE(gordc): to minimise memory, only import migration when needed
        from oslo_db.sqlalchemy import migration
        path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '..',
                            '..', 'storage', 'sqlalchemy', 'migrate_repo')
        migration.db_sync(self._engine_facade.get_engine(), path)

    def clear(self):
        engine = self._engine_facade.get_engine()
        for table in reversed(models.Base.metadata.sorted_tables):
            engine.execute(table.delete())
        engine.dispose()

    def _retrieve_data(self, filter_expr, orderby, limit, table):
        if limit == 0:
            return []

        session = self._engine_facade.get_session()
        engine = self._engine_facade.get_engine()
        query = session.query(table)
        transformer = sql_utils.QueryTransformer(table,
                                                 query,
                                                 dialect=engine.dialect.name)
        if filter_expr is not None:
            transformer.apply_filter(filter_expr)

        transformer.apply_options(orderby, limit)

        retrieve = {
            models.Alarm: self._retrieve_alarms,
            models.AlarmChange: self._retrieve_alarm_history
        }
        return retrieve[table](transformer.get_query())

    @staticmethod
    def _row_to_alarm_model(row):
        return alarm_api_models.Alarm(
            alarm_id=row.alarm_id,
            enabled=row.enabled,
            type=row.type,
            name=row.name,
            description=row.description,
            timestamp=row.timestamp,
            user_id=row.user_id,
            project_id=row.project_id,
            state=row.state,
            state_timestamp=row.state_timestamp,
            ok_actions=row.ok_actions,
            alarm_actions=row.alarm_actions,
            insufficient_data_actions=(row.insufficient_data_actions),
            rule=row.rule,
            time_constraints=row.time_constraints,
            repeat_actions=row.repeat_actions,
            severity=row.severity)

    def _retrieve_alarms(self, query):
        return (self._row_to_alarm_model(x) for x in query.all())

    def get_alarms(self,
                   name=None,
                   user=None,
                   state=None,
                   meter=None,
                   project=None,
                   enabled=None,
                   alarm_id=None,
                   alarm_type=None,
                   severity=None):
        """Yields a lists of alarms that match filters.

        :param name: Optional name for alarm.
        :param user: Optional ID for user that owns the resource.
        :param state: Optional string for alarm state.
        :param meter: Optional string for alarms associated with meter.
        :param project: Optional ID for project that owns the resource.
        :param enabled: Optional boolean to list disabled alarms.
        :param alarm_id: Optional alarm_id to return one alarm.
        :param alarm_type: Optional alarm type.
        :param severity: Optional alarm severity
        """

        session = self._engine_facade.get_session()
        query = session.query(models.Alarm)
        if name is not None:
            query = query.filter(models.Alarm.name == name)
        if enabled is not None:
            query = query.filter(models.Alarm.enabled == enabled)
        if user is not None:
            query = query.filter(models.Alarm.user_id == user)
        if project is not None:
            query = query.filter(models.Alarm.project_id == project)
        if alarm_id is not None:
            query = query.filter(models.Alarm.alarm_id == alarm_id)
        if state is not None:
            query = query.filter(models.Alarm.state == state)
        if alarm_type is not None:
            query = query.filter(models.Alarm.type == alarm_type)
        if severity is not None:
            query = query.filter(models.Alarm.severity == severity)

        query = query.order_by(desc(models.Alarm.timestamp))
        alarms = self._retrieve_alarms(query)

        # TODO(cmart): improve this by using sqlalchemy.func factory
        if meter is not None:
            alarms = filter(
                lambda row: row.rule.get('meter_name', None) == meter, alarms)

        return alarms

    def create_alarm(self, alarm):
        """Create an alarm.

        :param alarm: The alarm to create.
        """
        session = self._engine_facade.get_session()
        with session.begin():
            alarm_row = models.Alarm(alarm_id=alarm.alarm_id)
            alarm_row.update(alarm.as_dict())
            session.add(alarm_row)

        return self._row_to_alarm_model(alarm_row)

    def update_alarm(self, alarm):
        """Update an alarm.

        :param alarm: the new Alarm to update
        """
        session = self._engine_facade.get_session()
        with session.begin():
            alarm_row = session.merge(models.Alarm(alarm_id=alarm.alarm_id))
            alarm_row.update(alarm.as_dict())

        return self._row_to_alarm_model(alarm_row)

    def delete_alarm(self, alarm_id):
        """Delete an alarm and its history data.

        :param alarm_id: ID of the alarm to delete
        """
        session = self._engine_facade.get_session()
        with session.begin():
            session.query(models.Alarm).filter(
                models.Alarm.alarm_id == alarm_id).delete()
            # FIXME(liusheng): we should use delete cascade
            session.query(models.AlarmChange).filter(
                models.AlarmChange.alarm_id == alarm_id).delete()

    @staticmethod
    def _row_to_alarm_change_model(row):
        return alarm_api_models.AlarmChange(event_id=row.event_id,
                                            alarm_id=row.alarm_id,
                                            type=row.type,
                                            detail=row.detail,
                                            user_id=row.user_id,
                                            project_id=row.project_id,
                                            on_behalf_of=row.on_behalf_of,
                                            timestamp=row.timestamp)

    def query_alarms(self, filter_expr=None, orderby=None, limit=None):
        """Yields a lists of alarms that match filter."""
        return self._retrieve_data(filter_expr, orderby, limit, models.Alarm)

    def _retrieve_alarm_history(self, query):
        return (self._row_to_alarm_change_model(x) for x in query.all())

    def query_alarm_history(self, filter_expr=None, orderby=None, limit=None):
        """Return an iterable of model.AlarmChange objects."""
        return self._retrieve_data(filter_expr, orderby, limit,
                                   models.AlarmChange)

    def get_alarm_changes(self,
                          alarm_id,
                          on_behalf_of,
                          user=None,
                          project=None,
                          alarm_type=None,
                          severity=None,
                          start_timestamp=None,
                          start_timestamp_op=None,
                          end_timestamp=None,
                          end_timestamp_op=None):
        """Yields list of AlarmChanges describing alarm history

        Changes are always sorted in reverse order of occurrence, given
        the importance of currency.

        Segregation for non-administrative users is done on the basis
        of the on_behalf_of parameter. This allows such users to have
        visibility on both the changes initiated by themselves directly
        (generally creation, rule changes, or deletion) and also on those
        changes initiated on their behalf by the alarming service (state
        transitions after alarm thresholds are crossed).

        :param alarm_id: ID of alarm to return changes for
        :param on_behalf_of: ID of tenant to scope changes query (None for
                             administrative user, indicating all projects)
        :param user: Optional ID of user to return changes for
        :param project: Optional ID of project to return changes for
        :param alarm_type: Optional change type
        :param severity: Optional alarm severity
        :param start_timestamp: Optional modified timestamp start range
        :param start_timestamp_op: Optional timestamp start range operation
        :param end_timestamp: Optional modified timestamp end range
        :param end_timestamp_op: Optional timestamp end range operation
        """
        session = self._engine_facade.get_session()
        query = session.query(models.AlarmChange)
        query = query.filter(models.AlarmChange.alarm_id == alarm_id)

        if on_behalf_of is not None:
            query = query.filter(
                models.AlarmChange.on_behalf_of == on_behalf_of)
        if user is not None:
            query = query.filter(models.AlarmChange.user_id == user)
        if project is not None:
            query = query.filter(models.AlarmChange.project_id == project)
        if alarm_type is not None:
            query = query.filter(models.AlarmChange.type == alarm_type)
        if severity is not None:
            query = query.filter(models.AlarmChange.severity == severity)
        if start_timestamp:
            if start_timestamp_op == 'gt':
                query = query.filter(
                    models.AlarmChange.timestamp > start_timestamp)
            else:
                query = query.filter(
                    models.AlarmChange.timestamp >= start_timestamp)
        if end_timestamp:
            if end_timestamp_op == 'le':
                query = query.filter(
                    models.AlarmChange.timestamp <= end_timestamp)
            else:
                query = query.filter(
                    models.AlarmChange.timestamp < end_timestamp)

        query = query.order_by(desc(models.AlarmChange.timestamp))
        return self._retrieve_alarm_history(query)

    def record_alarm_change(self, alarm_change):
        """Record alarm change event."""
        session = self._engine_facade.get_session()
        with session.begin():
            alarm_change_row = models.AlarmChange(
                event_id=alarm_change['event_id'])
            alarm_change_row.update(alarm_change)
            session.add(alarm_change_row)

    def clear_expired_alarm_history_data(self, alarm_history_ttl):
        """Clear expired alarm history data from the backend storage system.

        Clearing occurs according to the time-to-live.

        :param alarm_history_ttl: Number of seconds to keep alarm history
                                  records for.
        """
        session = self._engine_facade.get_session()
        with session.begin():
            valid_start = (timeutils.utcnow() -
                           datetime.timedelta(seconds=alarm_history_ttl))
            deleted_rows = (session.query(models.AlarmChange).filter(
                models.AlarmChange.timestamp < valid_start).delete())
            LOG.info(_LI("%d alarm histories are removed from database"),
                     deleted_rows)
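
Because the alarm rule is persisted as a serialized blob, get_alarms above
applies the meter-name filter in Python after the SQL query rather than in
the WHERE clause. The sketch below, with plain dicts standing in for alarm
rows, illustrates that post-filtering.

# Illustrative post-filter only, not the driver's code.
alarm_rows = [
    {'alarm_id': 'a1', 'rule': {'meter_name': 'cpu_util'}},
    {'alarm_id': 'a2', 'rule': {'meter_name': 'disk.read.bytes'}},
]

cpu_alarms = [row for row in alarm_rows
              if row['rule'].get('meter_name') == 'cpu_util']
print([row['alarm_id'] for row in cpu_alarms])  # ['a1']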
Ejemplo n.º 17
0
class Connection(base.Connection):
    """Base Alarm Connection class for MongoDB and DB2 drivers."""
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       COMMON_AVAILABLE_CAPABILITIES)

    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def update_alarm(self, alarm):
        """Update alarm."""
        data = alarm.as_dict()

        self.db.alarm.update(
            {'alarm_id': alarm.alarm_id},
            {'$set': data},
            upsert=True)

        stored_alarm = self.db.alarm.find({'alarm_id': alarm.alarm_id})[0]
        del stored_alarm['_id']
        self._ensure_encapsulated_rule_format(stored_alarm)
        self._ensure_time_constraints(stored_alarm)
        return models.Alarm(**stored_alarm)

    create_alarm = update_alarm

    def delete_alarm(self, alarm_id):
        """Delete an alarm."""
        self.db.alarm.remove({'alarm_id': alarm_id})

    def record_alarm_change(self, alarm_change):
        """Record alarm change event."""
        self.db.alarm_history.insert(alarm_change.copy())

    def get_alarms(self, name=None, user=None, state=None, meter=None,
                   project=None, enabled=None, alarm_id=None, pagination=None):
        """Yields a lists of alarms that match filters

        :param name: The Alarm name.
        :param user: Optional ID for user that owns the resource.
        :param state: Optional string for alarm state.
        :param meter: Optional string for alarms associated with meter.
        :param project: Optional ID for project that owns the resource.
        :param enabled: Optional boolean to list disabled alarms.
        :param alarm_id: Optional alarm_id to return one alarm.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise NotImplementedError('Pagination not implemented')

        q = {}
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if name is not None:
            q['name'] = name
        if enabled is not None:
            q['enabled'] = enabled
        if alarm_id is not None:
            q['alarm_id'] = alarm_id
        if state is not None:
            q['state'] = state
        if meter is not None:
            q['rule.meter_name'] = meter

        return self._retrieve_alarms(q, [], None)

    def get_alarm_changes(self, alarm_id, on_behalf_of,
                          user=None, project=None, type=None,
                          start_timestamp=None, start_timestamp_op=None,
                          end_timestamp=None, end_timestamp_op=None):
        """Yields list of AlarmChanges describing alarm history

        Changes are always sorted in reverse order of occurrence, given
        the importance of currency.

        Segregation for non-administrative users is done on the basis
        of the on_behalf_of parameter. This allows such users to have
        visibility on both the changes initiated by themselves directly
        (generally creation, rule changes, or deletion) and also on those
        changes initiated on their behalf by the alarming service (state
        transitions after alarm thresholds are crossed).

        :param alarm_id: ID of alarm to return changes for
        :param on_behalf_of: ID of tenant to scope changes query (None for
                             administrative user, indicating all projects)
        :param user: Optional ID of user to return changes for
        :param project: Optional ID of project to return changes for
        :param type: Optional change type
        :param start_timestamp: Optional modified timestamp start range
        :param start_timestamp_op: Optional timestamp start range operation
        :param end_timestamp: Optional modified timestamp end range
        :param end_timestamp_op: Optional timestamp end range operation
        """
        q = dict(alarm_id=alarm_id)
        if on_behalf_of is not None:
            q['on_behalf_of'] = on_behalf_of
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if type is not None:
            q['type'] = type
        if start_timestamp or end_timestamp:
            ts_range = pymongo_utils.make_timestamp_range(start_timestamp,
                                                          end_timestamp,
                                                          start_timestamp_op,
                                                          end_timestamp_op)
            if ts_range:
                q['timestamp'] = ts_range

        return self._retrieve_alarm_changes(q,
                                            [("timestamp",
                                              pymongo.DESCENDING)],
                                            None)

    def query_alarms(self, filter_expr=None, orderby=None, limit=None):
        """Return an iterable of model.Alarm objects."""
        return self._retrieve_data(filter_expr, orderby, limit,
                                   models.Alarm)

    def query_alarm_history(self, filter_expr=None, orderby=None, limit=None):
        """Return an iterable of model.AlarmChange objects."""
        return self._retrieve_data(filter_expr,
                                   orderby,
                                   limit,
                                   models.AlarmChange)
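    # NOTE: the filter_expr and orderby arguments above are assumed to follow
    # the complex-query format handled by pymongo_utils.QueryTransformer,
    # e.g. (illustrative values only):
    #   filter_expr = {"and": [{"=": {"state": "alarm"}},
    #                          {">": {"timestamp": "2014-01-01T00:00:00"}}]}
    #   orderby = [{"timestamp": "desc"}]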

    def _retrieve_data(self, filter_expr, orderby, limit, model):
        if limit == 0:
            return []
        query_filter = {}
        orderby_filter = [("timestamp", pymongo.DESCENDING)]
        transformer = pymongo_utils.QueryTransformer()
        if orderby is not None:
            orderby_filter = transformer.transform_orderby(orderby)
        if filter_expr is not None:
            query_filter = transformer.transform_filter(filter_expr)

        retrieve = {models.Alarm: self._retrieve_alarms,
                    models.AlarmChange: self._retrieve_alarm_changes}
        return retrieve[model](query_filter, orderby_filter, limit)

    def _retrieve_alarms(self, query_filter, orderby, limit):
        if limit is not None:
            alarms = self.db.alarm.find(query_filter,
                                        limit=limit,
                                        sort=orderby)
        else:
            alarms = self.db.alarm.find(query_filter, sort=orderby)

        for alarm in alarms:
            a = {}
            a.update(alarm)
            del a['_id']
            self._ensure_encapsulated_rule_format(a)
            self._ensure_time_constraints(a)
            yield models.Alarm(**a)

    def _retrieve_alarm_changes(self, query_filter, orderby, limit):
        if limit is not None:
            alarms_history = self.db.alarm_history.find(query_filter,
                                                        limit=limit,
                                                        sort=orderby)
        else:
            alarms_history = self.db.alarm_history.find(
                query_filter, sort=orderby)

        for alarm_history in alarms_history:
            ah = {}
            ah.update(alarm_history)
            del ah['_id']
            yield models.AlarmChange(**ah)

    @classmethod
    def _ensure_encapsulated_rule_format(cls, alarm):
        """Ensure the alarm returned by the storage have the correct format.

        The previous format looks like:
        {
            'alarm_id': '0ld-4l3rt',
            'enabled': True,
            'name': 'old-alert',
            'description': 'old-alert',
            'timestamp': None,
            'meter_name': 'cpu',
            'user_id': 'me',
            'project_id': 'and-da-boys',
            'comparison_operator': 'lt',
            'threshold': 36,
            'statistic': 'count',
            'evaluation_periods': 1,
            'period': 60,
            'state': "insufficient data",
            'state_timestamp': None,
            'ok_actions': [],
            'alarm_actions': ['http://nowhere/alarms'],
            'insufficient_data_actions': [],
            'repeat_actions': False,
            'matching_metadata': {'key': 'value'}
            # or 'matching_metadata': [{'key': 'key', 'value': 'value'}]
        }
        """

        if isinstance(alarm.get('rule'), dict):
            return

        alarm['type'] = 'threshold'
        alarm['rule'] = {}
        alarm['matching_metadata'] = cls._decode_matching_metadata(
            alarm['matching_metadata'])
        for field in ['period', 'evaluation_periods', 'threshold',
                      'statistic', 'comparison_operator', 'meter_name']:
            if field in alarm:
                alarm['rule'][field] = alarm[field]
                del alarm[field]

        query = []
        for key in alarm['matching_metadata']:
            query.append({'field': key,
                          'op': 'eq',
                          'value': alarm['matching_metadata'][key],
                          'type': 'string'})
        del alarm['matching_metadata']
        alarm['rule']['query'] = query

    @staticmethod
    def _decode_matching_metadata(matching_metadata):
        if isinstance(matching_metadata, dict):
            # note(sileht): keep compatibility with alarm
            # with matching_metadata as a dict
            return matching_metadata
        else:
            new_matching_metadata = {}
            for elem in matching_metadata:
                new_matching_metadata[elem['key']] = elem['value']
            return new_matching_metadata

    @staticmethod
    def _ensure_time_constraints(alarm):
        """Ensures the alarm has a time constraints field."""
        if 'time_constraints' not in alarm:
            alarm['time_constraints'] = []
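
For illustration, a minimal standalone sketch of the legacy-to-encapsulated conversion performed by _ensure_encapsulated_rule_format above. The input dict is hypothetical sample data, and the helper simplifies the real method (it ignores the list form of matching_metadata):

legacy = {
    'alarm_id': '0ld-4l3rt',
    'meter_name': 'cpu',
    'comparison_operator': 'lt',
    'threshold': 36,
    'statistic': 'count',
    'evaluation_periods': 1,
    'period': 60,
    'matching_metadata': {'key': 'value'},
}


def encapsulate_rule(alarm):
    # Move the threshold fields under 'rule' and turn matching_metadata
    # into a query list, mirroring the conversion above.
    if isinstance(alarm.get('rule'), dict):
        return alarm
    alarm['type'] = 'threshold'
    alarm['rule'] = {}
    for field in ('period', 'evaluation_periods', 'threshold',
                  'statistic', 'comparison_operator', 'meter_name'):
        if field in alarm:
            alarm['rule'][field] = alarm.pop(field)
    alarm['rule']['query'] = [
        {'field': k, 'op': 'eq', 'value': v, 'type': 'string'}
        for k, v in alarm.pop('matching_metadata', {}).items()]
    return alarm


encapsulate_rule(legacy)
assert legacy['rule']['meter_name'] == 'cpu'
assert legacy['rule']['query'] == [{'field': 'key', 'op': 'eq',
                                    'value': 'value', 'type': 'string'}]
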
Ejemplo n.º 18
0
class Connection(base.Connection):
    """Put the data into a SQLAlchemy database.

    Tables::

        - meter
          - meter definition
          - { id: meter def id
              name: meter name
              type: meter type
              unit: meter unit
              }
        - sample
          - the raw incoming data
          - { id: sample id
              meter_id: meter id            (->meter.id)
              user_id: user uuid
              project_id: project uuid
              resource_id: resource uuid
              source_id: source id
              resource_metadata: metadata dictionaries
              volume: sample volume
              timestamp: datetime
              message_signature: message signature
              message_id: message uuid
              }
    """
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)

    def __init__(self, url):
        self._engine_facade = sqlalchemy_session.EngineFacade.from_config(
            url,
            cfg.CONF  # TODO(Alexei_987) Remove access to global CONF object
        )

    def upgrade(self):
        path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                            'sqlalchemy', 'migrate_repo')
        migration.db_sync(self._engine_facade.get_engine(), path)

    def clear(self):
        engine = self._engine_facade.get_engine()
        for table in reversed(models.Base.metadata.sorted_tables):
            engine.execute(table.delete())
        self._engine_facade._session_maker.close_all()
        engine.dispose()

    @staticmethod
    def _create_meter(session, name, type, unit):
        try:
            nested = session.connection().dialect.name != 'sqlite'
            with session.begin(nested=nested, subtransactions=not nested):
                obj = session.query(models.Meter)\
                    .filter(models.Meter.name == name)\
                    .filter(models.Meter.type == type)\
                    .filter(models.Meter.unit == unit).first()
                if obj is None:
                    obj = models.Meter(name=name, type=type, unit=unit)
                    session.add(obj)
        except dbexc.DBDuplicateEntry:
            # retry function to pick up duplicate committed object
            obj = Connection._create_meter(session, name, type, unit)

        return obj

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        session = self._engine_facade.get_session()
        with session.begin():
            # Record the raw data for the sample.
            rmetadata = data['resource_metadata']
            meter = self._create_meter(session, data['counter_name'],
                                       data['counter_type'],
                                       data['counter_unit'])
            sample = models.Sample(meter_id=meter.id)
            session.add(sample)
            sample.resource_id = data['resource_id']
            sample.project_id = data['project_id']
            sample.user_id = data['user_id']
            sample.timestamp = data['timestamp']
            sample.resource_metadata = rmetadata
            sample.volume = data['counter_volume']
            sample.message_signature = data['message_signature']
            sample.message_id = data['message_id']
            sample.source_id = data['source']
            session.flush()

            if rmetadata:
                if isinstance(rmetadata, dict):
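                    # NOTE: utils.dict_to_keyval is assumed to flatten nested
                    # metadata into dot-delimited keys, e.g.
                    # {'disk': {'ephemeral': 0}} -> ('disk.ephemeral', 0),
                    # so each leaf value is stored via the metadata model
                    # matching its Python type in META_TYPE_MAP.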
                    for key, v in utils.dict_to_keyval(rmetadata):
                        try:
                            _model = META_TYPE_MAP[type(v)]
                        except KeyError:
                            LOG.warn(
                                _("Unknown metadata type. Key (%s) will "
                                  "not be queryable."), key)
                        else:
                            session.add(
                                _model(id=sample.id, meta_key=key, value=v))

    def clear_expired_metering_data(self, ttl):
        """Clear expired data from the backend storage system according to the
        time-to-live.

        :param ttl: Number of seconds to keep records for.

        """

        session = self._engine_facade.get_session()
        with session.begin():
            end = timeutils.utcnow() - datetime.timedelta(seconds=ttl)
            sample_query = session.query(models.Sample)\
                .filter(models.Sample.timestamp < end)
            for sample_obj in sample_query.all():
                session.delete(sample_obj)

    def get_resources(self,
                      user=None,
                      project=None,
                      source=None,
                      start_timestamp=None,
                      start_timestamp_op=None,
                      end_timestamp=None,
                      end_timestamp_op=None,
                      metaquery=None,
                      resource=None,
                      pagination=None):
        """Return an iterable of api_models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like gt, ge.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise NotImplementedError('Pagination not implemented')

        metaquery = metaquery or {}

        def _apply_filters(query):
            # TODO(gordc) this should be merged with make_query_from_filter
            for column, value in [(models.Sample.resource_id, resource),
                                  (models.Sample.user_id, user),
                                  (models.Sample.project_id, project),
                                  (models.Sample.source_id, source)]:
                if value:
                    query = query.filter(column == value)
            if metaquery:
                query = apply_metaquery_filter(session, query, metaquery)
            if start_timestamp:
                if start_timestamp_op == 'gt':
                    query = query.filter(
                        models.Sample.timestamp > start_timestamp)
                else:
                    query = query.filter(
                        models.Sample.timestamp >= start_timestamp)
            if end_timestamp:
                if end_timestamp_op == 'le':
                    query = query.filter(
                        models.Sample.timestamp <= end_timestamp)
                else:
                    query = query.filter(
                        models.Sample.timestamp < end_timestamp)
            return query

        session = self._engine_facade.get_session()
        # get list of resource_ids
        res_q = session.query(distinct(models.Sample.resource_id))
        res_q = _apply_filters(res_q)

        for res_id in res_q.all():
            # get latest Sample
            max_q = session.query(models.Sample)\
                .filter(models.Sample.resource_id == res_id[0])
            max_q = _apply_filters(max_q)
            max_q = max_q.order_by(models.Sample.timestamp.desc(),
                                   models.Sample.id.desc()).limit(1)

            # get the min timestamp value.
            min_q = session.query(models.Sample.timestamp)\
                .filter(models.Sample.resource_id == res_id[0])
            min_q = _apply_filters(min_q)
            min_q = min_q.order_by(models.Sample.timestamp.asc()).limit(1)

            sample = max_q.first()
            if sample:
                yield api_models.Resource(
                    resource_id=sample.resource_id,
                    project_id=sample.project_id,
                    first_sample_timestamp=min_q.first().timestamp,
                    last_sample_timestamp=sample.timestamp,
                    source=sample.source_id,
                    user_id=sample.user_id,
                    metadata=sample.resource_metadata)

    def get_meters(self,
                   user=None,
                   project=None,
                   resource=None,
                   source=None,
                   metaquery=None,
                   pagination=None):
        """Return an iterable of api_models.Meter instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional ID of the resource.
        :param source: Optional source filter.
        :param metaquery: Optional dict with metadata to match on.
        :param pagination: Optional pagination query.
        """

        if pagination:
            raise NotImplementedError('Pagination not implemented')

        metaquery = metaquery or {}

        def _apply_filters(query):
            # TODO(gordc) this should be merged with make_query_from_filter
            for column, value in [(models.Sample.resource_id, resource),
                                  (models.Sample.user_id, user),
                                  (models.Sample.project_id, project),
                                  (models.Sample.source_id, source)]:
                if value:
                    query = query.filter(column == value)
            if metaquery:
                query = apply_metaquery_filter(session, query, metaquery)
            return query

        session = self._engine_facade.get_session()

        # sample_subq is used to reduce sample records
        # by selecting a record for each (resource_id, meter_id).
        # max() is used to choose a sample record, so the latest record
        # is selected for each (resource_id, meter_id).
        sample_subq = session.query(
            func.max(models.Sample.id).label('id'))\
            .group_by(models.Sample.meter_id, models.Sample.resource_id)
        sample_subq = sample_subq.subquery()

        # SELECT sample.* FROM sample INNER JOIN
        #  (SELECT max(sample.id) AS id FROM sample
        #   GROUP BY sample.resource_id, sample.meter_id) AS anon_2
        # ON sample.id = anon_2.id
        query_sample = session.query(models.MeterSample).\
            join(sample_subq, models.MeterSample.id == sample_subq.c.id)
        query_sample = _apply_filters(query_sample)

        for sample in query_sample.all():
            yield api_models.Meter(name=sample.counter_name,
                                   type=sample.counter_type,
                                   unit=sample.counter_unit,
                                   resource_id=sample.resource_id,
                                   project_id=sample.project_id,
                                   source=sample.source_id,
                                   user_id=sample.user_id)

    def _retrieve_samples(self, query):
        samples = query.all()

        for s in samples:
            # Remove the id generated by the database when
            # the sample was inserted. It is an implementation
            # detail that should not leak outside of the driver.
            yield api_models.Sample(
                source=s.source_id,
                counter_name=s.counter_name,
                counter_type=s.counter_type,
                counter_unit=s.counter_unit,
                counter_volume=s.counter_volume,
                user_id=s.user_id,
                project_id=s.project_id,
                resource_id=s.resource_id,
                timestamp=s.timestamp,
                recorded_at=s.recorded_at,
                resource_metadata=s.resource_metadata,
                message_id=s.message_id,
                message_signature=s.message_signature,
            )

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of api_models.Samples.

        :param sample_filter: Filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return []

        table = models.MeterSample
        session = self._engine_facade.get_session()
        query = session.query(table)
        query = make_query_from_filter(session,
                                       query,
                                       sample_filter,
                                       require_meter=False)
        transformer = QueryTransformer(table, query)
        transformer.apply_options(None, limit)
        return self._retrieve_samples(transformer.get_query())

    def _retrieve_data(self, filter_expr, orderby, limit, table):
        if limit == 0:
            return []

        session = self._engine_facade.get_session()
        query = session.query(table)
        transformer = QueryTransformer(table, query)
        if filter_expr is not None:
            transformer.apply_filter(filter_expr)

        transformer.apply_options(orderby, limit)

        retrieve = {
            models.MeterSample: self._retrieve_samples,
            models.Alarm: self._retrieve_alarms,
            models.AlarmChange: self._retrieve_alarm_history
        }
        return retrieve[table](transformer.get_query())

    def query_samples(self, filter_expr=None, orderby=None, limit=None):
        return self._retrieve_data(filter_expr, orderby, limit,
                                   models.MeterSample)

    @staticmethod
    def _get_aggregate_functions(aggregate):
        if not aggregate:
            return [f for f in STANDARD_AGGREGATES.values()]

        functions = []

        for a in aggregate:
            if a.func in STANDARD_AGGREGATES:
                functions.append(STANDARD_AGGREGATES[a.func])
            elif a.func in UNPARAMETERIZED_AGGREGATES:
                functions.append(UNPARAMETERIZED_AGGREGATES[a.func])
            elif a.func in PARAMETERIZED_AGGREGATES['compute']:
                validate = PARAMETERIZED_AGGREGATES['validate'].get(a.func)
                if not (validate and validate(a.param)):
                    raise storage.StorageBadAggregate('Bad aggregate: %s.%s' %
                                                      (a.func, a.param))
                compute = PARAMETERIZED_AGGREGATES['compute'][a.func]
                functions.append(compute(a.param))
            else:
                raise NotImplementedError('Selectable aggregate function %s'
                                          ' is not supported' % a.func)

        return functions

    def _make_stats_query(self, sample_filter, groupby, aggregate):

        select = [
            models.Meter.unit,
            func.min(models.Sample.timestamp).label('tsmin'),
            func.max(models.Sample.timestamp).label('tsmax'),
        ]

        select.extend(self._get_aggregate_functions(aggregate))

        session = self._engine_facade.get_session()

        if groupby:
            group_attributes = [getattr(models.Sample, g) for g in groupby]
            select.extend(group_attributes)

        query = session.query(*select).filter(
            models.Meter.id == models.Sample.meter_id)\
            .group_by(models.Meter.unit)

        if groupby:
            query = query.group_by(*group_attributes)

        return make_query_from_filter(session, query, sample_filter)

    @staticmethod
    def _stats_result_aggregates(result, aggregate):
        stats_args = {}
        if isinstance(result.count, (int, long)):
            stats_args['count'] = result.count
        for attr in ['min', 'max', 'sum', 'avg']:
            if hasattr(result, attr):
                stats_args[attr] = getattr(result, attr)
        if aggregate:
            stats_args['aggregate'] = {}
            for a in aggregate:
                key = '%s%s' % (a.func, '/%s' % a.param if a.param else '')
                stats_args['aggregate'][key] = getattr(result, key)
        return stats_args

    @staticmethod
    def _stats_result_to_model(result, period, period_start, period_end,
                               groupby, aggregate):
        stats_args = Connection._stats_result_aggregates(result, aggregate)
        stats_args['unit'] = result.unit
        duration = (timeutils.delta_seconds(result.tsmin, result.tsmax)
                    if result.tsmin is not None and result.tsmax is not None
                    else None)
        stats_args['duration'] = duration
        stats_args['duration_start'] = result.tsmin
        stats_args['duration_end'] = result.tsmax
        stats_args['period'] = period
        stats_args['period_start'] = period_start
        stats_args['period_end'] = period_end
        stats_args['groupby'] = (dict(
            (g, getattr(result, g)) for g in groupby) if groupby else None)
        return api_models.Statistics(**stats_args)

    def get_meter_statistics(self,
                             sample_filter,
                             period=None,
                             groupby=None,
                             aggregate=None):
        """Return an iterable of api_models.Statistics instances containing
        meter statistics described by the query parameters.

        The filter must have a meter value set.

        """
        if groupby:
            for group in groupby:
                if group not in ['user_id', 'project_id', 'resource_id']:
                    raise NotImplementedError('Unable to group by '
                                              'these fields')

        if not period:
            for res in self._make_stats_query(sample_filter, groupby,
                                              aggregate):
                if res.count:
                    yield self._stats_result_to_model(res, 0, res.tsmin,
                                                      res.tsmax, groupby,
                                                      aggregate)
            return

        if not sample_filter.start or not sample_filter.end:
            res = self._make_stats_query(sample_filter, None,
                                         aggregate).first()
            if not res:
                # NOTE(liusheng): 'res' may be None because no sample was
                # found matching the sample filter(s).
                return

        query = self._make_stats_query(sample_filter, groupby, aggregate)
        # HACK(jd) This is an awful method to compute stats by period, but
        # since we're trying to be SQL agnostic we have to write portable
        # code, so here it is, admire! We're going to do one request to get
        # stats by period. We would like to use GROUP BY, but there's no
        # portable way to manipulate timestamp in SQL, so we can't.
        for period_start, period_end in base.iter_period(
                sample_filter.start or res.tsmin, sample_filter.end
                or res.tsmax, period):
            q = query.filter(models.Sample.timestamp >= period_start)
            q = q.filter(models.Sample.timestamp < period_end)
            for r in q.all():
                if r.count:
                    yield self._stats_result_to_model(
                        result=r,
                        period=int(
                            timeutils.delta_seconds(period_start, period_end)),
                        period_start=period_start,
                        period_end=period_end,
                        groupby=groupby,
                        aggregate=aggregate)

    @staticmethod
    def _row_to_alarm_model(row):
        return api_models.Alarm(
            alarm_id=row.alarm_id,
            enabled=row.enabled,
            type=row.type,
            name=row.name,
            description=row.description,
            timestamp=row.timestamp,
            user_id=row.user_id,
            project_id=row.project_id,
            state=row.state,
            state_timestamp=row.state_timestamp,
            ok_actions=row.ok_actions,
            alarm_actions=row.alarm_actions,
            insufficient_data_actions=row.insufficient_data_actions,
            rule=row.rule,
            time_constraints=row.time_constraints,
            repeat_actions=row.repeat_actions)

    def _retrieve_alarms(self, query):
        return (self._row_to_alarm_model(x) for x in query.all())

    def get_alarms(self,
                   name=None,
                   user=None,
                   project=None,
                   enabled=None,
                   alarm_id=None,
                   pagination=None):
        """Yields a lists of alarms that match filters
        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param enabled: Optional boolean to list disabled alarms.
        :param alarm_id: Optional alarm_id to return one alarm.
        :param pagination: Optional pagination query.
        """

        if pagination:
            raise NotImplementedError('Pagination not implemented')

        session = self._engine_facade.get_session()
        query = session.query(models.Alarm)
        if name is not None:
            query = query.filter(models.Alarm.name == name)
        if enabled is not None:
            query = query.filter(models.Alarm.enabled == enabled)
        if user is not None:
            query = query.filter(models.Alarm.user_id == user)
        if project is not None:
            query = query.filter(models.Alarm.project_id == project)
        if alarm_id is not None:
            query = query.filter(models.Alarm.alarm_id == alarm_id)

        return self._retrieve_alarms(query)

    def create_alarm(self, alarm):
        """Create an alarm.

        :param alarm: The alarm to create.
        """
        session = self._engine_facade.get_session()
        with session.begin():
            alarm_row = models.Alarm(alarm_id=alarm.alarm_id)
            alarm_row.update(alarm.as_dict())
            session.add(alarm_row)

        return self._row_to_alarm_model(alarm_row)

    def update_alarm(self, alarm):
        """Update an alarm.

        :param alarm: the new Alarm to update
        """
        session = self._engine_facade.get_session()
        with session.begin():
            alarm_row = session.merge(models.Alarm(alarm_id=alarm.alarm_id))
            alarm_row.update(alarm.as_dict())

        return self._row_to_alarm_model(alarm_row)

    def delete_alarm(self, alarm_id):
        """Delete an alarm

        :param alarm_id: ID of the alarm to delete
        """
        session = self._engine_facade.get_session()
        with session.begin():
            session.query(models.Alarm).filter(
                models.Alarm.alarm_id == alarm_id).delete()

    @staticmethod
    def _row_to_alarm_change_model(row):
        return api_models.AlarmChange(event_id=row.event_id,
                                      alarm_id=row.alarm_id,
                                      type=row.type,
                                      detail=row.detail,
                                      user_id=row.user_id,
                                      project_id=row.project_id,
                                      on_behalf_of=row.on_behalf_of,
                                      timestamp=row.timestamp)

    def query_alarms(self, filter_expr=None, orderby=None, limit=None):
        """Yields a lists of alarms that match filter
        """
        return self._retrieve_data(filter_expr, orderby, limit, models.Alarm)

    def _retrieve_alarm_history(self, query):
        return (self._row_to_alarm_change_model(x) for x in query.all())

    def query_alarm_history(self, filter_expr=None, orderby=None, limit=None):
        """Return an iterable of model.AlarmChange objects.
        """
        return self._retrieve_data(filter_expr, orderby, limit,
                                   models.AlarmChange)

    def get_alarm_changes(self,
                          alarm_id,
                          on_behalf_of,
                          user=None,
                          project=None,
                          type=None,
                          start_timestamp=None,
                          start_timestamp_op=None,
                          end_timestamp=None,
                          end_timestamp_op=None):
        """Yields list of AlarmChanges describing alarm history

        Changes are always sorted in reverse order of occurrence, given
        the importance of currency.

        Segregation for non-administrative users is done on the basis
        of the on_behalf_of parameter. This allows such users to have
        visibility on both the changes initiated by themselves directly
        (generally creation, rule changes, or deletion) and also on those
        changes initiated on their behalf by the alarming service (state
        transitions after alarm thresholds are crossed).

        :param alarm_id: ID of alarm to return changes for
        :param on_behalf_of: ID of tenant to scope changes query (None for
                             administrative user, indicating all projects)
        :param user: Optional ID of user to return changes for
        :param project: Optional ID of project to return changes for
        :param type: Optional change type
        :param start_timestamp: Optional modified timestamp start range
        :param start_timestamp_op: Optional timestamp start range operation
        :param end_timestamp: Optional modified timestamp end range
        :param end_timestamp_op: Optional timestamp end range operation
        """
        session = self._engine_facade.get_session()
        query = session.query(models.AlarmChange)
        query = query.filter(models.AlarmChange.alarm_id == alarm_id)

        if on_behalf_of is not None:
            query = query.filter(
                models.AlarmChange.on_behalf_of == on_behalf_of)
        if user is not None:
            query = query.filter(models.AlarmChange.user_id == user)
        if project is not None:
            query = query.filter(models.AlarmChange.project_id == project)
        if type is not None:
            query = query.filter(models.AlarmChange.type == type)
        if start_timestamp:
            if start_timestamp_op == 'gt':
                query = query.filter(
                    models.AlarmChange.timestamp > start_timestamp)
            else:
                query = query.filter(
                    models.AlarmChange.timestamp >= start_timestamp)
        if end_timestamp:
            if end_timestamp_op == 'le':
                query = query.filter(
                    models.AlarmChange.timestamp <= end_timestamp)
            else:
                query = query.filter(
                    models.AlarmChange.timestamp < end_timestamp)

        query = query.order_by(desc(models.AlarmChange.timestamp))
        return self._retrieve_alarm_history(query)

    def record_alarm_change(self, alarm_change):
        """Record alarm change event.
        """
        session = self._engine_facade.get_session()
        with session.begin():
            alarm_change_row = models.AlarmChange(
                event_id=alarm_change['event_id'])
            alarm_change_row.update(alarm_change)
            session.add(alarm_change_row)

    def _get_or_create_trait_type(self, trait_type, data_type, session=None):
        """Find if this trait already exists in the database, and
        if it does not, create a new entry in the trait type table.
        """
        if session is None:
            session = self._engine_facade.get_session()
        with session.begin(subtransactions=True):
            tt = session.query(models.TraitType).filter(
                models.TraitType.desc == trait_type,
                models.TraitType.data_type == data_type).first()
            if not tt:
                tt = models.TraitType(trait_type, data_type)
                session.add(tt)
        return tt

    def _make_trait(self, trait_model, event, session=None):
        """Make a new Trait from a Trait model.

        Doesn't flush or add to session.
        """
        trait_type = self._get_or_create_trait_type(trait_model.name,
                                                    trait_model.dtype, session)
        value_map = models.Trait._value_map
        values = {
            't_string': None,
            't_float': None,
            't_int': None,
            't_datetime': None
        }
        value = trait_model.value
        values[value_map[trait_model.dtype]] = value
        return models.Trait(trait_type, event, **values)

    def _get_or_create_event_type(self, event_type, session=None):
        """Here, we check to see if an event type with the supplied
        name already exists. If not, we create it and return the record.

        This may result in a flush.
        """
        if session is None:
            session = self._engine_facade.get_session()
        with session.begin(subtransactions=True):
            et = session.query(models.EventType).filter(
                models.EventType.desc == event_type).first()
            if not et:
                et = models.EventType(event_type)
                session.add(et)
        return et

    def _record_event(self, session, event_model):
        """Store a single Event, including related Traits.
        """
        with session.begin(subtransactions=True):
            event_type = self._get_or_create_event_type(event_model.event_type,
                                                        session=session)

            event = models.Event(event_model.message_id, event_type,
                                 event_model.generated)
            session.add(event)

            new_traits = []
            if event_model.traits:
                for trait in event_model.traits:
                    t = self._make_trait(trait, event, session=session)
                    session.add(t)
                    new_traits.append(t)

        # Note: we don't flush here, explicitly (unless a new trait or event
        # does it). Otherwise, just wait until all the Events are staged.
        return (event, new_traits)

    def record_events(self, event_models):
        """Write the events to SQL database via sqlalchemy.

        :param event_models: a list of model.Event objects.

        Returns a list of events that could not be saved in a
        (reason, event) tuple. Reasons are enumerated in
        storage.model.Event

        Flush when they're all added, unless new EventTypes or
        TraitTypes are added along the way.
        """
        session = self._engine_facade.get_session()
        events = []
        problem_events = []
        for event_model in event_models:
            event = None
            try:
                with session.begin():
                    event = self._record_event(session, event_model)
            except dbexc.DBDuplicateEntry:
                problem_events.append(
                    (api_models.Event.DUPLICATE, event_model))
            except Exception as e:
                LOG.exception(_('Failed to record event: %s') % e)
                problem_events.append(
                    (api_models.Event.UNKNOWN_PROBLEM, event_model))
            events.append(event)
        return problem_events

    def get_events(self, event_filter):
        """Return an iterable of model.Event objects.

        :param event_filter: EventFilter instance
        """

        start = event_filter.start_time
        end = event_filter.end_time
        session = self._engine_facade.get_session()
        LOG.debug(_("Getting events that match filter: %s") % event_filter)
        with session.begin():
            event_query = session.query(models.Event)

            # Build up the join conditions
            event_join_conditions = [
                models.EventType.id == models.Event.event_type_id
            ]

            if event_filter.event_type:
                event_join_conditions\
                    .append(models.EventType.desc == event_filter.event_type)

            event_query = event_query.join(models.EventType,
                                           and_(*event_join_conditions))

            # Build up the where conditions
            event_filter_conditions = []
            if event_filter.message_id:
                event_filter_conditions\
                    .append(models.Event.message_id == event_filter.message_id)
            if start:
                event_filter_conditions.append(models.Event.generated >= start)
            if end:
                event_filter_conditions.append(models.Event.generated <= end)

            if event_filter_conditions:
                event_query = event_query\
                    .filter(and_(*event_filter_conditions))

            event_models_dict = {}
            if event_filter.traits_filter:
                for trait_filter in event_filter.traits_filter:

                    # Build a sub query that joins Trait to TraitType
                    # where the trait name matches
                    trait_name = trait_filter.pop('key')
                    conditions = [
                        models.Trait.trait_type_id == models.TraitType.id,
                        models.TraitType.desc == trait_name
                    ]

                    for key, value in trait_filter.iteritems():
                        if key == 'string':
                            conditions.append(models.Trait.t_string == value)
                        elif key == 'integer':
                            conditions.append(models.Trait.t_int == value)
                        elif key == 'datetime':
                            conditions.append(models.Trait.t_datetime == value)
                        elif key == 'float':
                            conditions.append(models.Trait.t_float == value)

                    trait_query = session.query(models.Trait.event_id)\
                        .join(models.TraitType, and_(*conditions)).subquery()

                    event_query = event_query\
                        .join(trait_query,
                              models.Event.id == trait_query.c.event_id)
            else:
                # If there are no trait filters, grab the events from the db
                query = session.query(models.Event.id,
                                      models.Event.generated,
                                      models.Event.message_id,
                                      models.EventType.desc)\
                    .join(models.EventType,
                          and_(*event_join_conditions))
                if event_filter_conditions:
                    query = query.filter(and_(*event_filter_conditions))
                for (id, generated, message_id, desc) in query.all():
                    event_models_dict[id] = api_models.Event(
                        message_id, desc, generated, [])

            # Build event models for the events
            event_query = event_query.subquery()
            query = session.query(models.Trait)\
                .join(models.TraitType,
                      models.Trait.trait_type_id == models.TraitType.id)\
                .join(event_query, models.Trait.event_id == event_query.c.id)

            # Now convert the sqlalchemy objects back into Models ...
            for trait in query.all():
                event = event_models_dict.get(trait.event_id)
                if not event:
                    event = api_models.Event(trait.event.message_id,
                                             trait.event.event_type.desc,
                                             trait.event.generated, [])
                    event_models_dict[trait.event_id] = event
                trait_model = api_models.Trait(trait.trait_type.desc,
                                               trait.trait_type.data_type,
                                               trait.get_value())
                event.append_trait(trait_model)

        event_models = event_models_dict.values()
        return sorted(event_models, key=operator.attrgetter('generated'))

    def get_event_types(self):
        """Return all event types as an iterable of strings.
        """

        session = self._engine_facade.get_session()
        with session.begin():
            query = session.query(models.EventType.desc)\
                .order_by(models.EventType.desc)
            for name in query.all():
                # The query returns a tuple with one element.
                yield name[0]

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of
        the trait type. Only trait types for the provided event_type are
        returned.

        :param event_type: the type of the Event
        """
        session = self._engine_facade.get_session()

        LOG.debug(_("Get traits for %s") % event_type)
        with session.begin():
            query = (session.query(
                models.TraitType.desc, models.TraitType.data_type).join(
                    models.Trait,
                    models.Trait.trait_type_id == models.TraitType.id).join(
                        models.Event,
                        models.Event.id == models.Trait.event_id).join(
                            models.EventType,
                            and_(
                                models.EventType.id == models.Event.id,
                                models.EventType.desc == event_type)).group_by(
                                    models.TraitType.desc,
                                    models.TraitType.data_type).distinct())

            for desc, type in query.all():
                yield {'name': desc, 'data_type': type}

    def get_traits(self, event_type, trait_type=None):
        """Return all trait instances associated with an event_type. If
        trait_type is specified, only return instances of that trait type.

        :param event_type: the type of the Event to filter by
        :param trait_type: the name of the Trait to filter by
        """

        session = self._engine_facade.get_session()
        with session.begin():
            trait_type_filters = [
                models.TraitType.id == models.Trait.trait_type_id
            ]
            if trait_type:
                trait_type_filters.append(models.TraitType.desc == trait_type)

            query = (session.query(models.Trait).join(
                models.TraitType, and_(*trait_type_filters)).join(
                    models.Event,
                    models.Event.id == models.Trait.event_id).join(
                        models.EventType,
                        and_(models.EventType.id == models.Event.event_type_id,
                             models.EventType.desc == event_type)))

            for trait in query.all():
                type = trait.trait_type
                yield api_models.Trait(name=type.desc,
                                       dtype=type.data_type,
                                       value=trait.get_value())
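
The per-period loop in get_meter_statistics above splits the sample time range into fixed-size windows and runs one filtered query per window. A simplified stand-in for base.iter_period (illustrative only, not the actual helper):

import datetime


def iter_period(start, end, period):
    # Yield (period_start, period_end) windows of `period` seconds
    # covering [start, end); each window drives one stats query above.
    delta = datetime.timedelta(seconds=period)
    window_start = start
    while window_start < end:
        window_end = window_start + delta
        yield window_start, window_end
        window_start = window_end


start = datetime.datetime(2014, 1, 1, 0, 0)
end = datetime.datetime(2014, 1, 1, 0, 5)
assert len(list(iter_period(start, end, 60))) == 5  # five 60-second windows
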
Ejemplo n.º 19
0
class Connection(base.Connection):
    """Put the event data into a SQLAlchemy database.

    Tables::

        - EventType
          - event definition
          - { id: event type id
              desc: description of event
              }
        - Event
          - event data
          - { id: event id
              message_id: message id
              generated = timestamp of event
              event_type_id = event type -> eventtype.id
              }
        - TraitInt
          - int trait value
          - { event_id: event -> event.id
              key: trait type
              value: integer value
              }
        - TraitDatetime
          - datetime trait value
          - { event_id: event -> event.id
              key: trait type
              value: datetime value
              }
        - TraitText
          - text trait value
          - { event_id: event -> event.id
              key: trait type
              value: text value
              }
        - TraitFloat
          - float trait value
          - { event_id: event -> event.id
              key: trait type
              value: float value
              }

    """
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def __init__(self, url):
        # Set max_retries to 0, since oslo.db may otherwise retry making the
        # db connection up to max_retries ^ 2 times on failure; db
        # reconnection is already implemented in the
        # storage.__init__.get_connection_from_config function.
        options = dict(cfg.CONF.database.items())
        options['max_retries'] = 0
        self._engine_facade = db_session.EngineFacade(url, **options)

    def upgrade(self):
        # NOTE(gordc): to minimise memory, only import migration when needed
        from oslo.db.sqlalchemy import migration
        path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '..',
                            '..', 'storage', 'sqlalchemy', 'migrate_repo')
        migration.db_sync(self._engine_facade.get_engine(), path)

    def clear(self):
        engine = self._engine_facade.get_engine()
        for table in reversed(models.Base.metadata.sorted_tables):
            engine.execute(table.delete())
        self._engine_facade._session_maker.close_all()
        engine.dispose()

    def _get_or_create_event_type(self, event_type, session=None):
        """Check if an event type with the supplied name is already exists.

        If not, we create it and return the record. This may result in a flush.
        """
        if session is None:
            session = self._engine_facade.get_session()
        with session.begin(subtransactions=True):
            et = session.query(models.EventType).filter(
                models.EventType.desc == event_type).first()
            if not et:
                et = models.EventType(event_type)
                session.add(et)
        return et

    def record_events(self, event_models):
        """Write the events to SQL database via sqlalchemy.

        :param event_models: a list of model.Event objects.

        Returns a list of events that could not be saved in a
        (reason, event) tuple. Reasons are enumerated in
        storage.model.Event

        Flush when they're all added, unless new EventTypes or
        TraitTypes are added along the way.
        """
        session = self._engine_facade.get_session()
        problem_events = []
        for event_model in event_models:
            event = None
            try:
                with session.begin():
                    event_type = self._get_or_create_event_type(
                        event_model.event_type, session=session)
                    event = models.Event(event_model.message_id, event_type,
                                         event_model.generated)
                    session.add(event)
                    session.flush()

                    if event_model.traits:
                        trait_map = {}
                        for trait in event_model.traits:
                            if trait_map.get(trait.dtype) is None:
                                trait_map[trait.dtype] = []
                            trait_map[trait.dtype].append({
                                'event_id': event.id,
                                'key': trait.name,
                                'value': trait.value
                            })
                        for dtype in trait_map.keys():
                            model = TRAIT_ID_TO_MODEL[dtype]
                            session.execute(model.__table__.insert(),
                                            trait_map[dtype])
            except dbexc.DBDuplicateEntry as e:
                LOG.exception(_("Failed to record duplicated event: %s") % e)
                problem_events.append(
                    (api_models.Event.DUPLICATE, event_model))
            except KeyError as e:
                LOG.exception(_('Failed to record event: %s') % e)
                problem_events.append(
                    (api_models.Event.INCOMPATIBLE_TRAIT, event_model))
            except Exception as e:
                LOG.exception(_('Failed to record event: %s') % e)
                problem_events.append(
                    (api_models.Event.UNKNOWN_PROBLEM, event_model))
        return problem_events

    def get_events(self, event_filter):
        """Return an iterable of model.Event objects.

        :param event_filter: EventFilter instance
        """

        session = self._engine_facade.get_session()
        with session.begin():
            event_query = session.query(models.Event)

            # Build up the join conditions
            event_join_conditions = [
                models.EventType.id == models.Event.event_type_id
            ]

            if event_filter.event_type:
                event_join_conditions.append(
                    models.EventType.desc == event_filter.event_type)

            event_query = event_query.join(models.EventType,
                                           sa.and_(*event_join_conditions))

            # Build up the where conditions
            event_filter_conditions = []
            if event_filter.message_id:
                event_filter_conditions.append(
                    models.Event.message_id == event_filter.message_id)
            if event_filter.start_timestamp:
                event_filter_conditions.append(
                    models.Event.generated >= event_filter.start_timestamp)
            if event_filter.end_timestamp:
                event_filter_conditions.append(
                    models.Event.generated <= event_filter.end_timestamp)
            if event_filter_conditions:
                event_query = (event_query.filter(
                    sa.and_(*event_filter_conditions)))

            trait_subq = None
            # Build trait filter
            if event_filter.traits_filter:
                trait_qlist = []
                for trait_filter in event_filter.traits_filter:
                    key = trait_filter.pop('key')
                    op = trait_filter.pop('op', 'eq')
                    trait_qlist.append(
                        _build_trait_query(session,
                                           trait_filter.keys()[0], key,
                                           trait_filter.values()[0], op))
                trait_subq = trait_qlist.pop()
                if trait_qlist:
                    trait_subq = trait_subq.intersect(*trait_qlist)
                trait_subq = trait_subq.subquery()

            query = (session.query(models.Event.id).join(
                models.EventType, sa.and_(*event_join_conditions)))
            if trait_subq is not None:
                query = query.join(trait_subq,
                                   trait_subq.c.ev_id == models.Event.id)
            if event_filter_conditions:
                query = query.filter(sa.and_(*event_filter_conditions))

            event_list = {}
            # get a list of all events that match filters
            for (id_, generated, message_id,
                 desc) in query.add_columns(models.Event.generated,
                                            models.Event.message_id,
                                            models.EventType.desc).order_by(
                                                models.Event.generated).all():
                event_list[id_] = api_models.Event(message_id, desc, generated,
                                                   [])
            # Query all traits related to events.
            # NOTE (gordc): cast is done because pgsql defaults to TEXT when
            #               handling unknown values such as null.
            trait_q = (query.join(
                models.TraitDatetime,
                models.TraitDatetime.event_id == models.Event.id).add_columns(
                    models.TraitDatetime.key, models.TraitDatetime.value,
                    sa.cast(sa.null(), sa.Integer),
                    sa.cast(sa.null(), sa.Float(53)),
                    sa.cast(sa.null(), sa.Text))).union(
                        query.join(models.TraitInt, models.TraitInt.event_id ==
                                   models.Event.id).add_columns(
                                       models.TraitInt.key, sa.null(),
                                       models.TraitInt.value, sa.null(),
                                       sa.null()),
                        query.join(
                            models.TraitFloat, models.TraitFloat.event_id ==
                            models.Event.id).add_columns(
                                models.TraitFloat.key, sa.null(), sa.null(),
                                models.TraitFloat.value, sa.null()),
                        query.join(
                            models.TraitText, models.TraitText.event_id ==
                            models.Event.id).add_columns(
                                models.TraitText.key, sa.null(), sa.null(),
                                sa.null(), models.TraitText.value))

            for id_, key, t_date, t_int, t_float, t_text in trait_q.all():
                if t_int:
                    dtype = api_models.Trait.INT_TYPE
                    val = t_int
                elif t_float:
                    dtype = api_models.Trait.FLOAT_TYPE
                    val = t_float
                elif t_date:
                    dtype = api_models.Trait.DATETIME_TYPE
                    val = t_date
                else:
                    dtype = api_models.Trait.TEXT_TYPE
                    val = t_text

                trait_model = api_models.Trait(key, dtype, val)
                event_list[id_].append_trait(trait_model)

            return event_list.values()
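
    # A minimal usage sketch (hypothetical caller; assumes an EventFilter
    # constructor taking event_type and traits_filter keyword arguments,
    # where each traits_filter entry carries 'key', 'op' and one typed value
    # as consumed above):
    #
    #   filt = EventFilter(
    #       event_type='compute.instance.create.end',
    #       traits_filter=[{'key': 'state', 'string': 'active', 'op': 'eq'}])
    #   events = conn.get_events(filt)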

    def get_event_types(self):
        """Return all event types as an iterable of strings."""

        session = self._engine_facade.get_session()
        with session.begin():
            query = (session.query(models.EventType.desc).order_by(
                models.EventType.desc))
            for name in query.all():
                # The query returns a tuple with one element.
                yield name[0]

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.
        :param event_type: the type of the Event
        """
        session = self._engine_facade.get_session()

        with session.begin():
            for trait_model in [
                    models.TraitText, models.TraitInt, models.TraitFloat,
                    models.TraitDatetime
            ]:
                query = (session.query(trait_model.key).join(
                    models.Event,
                    models.Event.id == trait_model.event_id).join(
                        models.EventType,
                        sa.and_(
                            models.EventType.id == models.Event.event_type_id,
                            models.EventType.desc == event_type)).distinct())

                dtype = TRAIT_MODEL_TO_ID.get(trait_model)
                for row in query.all():
                    yield {'name': row[0], 'data_type': dtype}

    def get_traits(self, event_type, trait_type=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait type.
        :param event_type: the type of the Event to filter by
        :param trait_type: the name of the Trait to filter by
        """

        session = self._engine_facade.get_session()
        with session.begin():
            for trait_model in [
                    models.TraitText, models.TraitInt, models.TraitFloat,
                    models.TraitDatetime
            ]:
                query = (session.query(
                    trait_model.key, trait_model.value).join(
                        models.Event,
                        models.Event.id == trait_model.event_id).join(
                            models.EventType,
                            sa.and_(
                                models.EventType.id ==
                                models.Event.event_type_id,
                                models.EventType.desc == event_type)).order_by(
                                    trait_model.key))
                if trait_type:
                    query = query.filter(trait_model.key == trait_type)

                dtype = TRAIT_MODEL_TO_ID.get(trait_model)
                for k, v in query.all():
                    yield api_models.Trait(name=k, dtype=dtype, value=v)
Ejemplo n.º 20
0
class Connection(hbase_base.Connection, base.Connection):
    """Put the metering data into a HBase database

    Collections:

    - meter (actually describes a sample):

      - row-key: consists of reversed timestamp, meter and a message uuid
        for purposes of uniqueness
      - Column Families:

        f: contains the following qualifiers:

          - counter_name: <name of counter>
          - counter_type: <type of counter>
          - counter_unit: <unit of counter>
          - counter_volume: <volume of counter>
          - message: <raw incoming data>
          - message_id: <id of message>
          - message_signature: <signature of message>
          - resource_metadata: raw metadata for corresponding resource
            of the meter
          - project_id: <id of project>
          - resource_id: <id of resource>
          - user_id: <id of user>
          - recorded_at: <datetime when sample has been recorded (utc.now)>
          - flattened metadata with prefix r_metadata. e.g.::

             f:r_metadata.display_name or f:r_metadata.tag

          - rts: <reversed timestamp of entry>
          - timestamp: <meter's timestamp (came from message)>
          - source for meter with prefix 's'

    - resource:

      - row_key: uuid of resource
      - Column Families:

        f: contains the following qualifiers:

          - resource_metadata: raw metadata for corresponding resource
          - project_id: <id of project>
          - resource_id: <id of resource>
          - user_id: <id of user>
          - flattened metadata with prefix r_metadata. e.g.::

             f:r_metadata.display_name or f:r_metadata.tag

          - sources for all corresponding meters with prefix 's'
          - all meters with prefix 'm' for this resource in format:

            .. code-block:: python

              "%s:%s:%s:%s:%s" % (rts, source, counter_name, counter_type,
              counter_unit)
    """

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )
    _memory_instance = None

    RESOURCE_TABLE = "resource"
    METER_TABLE = "meter"

    def __init__(self, url):
        super(Connection, self).__init__(url)

    def upgrade(self):
        tables = [self.RESOURCE_TABLE, self.METER_TABLE]
        column_families = {'f': dict(max_versions=1)}
        with self.conn_pool.connection() as conn:
            hbase_utils.create_tables(conn, tables, column_families)
            hbase_migration.migrate_tables(conn, tables)

    def clear(self):
        LOG.debug('Dropping HBase schema...')
        with self.conn_pool.connection() as conn:
            for table in [self.RESOURCE_TABLE, self.METER_TABLE]:
                try:
                    conn.disable_table(table)
                except Exception:
                    LOG.debug('Cannot disable table; ignoring error')
                try:
                    conn.delete_table(table)
                except Exception:
                    LOG.debug('Cannot delete table; ignoring error')

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
          ceilometer.meter.meter_message_from_counter
        """
        with self.conn_pool.connection() as conn:
            resource_table = conn.table(self.RESOURCE_TABLE)
            meter_table = conn.table(self.METER_TABLE)

            resource_metadata = data.get('resource_metadata', {})
            # Determine the name of the new meter
            rts = hbase_utils.timestamp(data['timestamp'])
            new_meter = hbase_utils.prepare_key(rts, data['source'],
                                                data['counter_name'],
                                                data['counter_type'],
                                                data['counter_unit'])

            # TODO(nprivalova): try not to store resource_id
            resource = hbase_utils.serialize_entry(
                **{
                    'source': data['source'],
                    'meter': {
                        new_meter: data['timestamp']
                    },
                    'resource_metadata': resource_metadata,
                    'resource_id': data['resource_id'],
                    'project_id': data['project_id'],
                    'user_id': data['user_id']
                })
            # Here we put the entry into HBase with an explicit timestamp.
            # This is needed when samples arrive out of order: using the
            # sample's own timestamp as the cell version means the newest
            # data automatically ends up 'on top', which keeps the metadata
            # up to date (metadata from the newest sample is considered
            # current).
            ts = int(time.mktime(data['timestamp'].timetuple()) * 1000)
            resource_table.put(hbase_utils.encode_unicode(data['resource_id']),
                               resource, ts)

            # Rowkey consists of reversed timestamp, meter and a
            # message uuid for purposes of uniqueness
            row = hbase_utils.prepare_key(data['counter_name'], rts,
                                          data['message_id'])
            record = hbase_utils.serialize_entry(
                data, **{
                    'source': data['source'],
                    'rts': rts,
                    'message': data,
                    'recorded_at': timeutils.utcnow()
                })
            meter_table.put(row, record)
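
            # For illustration (assuming prepare_key joins its parts with
            # ':'), the meter row key built above looks roughly like
            #   "cpu_util:<reversed timestamp>:<message uuid>"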

    def get_resources(self,
                      user=None,
                      project=None,
                      source=None,
                      start_timestamp=None,
                      start_timestamp_op=None,
                      end_timestamp=None,
                      end_timestamp_op=None,
                      metaquery=None,
                      resource=None,
                      limit=None):
        """Return an iterable of models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like ge, gt.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return
        q = hbase_utils.make_query(metaquery=metaquery,
                                   user_id=user,
                                   project_id=project,
                                   resource_id=resource,
                                   source=source)
        q = hbase_utils.make_meter_query_for_resource(start_timestamp,
                                                      start_timestamp_op,
                                                      end_timestamp,
                                                      end_timestamp_op, source,
                                                      q)
        with self.conn_pool.connection() as conn:
            resource_table = conn.table(self.RESOURCE_TABLE)
            LOG.debug("Query Resource table: %s", q)
            for resource_id, data in resource_table.scan(filter=q,
                                                         limit=limit):
                f_res, meters, md = hbase_utils.deserialize_entry(data)
                resource_id = hbase_utils.encode_unicode(resource_id)
                # Unfortunately happybase doesn't preserve the ordering of
                # results from HBase, so we have to find the min and max
                # manually.
                first_ts = min(meters, key=operator.itemgetter(1))[1]
                last_ts = max(meters, key=operator.itemgetter(1))[1]
                source = meters[0][0][1]
                # If we use QualifierFilter then HBase returns only the
                # qualifiers being filtered on, not the whole entry. That's
                # why we need to fetch the additional qualifiers manually.
                if 'project_id' not in f_res and 'user_id' not in f_res:
                    row = resource_table.row(resource_id,
                                             columns=[
                                                 'f:project_id', 'f:user_id',
                                                 'f:resource_metadata'
                                             ])
                    f_res, _m, md = hbase_utils.deserialize_entry(row)
                yield models.Resource(resource_id=resource_id,
                                      first_sample_timestamp=first_ts,
                                      last_sample_timestamp=last_ts,
                                      project_id=f_res['project_id'],
                                      source=source,
                                      user_id=f_res['user_id'],
                                      metadata=md)

    def get_meters(self,
                   user=None,
                   project=None,
                   resource=None,
                   source=None,
                   metaquery=None,
                   limit=None,
                   unique=False):
        """Return an iterable of models.Meter instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional resource filter.
        :param source: Optional source filter.
        :param metaquery: Optional dict with metadata to match on.
        :param limit: Maximum number of results to return.
        :param unique: If set to true, return only unique meter information.
        """
        if limit == 0:
            return

        metaquery = metaquery or {}

        with self.conn_pool.connection() as conn:
            resource_table = conn.table(self.RESOURCE_TABLE)
            q = hbase_utils.make_query(metaquery=metaquery,
                                       user_id=user,
                                       project_id=project,
                                       resource_id=resource,
                                       source=source)
            LOG.debug("Query Resource table: %s", q)

            gen = resource_table.scan(filter=q)
            # We need a result set to make sure the user doesn't receive
            # duplicate meters. Please see bug
            # https://bugs.launchpad.net/ceilometer/+bug/1301371
            result = set()
            for ignored, data in gen:
                flatten_result, meters, md = hbase_utils.deserialize_entry(
                    data)
                for m in meters:
                    if limit and len(result) >= limit:
                        return
                    _m_rts, m_source, name, m_type, unit = m[0]
                    if unique:
                        meter_dict = {
                            'name': name,
                            'type': m_type,
                            'unit': unit,
                            'resource_id': None,
                            'project_id': None,
                            'user_id': None,
                            'source': None
                        }
                    else:
                        meter_dict = {
                            'name': name,
                            'type': m_type,
                            'unit': unit,
                            'resource_id': flatten_result['resource_id'],
                            'project_id': flatten_result['project_id'],
                            'user_id': flatten_result['user_id']
                        }

                    frozen_meter = frozenset(meter_dict.items())
                    if frozen_meter in result:
                        continue
                    result.add(frozen_meter)
                    if not unique:
                        meter_dict.update(
                            {'source': m_source if m_source else None})

                    yield models.Meter(**meter_dict)

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of models.Sample instances.

        :param sample_filter: Filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return
        with self.conn_pool.connection() as conn:
            meter_table = conn.table(self.METER_TABLE)
            q, start, stop, columns = (
                hbase_utils.make_sample_query_from_filter(sample_filter,
                                                          require_meter=False))
            LOG.debug("Query Meter Table: %s", q)
            gen = meter_table.scan(filter=q,
                                   row_start=start,
                                   row_stop=stop,
                                   limit=limit,
                                   columns=columns)
            for ignored, meter in gen:
                d_meter = hbase_utils.deserialize_entry(meter)[0]
                d_meter['message']['counter_volume'] = (float(
                    d_meter['message']['counter_volume']))
                d_meter['message']['recorded_at'] = d_meter['recorded_at']
                yield models.Sample(**d_meter['message'])

    @staticmethod
    def _update_meter_stats(stat, meter):
        """Do the stats calculation on a requested time bucket in stats dict

        :param stats: dict where aggregated stats are kept
        :param index: time bucket index in stats
        :param meter: meter record as returned from HBase
        :param start_time: query start time
        :param period: length of the time bucket
        """
        vol = meter['counter_volume']
        ts = meter['timestamp']
        stat.unit = meter['counter_unit']
        stat.min = min(vol, stat.min or vol)
        stat.max = max(vol, stat.max)
        stat.sum = vol + (stat.sum or 0)
        stat.count += 1
        stat.avg = (stat.sum / float(stat.count))
        stat.duration_start = min(ts, stat.duration_start or ts)
        stat.duration_end = max(ts, stat.duration_end or ts)
        stat.duration = (timeutils.delta_seconds(stat.duration_start,
                                                 stat.duration_end))
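
        # For example, after three samples with volumes 1.0, 3.0 and 8.0 the
        # bucket ends up with count=3, sum=12.0, avg=4.0, min=1.0 and max=8.0.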

    def get_meter_statistics(self,
                             sample_filter,
                             period=None,
                             groupby=None,
                             aggregate=None):
        """Return an iterable of models.Statistics instances.

        Items contain meter statistics described by the query
        parameters. The filter must have a meter value set.

        .. note::

          Due to HBase limitations the aggregations are implemented
          in the driver itself, therefore this method will be quite slow
          because of all the Thrift traffic it is going to create.
        """
        if groupby:
            raise ceilometer.NotImplementedError("Group by not implemented.")

        if aggregate:
            raise ceilometer.NotImplementedError(
                'Selectable aggregates not implemented')

        with self.conn_pool.connection() as conn:
            meter_table = conn.table(self.METER_TABLE)
            q, start, stop, columns = (
                hbase_utils.make_sample_query_from_filter(sample_filter))
            # These fields are used to calculate the statistics
            columns.extend(
                ['f:timestamp', 'f:counter_volume', 'f:counter_unit'])
            meters = map(
                hbase_utils.deserialize_entry,
                list(meter for (ignored, meter) in meter_table.scan(
                    filter=q, row_start=start, row_stop=stop,
                    columns=columns)))

        if sample_filter.start_timestamp:
            start_time = sample_filter.start_timestamp
        elif meters:
            start_time = meters[-1][0]['timestamp']
        else:
            start_time = None

        if sample_filter.end_timestamp:
            end_time = sample_filter.end_timestamp
        elif meters:
            end_time = meters[0][0]['timestamp']
        else:
            end_time = None

        results = []

        if not period:
            period = 0
            period_start = start_time
            period_end = end_time

        # Since our HBase meters are stored newest-first, we need to iterate
        # in reverse order.
        for meter in meters[::-1]:
            ts = meter[0]['timestamp']
            if period:
                offset = int(
                    timeutils.delta_seconds(start_time, ts) / period) * period
                period_start = start_time + datetime.timedelta(0, offset)
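                # For example, with start_time at 12:00:00 and period=300 a
                # sample generated at 12:07:30 gives offset=300, so this
                # bucket's period_start is 12:05:00 (and period_end below
                # becomes 12:10:00).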

            if not results or not results[-1].period_start == period_start:
                if period:
                    period_end = period_start + datetime.timedelta(0, period)
                results.append(
                    models.Statistics(unit='',
                                      count=0,
                                      min=0,
                                      max=0,
                                      avg=0,
                                      sum=0,
                                      period=period,
                                      period_start=period_start,
                                      period_end=period_end,
                                      duration=None,
                                      duration_start=None,
                                      duration_end=None,
                                      groupby=None))
            self._update_meter_stats(results[-1], meter[0])
        return results
Ejemplo n.º 21
0
class Connection(base.Connection):
    """Put the event data into an ElasticSearch db.

    Events in ElasticSearch are indexed by day and stored by event_type.
    An example document::

      {"_index":"events_2014-10-21",
       "_type":"event_type0",
       "_id":"dc90e464-65ab-4a5d-bf66-ecb956b5d779",
       "_score":1.0,
       "_source":{"timestamp": "2014-10-21T20:02:09.274797"
                  "traits": {"id4_0": "2014-10-21T20:02:09.274797",
                             "id3_0": 0.7510790937279408,
                             "id2_0": 5,
                             "id1_0": "18c97ba1-3b74-441a-b948-a702a30cbce2"}
                 }
      }
    """

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )
    index_name = 'events'
    # NOTE(gordc): mainly for testing; data is not searchable immediately
    #              after a write, only after the periodic index refresh.
    _refresh_on_write = False

    def __init__(self, url):
        url_split = netutils.urlsplit(url)
        self.conn = es.Elasticsearch(url_split.netloc)

    def upgrade(self):
        iclient = es.client.IndicesClient(self.conn)
        ts_template = {
            'template': '*',
            'mappings': {
                '_default_': {
                    '_timestamp': {
                        'enabled': True,
                        'store': True
                    },
                    'properties': {
                        'traits': {
                            'type': 'nested'
                        }
                    }
                }
            }
        }
        iclient.put_template(name='enable_timestamp', body=ts_template)

    def record_events(self, events):
        def _build_bulk_index(event_list):
            for ev in event_list:
                traits = {t.name: t.value for t in ev.traits}
                yield {
                    '_op_type':
                    'create',
                    '_index':
                    '%s_%s' %
                    (self.index_name, ev.generated.date().isoformat()),
                    '_type':
                    ev.event_type,
                    '_id':
                    ev.message_id,
                    '_source': {
                        'timestamp': ev.generated.isoformat(),
                        'traits': traits,
                        'raw': ev.raw
                    }
                }

        error = None
        for ok, result in helpers.streaming_bulk(self.conn,
                                                 _build_bulk_index(events)):
            if not ok:
                __, result = result.popitem()
                if result['status'] == 409:
                    LOG.info(
                        _LI('Duplicate event detected, skipping it: %s') %
                        result)
                else:
                    LOG.exception(_LE('Failed to record event: %s') % result)
                    error = storage.StorageUnknownWriteError(result)

        if self._refresh_on_write:
            self.conn.indices.refresh(index='%s_*' % self.index_name)
            while self.conn.cluster.pending_tasks(local=True)['tasks']:
                pass
        if error:
            raise error

    def _make_dsl_from_filter(self, indices, ev_filter):
        q_args = {}
        filters = []

        if ev_filter.start_timestamp:
            filters.append({
                'range': {
                    'timestamp': {
                        'ge': ev_filter.start_timestamp.isoformat()
                    }
                }
            })
            while indices[0] < (
                    '%s_%s' % (self.index_name,
                               ev_filter.start_timestamp.date().isoformat())):
                del indices[0]
        if ev_filter.end_timestamp:
            filters.append({
                'range': {
                    'timestamp': {
                        'le': ev_filter.end_timestamp.isoformat()
                    }
                }
            })
            while indices[-1] > (
                    '%s_%s' %
                (self.index_name, ev_filter.end_timestamp.date().isoformat())):
                del indices[-1]
        q_args['index'] = indices

        if ev_filter.event_type:
            q_args['doc_type'] = ev_filter.event_type
        if ev_filter.message_id:
            filters.append({'term': {'_id': ev_filter.message_id}})
        if ev_filter.traits_filter or ev_filter.admin_proj:
            trait_filters = []
            or_cond = []
            for t_filter in ev_filter.traits_filter or []:
                value = None
                for val_type in ['integer', 'string', 'float', 'datetime']:
                    if t_filter.get(val_type):
                        value = t_filter.get(val_type)
                        if isinstance(value, six.string_types):
                            value = value.lower()
                        elif isinstance(value, datetime.datetime):
                            value = value.isoformat()
                        break
                if t_filter.get('op') in ['gt', 'ge', 'lt', 'le']:
                    op = (t_filter.get('op').replace('ge', 'gte').replace(
                        'le', 'lte'))
                    trait_filters.append(
                        {'range': {
                            t_filter['key']: {
                                op: value
                            }
                        }})
                else:
                    tf = {
                        "query": {
                            "query_string": {
                                "query":
                                "%s: \"%s\"" % (t_filter['key'], value)
                            }
                        }
                    }
                    if t_filter.get('op') == 'ne':
                        tf = {"not": tf}
                    trait_filters.append(tf)
            if ev_filter.admin_proj:
                or_cond = [{
                    'missing': {
                        'field': 'project_id'
                    }
                }, {
                    'term': {
                        'project_id': ev_filter.admin_proj
                    }
                }]
            filters.append({
                'nested': {
                    'path': 'traits',
                    'query': {
                        'filtered': {
                            'filter': {
                                'bool': {
                                    'must': trait_filters,
                                    'should': or_cond
                                }
                            }
                        }
                    }
                }
            })

        q_args['body'] = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': filters
                        }
                    }
                }
            }
        }
        return q_args
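
    # For illustration, a filter carrying only a start timestamp produces a
    # query body along these lines:
    #
    #   {'query': {'filtered': {'filter': {'bool': {'must': [
    #       {'range': {'timestamp': {'ge': '2015-01-01T00:00:00'}}}]}}}}}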

    def get_events(self, event_filter, limit=None):
        if limit == 0:
            return
        iclient = es.client.IndicesClient(self.conn)
        indices = iclient.get_mapping('%s_*' % self.index_name).keys()
        if indices:
            filter_args = self._make_dsl_from_filter(indices, event_filter)
            if limit is not None:
                filter_args['size'] = limit
            results = self.conn.search(
                fields=['_id', 'timestamp', '_type', '_source'],
                sort='timestamp:asc',
                **filter_args)
            trait_mappings = {}
            for record in results['hits']['hits']:
                trait_list = []
                if not record['_type'] in trait_mappings:
                    trait_mappings[record['_type']] = list(
                        self.get_trait_types(record['_type']))
                for key in record['_source']['traits'].keys():
                    value = record['_source']['traits'][key]
                    for t_map in trait_mappings[record['_type']]:
                        if t_map['name'] == key:
                            dtype = t_map['data_type']
                            break
                    else:
                        dtype = models.Trait.TEXT_TYPE
                    trait_list.append(
                        models.Trait(name=key,
                                     dtype=dtype,
                                     value=models.Trait.convert_value(
                                         dtype, value)))
                gen_ts = timeutils.normalize_time(
                    timeutils.parse_isotime(record['_source']['timestamp']))
                yield models.Event(message_id=record['_id'],
                                   event_type=record['_type'],
                                   generated=gen_ts,
                                   traits=sorted(
                                       trait_list,
                                       key=operator.attrgetter('dtype')),
                                   raw=record['_source']['raw'])

    def get_event_types(self):
        iclient = es.client.IndicesClient(self.conn)
        es_mappings = iclient.get_mapping('%s_*' % self.index_name)
        seen_types = set()
        for index in es_mappings.keys():
            for ev_type in es_mappings[index]['mappings'].keys():
                seen_types.add(ev_type)
        # TODO(gordc): tests assume sorted ordering but backends are not
        #              explicitly ordered.
        # NOTE: _default_ is a type that appears in all mappings but is not a
        #       real 'type'.
        seen_types.discard('_default_')
        return sorted(list(seen_types))

    @staticmethod
    def _remap_es_types(d_type):
        if d_type == 'string':
            d_type = 'text'
        elif d_type == 'long':
            d_type = 'int'
        elif d_type == 'double':
            d_type = 'float'
        elif d_type == 'date' or d_type == 'date_time':
            d_type = 'datetime'
        return d_type

    def get_trait_types(self, event_type):
        iclient = es.client.IndicesClient(self.conn)
        es_mappings = iclient.get_mapping('%s_*' % self.index_name)
        seen_types = []
        for index in es_mappings.keys():
            # if event_type exists in index and has traits
            if (es_mappings[index]['mappings'].get(event_type)
                    and es_mappings[index]['mappings'][event_type]
                ['properties']['traits'].get('properties')):
                for t_type in (es_mappings[index]['mappings'][event_type]
                               ['properties']['traits']['properties'].keys()):
                    d_type = (
                        es_mappings[index]['mappings'][event_type]
                        ['properties']['traits']['properties'][t_type]['type'])
                    d_type = models.Trait.get_type_by_name(
                        self._remap_es_types(d_type))
                    if (t_type, d_type) not in seen_types:
                        yield {'name': t_type, 'data_type': d_type}
                        seen_types.append((t_type, d_type))

    def get_traits(self, event_type, trait_type=None):
        t_types = dict((res['name'], res['data_type'])
                       for res in self.get_trait_types(event_type))
        if not t_types or (trait_type and trait_type not in t_types.keys()):
            return
        result = self.conn.search('%s_*' % self.index_name, event_type)
        for ev in result['hits']['hits']:
            if trait_type and ev['_source']['traits'].get(trait_type):
                yield models.Trait(name=trait_type,
                                   dtype=t_types[trait_type],
                                   value=models.Trait.convert_value(
                                       t_types[trait_type],
                                       ev['_source']['traits'][trait_type]))
            else:
                for trait in ev['_source']['traits'].keys():
                    yield models.Trait(name=trait,
                                       dtype=t_types[trait],
                                       value=models.Trait.convert_value(
                                           t_types[trait],
                                           ev['_source']['traits'][trait]))
Ejemplo n.º 22
0
class Connection(base.Connection):
    """Base Connection class for MongoDB and DB2 drivers."""
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       COMMON_AVAILABLE_CAPABILITIES)

    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def get_meters(self,
                   user=None,
                   project=None,
                   resource=None,
                   source=None,
                   metaquery=None,
                   pagination=None):
        """Return an iterable of models.Meter instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional resource filter.
        :param source: Optional source filter.
        :param metaquery: Optional dict with metadata to match on.
        :param pagination: Optional pagination query.
        """

        if pagination:
            raise NotImplementedError('Pagination not implemented')

        metaquery = metaquery or {}

        q = {}
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if resource is not None:
            q['_id'] = resource
        if source is not None:
            q['source'] = source
        q.update(metaquery)

        for r in self.db.resource.find(q):
            for r_meter in r['meter']:
                yield models.Meter(
                    name=r_meter['counter_name'],
                    type=r_meter['counter_type'],
                    # Return an empty string if 'counter_unit' is missing,
                    # for backward compatibility.
                    unit=r_meter.get('counter_unit', ''),
                    resource_id=r['_id'],
                    project_id=r['project_id'],
                    source=r['source'],
                    user_id=r['user_id'],
                )

    def update_alarm(self, alarm):
        """Update alarm."""
        data = alarm.as_dict()

        self.db.alarm.update({'alarm_id': alarm.alarm_id}, {'$set': data},
                             upsert=True)

        stored_alarm = self.db.alarm.find({'alarm_id': alarm.alarm_id})[0]
        del stored_alarm['_id']
        self._ensure_encapsulated_rule_format(stored_alarm)
        self._ensure_time_constraints(stored_alarm)
        return alarm_models.Alarm(**stored_alarm)

    create_alarm = update_alarm

    def delete_alarm(self, alarm_id):
        """Delete an alarm."""
        self.db.alarm.remove({'alarm_id': alarm_id})

    def record_alarm_change(self, alarm_change):
        """Record alarm change event."""
        self.db.alarm_history.insert(alarm_change.copy())

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of model.Sample instances.

        :param sample_filter: Filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return []
        q = pymongo_utils.make_query_from_filter(sample_filter,
                                                 require_meter=False)

        return self._retrieve_samples(q, [("timestamp", pymongo.DESCENDING)],
                                      limit)

    def get_alarms(self,
                   name=None,
                   user=None,
                   state=None,
                   meter=None,
                   project=None,
                   enabled=None,
                   alarm_id=None,
                   pagination=None):
        """Yields a lists of alarms that match filters

        :param name: The Alarm name.
        :param user: Optional ID for user that owns the resource.
        :param state: Optional string for alarm state.
        :param meter: Optional string for alarms associated with meter.
        :param project: Optional ID for project that owns the resource.
        :param enabled: Optional boolean to list disabled alarms.
        :param alarm_id: Optional alarm_id to return one alarm.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise NotImplementedError('Pagination not implemented')

        q = {}
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if name is not None:
            q['name'] = name
        if enabled is not None:
            q['enabled'] = enabled
        if alarm_id is not None:
            q['alarm_id'] = alarm_id
        if state is not None:
            q['state'] = state
        if meter is not None:
            q['rule.meter_name'] = meter
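
        # e.g. get_alarms(user='me', enabled=True) ends up querying MongoDB
        # with q = {'user_id': 'me', 'enabled': True}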

        return self._retrieve_alarms(q, [], None)

    def get_alarm_changes(self,
                          alarm_id,
                          on_behalf_of,
                          user=None,
                          project=None,
                          type=None,
                          start_timestamp=None,
                          start_timestamp_op=None,
                          end_timestamp=None,
                          end_timestamp_op=None):
        """Yields list of AlarmChanges describing alarm history

        Changes are always sorted in reverse order of occurrence, given
        the importance of currency.

        Segregation for non-administrative users is done on the basis
        of the on_behalf_of parameter. This allows such users to have
        visibility on both the changes initiated by themselves directly
        (generally creation, rule changes, or deletion) and also on those
        changes initiated on their behalf by the alarming service (state
        transitions after alarm thresholds are crossed).

        :param alarm_id: ID of alarm to return changes for
        :param on_behalf_of: ID of tenant to scope changes query (None for
                             administrative user, indicating all projects)
        :param user: Optional ID of user to return changes for
        :param project: Optional ID of project to return changes for
        :param type: Optional change type
        :param start_timestamp: Optional modified timestamp start range
        :param start_timestamp_op: Optional timestamp start range operation
        :param end_timestamp: Optional modified timestamp end range
        :param end_timestamp_op: Optional timestamp end range operation
        """
        q = dict(alarm_id=alarm_id)
        if on_behalf_of is not None:
            q['on_behalf_of'] = on_behalf_of
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if type is not None:
            q['type'] = type
        if start_timestamp or end_timestamp:
            ts_range = pymongo_utils.make_timestamp_range(
                start_timestamp, end_timestamp, start_timestamp_op,
                end_timestamp_op)
            if ts_range:
                q['timestamp'] = ts_range

        return self._retrieve_alarm_changes(
            q, [("timestamp", pymongo.DESCENDING)], None)

    def record_events(self, event_models):
        """Write the events to database.

        Return a list of events of type models.Event.DUPLICATE in case of
        trying to write an already existing event to the database, or
        models.Event.UNKNOWN_PROBLEM in case of any failure while recording
        the event in the database.

        :param event_models: a list of models.Event objects.
        """
        problem_events = []
        for event_model in event_models:
            traits = []
            if event_model.traits:
                for trait in event_model.traits:
                    traits.append({
                        'trait_name': trait.name,
                        'trait_type': trait.dtype,
                        'trait_value': trait.value
                    })
            try:
                self.db.event.insert({
                    '_id': event_model.message_id,
                    'event_type': event_model.event_type,
                    'timestamp': event_model.generated,
                    'traits': traits
                })
            except pymongo.errors.DuplicateKeyError:
                problem_events.append((models.Event.DUPLICATE, event_model))
            except Exception as ex:
                LOG.exception(_("Failed to record event: %s") % ex)
                problem_events.append(
                    (models.Event.UNKNOWN_PROBLEM, event_model))
        return problem_events

    def get_events(self, event_filter):
        """Return a list of models.Event objects.

        :param event_filter: storage.EventFilter object, consists of filters
                             for events that are stored in database.
        """
        q = pymongo_utils.make_events_query_from_filter(event_filter)
        res_events = []
        for event in self.db.event.find(q):
            traits = []
            for trait in event['traits']:
                traits.append(
                    models.Trait(name=trait['trait_name'],
                                 dtype=int(trait['trait_type']),
                                 value=trait['trait_value']))
            res_events.append(
                models.Event(message_id=event['_id'],
                             event_type=event['event_type'],
                             generated=event['timestamp'],
                             traits=traits))
        return res_events

    def get_event_types(self):
        """Return all event types as an iter of strings."""
        event_types = set()
        events = self.db.event.find()

        for event in events:
            event_type = event['event_type']
            if event_type not in event_types:
                event_types.add(event_type)
                yield event_type

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event.
        """
        trait_names = set()
        events = self.db.event.find({'event_type': event_type})

        for event in events:
            for trait in event['traits']:
                trait_name = trait['trait_name']
                if trait_name not in trait_names:
                    # Here we make sure the method returns only unique trait
                    # types. Only one trait type is returned per trait name,
                    # on the assumption that a given trait name can have only
                    # one trait type.
                    trait_names.add(trait_name)
                    yield {
                        'name': trait_name,
                        'data_type': trait['trait_type']
                    }

    def get_traits(self, event_type, trait_name=None):
        """Return all trait instances associated with an event_type.

        If trait_name is specified, only return instances of that trait.

        :param event_type: the type of the Event to filter by
        :param trait_name: the name of the Trait to filter by
        """
        if not trait_name:
            events = self.db.event.find({'event_type': event_type})
        else:
            # We select events that have both the given event_type and the
            # given trait_name, and retrieve only the matching traits from
            # each event.
            events = self.db.event.find(
                {
                    '$and': [{
                        'event_type': event_type
                    }, {
                        'traits.trait_name': trait_name
                    }]
                }, {'traits': {
                    '$elemMatch': {
                        'trait_name': trait_name
                    }
                }})
        traits = []
        for event in events:
            for trait in event['traits']:
                traits.append(
                    models.Trait(name=trait['trait_name'],
                                 dtype=trait['trait_type'],
                                 value=trait['trait_value']))
        for trait in sorted(traits, key=operator.attrgetter('dtype')):
            yield trait

    def query_samples(self, filter_expr=None, orderby=None, limit=None):
        return self._retrieve_data(filter_expr, orderby, limit, models.Meter)

    def query_alarms(self, filter_expr=None, orderby=None, limit=None):
        """Return an iterable of model.Alarm objects."""
        return self._retrieve_data(filter_expr, orderby, limit,
                                   alarm_models.Alarm)

    def query_alarm_history(self, filter_expr=None, orderby=None, limit=None):
        """Return an iterable of model.AlarmChange objects."""
        return self._retrieve_data(filter_expr, orderby, limit,
                                   alarm_models.AlarmChange)

    def _retrieve_data(self, filter_expr, orderby, limit, model):
        if limit == 0:
            return []
        query_filter = {}
        orderby_filter = [("timestamp", pymongo.DESCENDING)]
        transformer = pymongo_utils.QueryTransformer()
        if orderby is not None:
            orderby_filter = transformer.transform_orderby(orderby)
        if filter_expr is not None:
            query_filter = transformer.transform_filter(filter_expr)

        retrieve = {
            models.Meter: self._retrieve_samples,
            alarm_models.Alarm: self._retrieve_alarms,
            alarm_models.AlarmChange: self._retrieve_alarm_changes
        }
        return retrieve[model](query_filter, orderby_filter, limit)

    def _retrieve_samples(self, query, orderby, limit):
        if limit is not None:
            samples = self.db.meter.find(query, limit=limit, sort=orderby)
        else:
            samples = self.db.meter.find(query, sort=orderby)

        for s in samples:
            # Remove the ObjectId generated by the database when
            # the sample was inserted. It is an implementation
            # detail that should not leak outside of the driver.
            del s['_id']
            # Backward compatibility for samples without units
            s['counter_unit'] = s.get('counter_unit', '')
            # Tolerate absence of recorded_at in older datapoints
            s['recorded_at'] = s.get('recorded_at')
            yield models.Sample(**s)

    def _retrieve_alarms(self, query_filter, orderby, limit):
        if limit is not None:
            alarms = self.db.alarm.find(query_filter,
                                        limit=limit,
                                        sort=orderby)
        else:
            alarms = self.db.alarm.find(query_filter, sort=orderby)

        for alarm in alarms:
            a = {}
            a.update(alarm)
            del a['_id']
            self._ensure_encapsulated_rule_format(a)
            self._ensure_time_constraints(a)
            yield alarm_models.Alarm(**a)

    def _retrieve_alarm_changes(self, query_filter, orderby, limit):
        if limit is not None:
            alarms_history = self.db.alarm_history.find(query_filter,
                                                        limit=limit,
                                                        sort=orderby)
        else:
            alarms_history = self.db.alarm_history.find(query_filter,
                                                        sort=orderby)

        for alarm_history in alarms_history:
            ah = {}
            ah.update(alarm_history)
            del ah['_id']
            yield alarm_models.AlarmChange(**ah)

    @classmethod
    def _ensure_encapsulated_rule_format(cls, alarm):
        """Ensure the alarm returned by the storage have the correct format.

        The previous format looks like:
        {
            'alarm_id': '0ld-4l3rt',
            'enabled': True,
            'name': 'old-alert',
            'description': 'old-alert',
            'timestamp': None,
            'meter_name': 'cpu',
            'user_id': 'me',
            'project_id': 'and-da-boys',
            'comparison_operator': 'lt',
            'threshold': 36,
            'statistic': 'count',
            'evaluation_periods': 1,
            'period': 60,
            'state': "insufficient data",
            'state_timestamp': None,
            'ok_actions': [],
            'alarm_actions': ['http://nowhere/alarms'],
            'insufficient_data_actions': [],
            'repeat_actions': False,
            'matching_metadata': {'key': 'value'}
            # or 'matching_metadata': [{'key': 'key', 'value': 'value'}]
        }
        """

        if isinstance(alarm.get('rule'), dict):
            return

        alarm['type'] = 'threshold'
        alarm['rule'] = {}
        alarm['matching_metadata'] = cls._decode_matching_metadata(
            alarm['matching_metadata'])
        for field in [
                'period', 'evaluation_periods', 'threshold', 'statistic',
                'comparison_operator', 'meter_name'
        ]:
            if field in alarm:
                alarm['rule'][field] = alarm[field]
                del alarm[field]

        query = []
        for key in alarm['matching_metadata']:
            query.append({
                'field': key,
                'op': 'eq',
                'value': alarm['matching_metadata'][key],
                'type': 'string'
            })
        del alarm['matching_metadata']
        alarm['rule']['query'] = query
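
        # After this conversion the legacy alarm from the docstring above is
        # left with, roughly:
        #
        #   {'type': 'threshold',
        #    'rule': {'meter_name': 'cpu', 'comparison_operator': 'lt',
        #             'threshold': 36, 'statistic': 'count',
        #             'evaluation_periods': 1, 'period': 60,
        #             'query': [{'field': 'key', 'op': 'eq',
        #                        'value': 'value', 'type': 'string'}]},
        #    ...}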

    @staticmethod
    def _decode_matching_metadata(matching_metadata):
        if isinstance(matching_metadata, dict):
            # note(sileht): keep compatibility with alarm
            # with matching_metadata as a dict
            return matching_metadata
        else:
            new_matching_metadata = {}
            for elem in matching_metadata:
                new_matching_metadata[elem['key']] = elem['value']
            return new_matching_metadata

    @staticmethod
    def _ensure_time_constraints(alarm):
        """Ensures the alarm has a time constraints field."""
        if 'time_constraints' not in alarm:
            alarm['time_constraints'] = []
Ejemplo n.º 23
0
class Connection(base.Connection):
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def __init__(self, url):
        self.mc = monasca_client.Client(netutils.urlsplit(url))
        self.mon_filter = MonascaDataFilter()

    @staticmethod
    def _convert_to_dict(stats, cols):
        return {c: stats[i] for i, c in enumerate(cols)}

    def _convert_metaquery(self, metaquery):
        """Strip "metadata." from key and convert value to string

        :param metaquery:  { 'metadata.KEY': VALUE, ... }
        :returns: converted metaquery
        """
        query = {}
        for k, v in metaquery.items():
            key = k.split('.')[1]
            if isinstance(v, basestring):
                query[key] = v
            else:
                query[key] = str(int(v))
        return query
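
        # e.g. {'metadata.state': 'active', 'metadata.flavor_id': 2} becomes
        #      {'state': 'active', 'flavor_id': '2'}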

    def _match_metaquery_to_value_meta(self, query, value_meta):
        """Check if metaquery matches value_meta

        :param query: metaquery with converted format
        :param value_meta: metadata from monasca
        :returns: True for matched, False for not matched
        """
        if (len(query) > 0 and
            (len(value_meta) == 0
             or not set(query.items()).issubset(set(value_meta.items())))):
            return False
        else:
            return True
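
        # e.g. query={'state': 'active'} matches
        #      value_meta={'state': 'active', 'image': 'cirros'}, but not
        #      value_meta={} or value_meta={'state': 'error'}.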

    def upgrade(self):
        pass

    def clear(self):
        pass

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter.
        """
        LOG.info(
            _('metering data %(counter_name)s for %(resource_id)s: '
              '%(counter_volume)s') % ({
                  'counter_name': data['counter_name'],
                  'resource_id': data['resource_id'],
                  'counter_volume': data['counter_volume']
              }))

        metric = self.mon_filter.process_sample_for_monasca(data)
        self.mc.metrics_create(**metric)

    def clear_expired_metering_data(self, ttl):
        """Clear expired data from the backend storage system.

        Clearing occurs according to the time-to-live.
        :param ttl: Number of seconds to keep records for.
        """
        LOG.info(_("Dropping data with TTL %d"), ttl)

    def get_resources(self,
                      user=None,
                      project=None,
                      source=None,
                      start_timestamp=None,
                      start_timestamp_op=None,
                      end_timestamp=None,
                      end_timestamp_op=None,
                      metaquery=None,
                      resource=None,
                      pagination=None):
        """Return an iterable of dictionaries containing resource information.

        { 'resource_id': UUID of the resource,
          'project_id': UUID of project owning the resource,
          'user_id': UUID of user owning the resource,
          'timestamp': UTC datetime of last update to the resource,
          'metadata': most current metadata for the resource,
          'meter': list of the meters reporting data for the resource,
          }

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like gt, ge.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise ceilometer.NotImplementedError('Pagination not implemented')

        q = {}
        if metaquery:
            q = self._convert_metaquery(metaquery)

        if start_timestamp_op and start_timestamp_op != 'ge':
            raise ceilometer.NotImplementedError(
                ('Start time op %s '
                 'not implemented') % start_timestamp_op)

        if end_timestamp_op and end_timestamp_op != 'le':
            raise ceilometer.NotImplementedError(
                ('End time op %s '
                 'not implemented') % end_timestamp_op)

        if not start_timestamp:
            start_timestamp = timeutils.isotime(datetime.datetime(1970, 1, 1))
        else:
            start_timestamp = timeutils.isotime(start_timestamp)

        if end_timestamp:
            end_timestamp = timeutils.isotime(end_timestamp)

        dims_filter = dict(user_id=user,
                           project_id=project,
                           source=source,
                           resource_id=resource)
        dims_filter = {k: v for k, v in dims_filter.items() if v is not None}

        _search_args = dict(start_time=start_timestamp,
                            end_time=end_timestamp,
                            limit=1)

        _search_args = {k: v for k, v in _search_args.items() if v is not None}

        for metric in self.mc.metrics_list(**dict(dimensions=dims_filter)):
            _search_args['name'] = metric['name']
            _search_args['dimensions'] = metric['dimensions']
            try:
                for sample in self.mc.measurements_list(**_search_args):
                    d = sample['dimensions']
                    m = self._convert_to_dict(sample['measurements'][0],
                                              sample['columns'])
                    vm = m['value_meta']
                    if not self._match_metaquery_to_value_meta(q, vm):
                        continue
                    if d.get('resource_id'):
                        yield api_models.Resource(
                            resource_id=d.get('resource_id'),
                            first_sample_timestamp=(timeutils.parse_isotime(
                                m['timestamp'])),
                            last_sample_timestamp=timeutils.utcnow(),
                            project_id=d.get('project_id'),
                            source=d.get('source'),
                            user_id=d.get('user_id'),
                            metadata=m['value_meta'],
                        )
            except monasca_exc.HTTPConflict:
                pass

    def get_meters(self,
                   user=None,
                   project=None,
                   resource=None,
                   source=None,
                   limit=None,
                   metaquery=None,
                   pagination=None):
        """Return an iterable of dictionaries containing meter information.

        { 'name': name of the meter,
          'type': type of the meter (gauge, delta, cumulative),
          'resource_id': UUID of the resource,
          'project_id': UUID of project owning the resource,
          'user_id': UUID of user owning the resource,
          }

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional resource filter.
        :param source: Optional source filter.
        :param limit: Maximum number of results to return.
        :param metaquery: Optional dict with metadata to match on.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise ceilometer.NotImplementedError('Pagination not implemented')

        if metaquery:
            raise ceilometer.NotImplementedError('Metaquery not implemented')

        _dimensions = dict(user_id=user,
                           project_id=project,
                           resource_id=resource,
                           source=source)

        _dimensions = {k: v for k, v in _dimensions.items() if v is not None}

        _search_kwargs = {'dimensions': _dimensions}

        if limit:
            _search_kwargs['limit'] = limit

        for metric in self.mc.metrics_list(**_search_kwargs):
            yield api_models.Meter(
                name=metric['name'],
                type=metric['dimensions'].get('type') or 'cumulative',
                unit=metric['dimensions'].get('unit'),
                resource_id=metric['dimensions'].get('resource_id'),
                project_id=metric['dimensions'].get('project_id'),
                source=metric['dimensions'].get('source'),
                user_id=metric['dimensions'].get('user_id'))

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of dictionaries containing sample information.

        {
          'source': source of the resource,
          'counter_name': name of the meter,
          'counter_type': type of the sample (gauge, delta, cumulative),
          'counter_unit': unit of the sample,
          'counter_volume': volume of the sample,
          'user_id': UUID of user owning the resource,
          'project_id': UUID of project owning the resource,
          'resource_id': UUID of the resource,
          'timestamp': timestamp of the sample,
          'resource_metadata': metadata of the sample,
          'message_id': message ID of the sample,
          'message_signature': message signature of the sample,
          'recorded_at': time the sample was recorded
          }

        :param sample_filter: constraints for the sample search.
        :param limit: Maximum number of results to return.
        """

        if not sample_filter or not sample_filter.meter:
            raise ceilometer.NotImplementedError(
                "Supply meter name at the least")

        if (sample_filter.start_timestamp_op
                and sample_filter.start_timestamp_op != 'ge'):
            raise ceilometer.NotImplementedError(
                ('Start time op %s '
                 'not implemented') % sample_filter.start_timestamp_op)

        if (sample_filter.end_timestamp_op
                and sample_filter.end_timestamp_op != 'le'):
            raise ceilometer.NotImplementedError(
                ('End time op %s '
                 'not implemented') % sample_filter.end_timestamp_op)

        q = {}
        if sample_filter.metaquery:
            q = self._convert_metaquery(sample_filter.metaquery)

        if sample_filter.message_id:
            raise ceilometer.NotImplementedError('message_id not '
                                                 'implemented '
                                                 'in get_samples')

        if not sample_filter.start_timestamp:
            sample_filter.start_timestamp = \
                timeutils.isotime(datetime.datetime(1970, 1, 1))
        else:
            sample_filter.start_timestamp = \
                timeutils.isotime(sample_filter.start_timestamp)

        if sample_filter.end_timestamp:
            sample_filter.end_timestamp = \
                timeutils.isotime(sample_filter.end_timestamp)

        _dimensions = dict(user_id=sample_filter.user,
                           project_id=sample_filter.project,
                           resource_id=sample_filter.resource,
                           source=sample_filter.source)

        _dimensions = {k: v for k, v in _dimensions.items() if v is not None}

        _search_args = dict(
            name=sample_filter.meter,
            start_time=sample_filter.start_timestamp,
            start_timestamp_op=(sample_filter.start_timestamp_op),
            end_time=sample_filter.end_timestamp,
            end_timestamp_op=sample_filter.end_timestamp_op,
            limit=limit,
            merge_metrics=True,
            dimensions=_dimensions)

        _search_args = {k: v for k, v in _search_args.items() if v is not None}

        for sample in self.mc.measurements_list(**_search_args):
            LOG.debug(_('Retrieved sample: %s'), sample)

            d = sample['dimensions']
            for measurement in sample['measurements']:
                meas_dict = self._convert_to_dict(measurement,
                                                  sample['columns'])
                vm = meas_dict['value_meta']
                if not self._match_metaquery_to_value_meta(q, vm):
                    continue
                yield api_models.Sample(
                    source=d.get('source'),
                    counter_name=sample['name'],
                    counter_type=d.get('type'),
                    counter_unit=d.get('unit'),
                    counter_volume=meas_dict['value'],
                    user_id=d.get('user_id'),
                    project_id=d.get('project_id'),
                    resource_id=d.get('resource_id'),
                    timestamp=timeutils.parse_isotime(meas_dict['timestamp']),
                    resource_metadata=meas_dict['value_meta'],
                    message_id=sample['id'],
                    message_signature='',
                    recorded_at=(timeutils.parse_isotime(
                        meas_dict['timestamp'])))

    def get_meter_statistics(self,
                             filter,
                             period=None,
                             groupby=None,
                             aggregate=None):
        """Return a dictionary containing meter statistics.

        Meter statistics are described by the query parameters.
        The filter must have a meter value set.

        { 'min':
          'max':
          'avg':
          'sum':
          'count':
          'period':
          'period_start':
          'period_end':
          'duration':
          'duration_start':
          'duration_end':
          }
        """
        if filter:
            if not filter.meter:
                raise ceilometer.NotImplementedError('Query without meter '
                                                     'not implemented')
        else:
            raise ceilometer.NotImplementedError('Query without filter '
                                                 'not implemented')

        if groupby:
            raise ceilometer.NotImplementedError('Groupby not implemented')

        if filter.metaquery:
            raise ceilometer.NotImplementedError('Metaquery not implemented')

        if filter.message_id:
            raise ceilometer.NotImplementedError('Message_id query '
                                                 'not implemented')

        if filter.start_timestamp_op and filter.start_timestamp_op != 'ge':
            raise ceilometer.NotImplementedError(
                ('Start time op %s '
                 'not implemented') % filter.start_timestamp_op)

        if filter.end_timestamp_op and filter.end_timestamp_op != 'le':
            raise ceilometer.NotImplementedError(
                ('End time op %s '
                 'not implemented') % filter.end_timestamp_op)

        if not filter.start_timestamp:
            filter.start_timestamp = timeutils.isotime(
                datetime.datetime(1970, 1, 1))

        # TODO(monasca): Add this as a config parameter
        allowed_stats = ['avg', 'min', 'max', 'sum', 'count']
        if aggregate:
            not_allowed_stats = [
                a.func for a in aggregate if a.func not in allowed_stats
            ]
            if not_allowed_stats:
                raise ceilometer.NotImplementedError(
                    ('Aggregate function(s) '
                     '%s not implemented') % not_allowed_stats)

            statistics = [a.func for a in aggregate if a.func in allowed_stats]
        else:
            statistics = allowed_stats

        dims_filter = dict(user_id=filter.user,
                           project_id=filter.project,
                           source=filter.source,
                           resource_id=filter.resource)
        dims_filter = {k: v for k, v in dims_filter.items() if v is not None}

        period = period if period \
            else cfg.CONF.monasca.default_stats_period

        _search_args = dict(name=filter.meter,
                            dimensions=dims_filter,
                            start_time=filter.start_timestamp,
                            end_time=filter.end_timestamp,
                            period=period,
                            statistics=','.join(statistics),
                            merge_metrics=True)

        _search_args = {k: v for k, v in _search_args.items() if v is not None}

        stats_list = self.mc.statistics_list(**_search_args)
        for stats in stats_list:
            for s in stats['statistics']:
                stats_dict = self._convert_to_dict(s, stats['columns'])
                ts_start = timeutils.parse_isotime(stats_dict['timestamp'])
                ts_end = ts_start + datetime.timedelta(0, period)
                del stats_dict['timestamp']
                if 'count' in stats_dict:
                    stats_dict['count'] = int(stats_dict['count'])
                yield api_models.Statistics(
                    unit=stats['dimensions'].get('unit'),
                    period=period,
                    period_start=ts_start,
                    period_end=ts_end,
                    duration=period,
                    duration_start=ts_start,
                    duration_end=ts_end,
                    groupby={u'': u''},
                    **stats_dict)
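    # Illustration only (hypothetical response): each statistics row is zipped
    # with its column names before being turned into api_models.Statistics,
    # e.g. columns ['timestamp', 'avg', 'count'] and row
    # ['2015-04-14T13:00:00Z', 42.0, 7.0] become
    # {'timestamp': '2015-04-14T13:00:00Z', 'avg': 42.0, 'count': 7.0};
    # 'timestamp' is then removed and used to compute period_start/period_end.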
Ejemplo n.º 24
0
class Connection(base.Connection):
    """Put the event data into a SQLAlchemy database.

    Tables::

        - EventType
          - event definition
          - { id: event type id
              desc: description of event
              }
        - Event
          - event data
          - { id: event id
              message_id: message id
              generated = timestamp of event
              event_type_id = event type -> eventtype.id
              }
        - Trait
          - trait value
          - { event_id: event -> event.id
              trait_type_id: trait type -> traittype.id
              t_string: string value
              t_float: float value
              t_int: integer value
              t_datetime: timestamp value
              }
        - TraitType
          - trait definition
          - { id: trait id
              desc: description of trait
              data_type: data type (integer that maps to datatype)
              }
    """
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def __init__(self, url):
        self._engine_facade = db_session.EngineFacade(
            url, **dict(cfg.CONF.database.items()))

    def upgrade(self):
        # NOTE(gordc): to minimise memory, only import migration when needed
        from oslo.db.sqlalchemy import migration
        path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '..',
                            '..', 'storage', 'sqlalchemy', 'migrate_repo')
        migration.db_sync(self._engine_facade.get_engine(), path)

    def clear(self):
        engine = self._engine_facade.get_engine()
        for table in reversed(models.Base.metadata.sorted_tables):
            engine.execute(table.delete())
        self._engine_facade._session_maker.close_all()
        engine.dispose()

    def _get_or_create_trait_type(self, trait_type, data_type, session=None):
        """Find if this trait already exists in the database.

        If it does not, create a new entry in the trait type table.
        """
        if session is None:
            session = self._engine_facade.get_session()
        with session.begin(subtransactions=True):
            tt = session.query(models.TraitType).filter(
                models.TraitType.desc == trait_type,
                models.TraitType.data_type == data_type).first()
            if not tt:
                tt = models.TraitType(trait_type, data_type)
                session.add(tt)
        return tt

    def _make_trait(self, trait_model, event, session=None):
        """Make a new Trait from a Trait model.

        Doesn't flush or add to session.
        """
        trait_type = self._get_or_create_trait_type(trait_model.name,
                                                    trait_model.dtype, session)
        value_map = models.Trait._value_map
        values = {
            't_string': None,
            't_float': None,
            't_int': None,
            't_datetime': None
        }
        value = trait_model.value
        values[value_map[trait_model.dtype]] = value
        return models.Trait(trait_type, event, **values)
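    # Illustration only: the trait value is stored in exactly one of the typed
    # columns (t_string, t_float, t_int or t_datetime), selected by
    # models.Trait._value_map according to the trait's dtype; the remaining
    # columns are left as None.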

    def _get_or_create_event_type(self, event_type, session=None):
        """Check if an event type with the supplied name is already exists.

        If not, we create it and return the record. This may result in a flush.
        """
        if session is None:
            session = self._engine_facade.get_session()
        with session.begin(subtransactions=True):
            et = session.query(models.EventType).filter(
                models.EventType.desc == event_type).first()
            if not et:
                et = models.EventType(event_type)
                session.add(et)
        return et

    def _record_event(self, session, event_model):
        """Store a single Event, including related Traits."""
        with session.begin(subtransactions=True):
            event_type = self._get_or_create_event_type(event_model.event_type,
                                                        session=session)

            event = models.Event(event_model.message_id, event_type,
                                 event_model.generated)
            session.add(event)

            new_traits = []
            if event_model.traits:
                for trait in event_model.traits:
                    t = self._make_trait(trait, event, session=session)
                    session.add(t)
                    new_traits.append(t)

        # Note: we don't flush here explicitly (unless a new trait or event
        # does it). Otherwise, just wait until all the Events are staged.
        return event, new_traits

    def record_events(self, event_models):
        """Write the events to SQL database via sqlalchemy.

        :param event_models: a list of model.Event objects.

        Returns a list of (reason, event) tuples for events that could not
        be saved. Reasons are enumerated in storage.models.Event.

        Flush when they're all added, unless new EventTypes or
        TraitTypes are added along the way.
        """
        session = self._engine_facade.get_session()
        events = []
        problem_events = []
        for event_model in event_models:
            event = None
            try:
                with session.begin():
                    event = self._record_event(session, event_model)
            except dbexc.DBDuplicateEntry as e:
                LOG.exception(_("Failed to record duplicated event: %s") % e)
                problem_events.append(
                    (api_models.Event.DUPLICATE, event_model))
            except Exception as e:
                LOG.exception(_('Failed to record event: %s') % e)
                problem_events.append(
                    (api_models.Event.UNKNOWN_PROBLEM, event_model))
            events.append(event)
        return problem_events

    def get_events(self, event_filter):
        """Return an iterable of model.Event objects.

        :param event_filter: EventFilter instance
        """

        start = event_filter.start_time
        end = event_filter.end_time
        session = self._engine_facade.get_session()
        LOG.debug(_("Getting events that match filter: %s") % event_filter)
        with session.begin():
            event_query = session.query(models.Event)

            # Build up the join conditions
            event_join_conditions = [
                models.EventType.id == models.Event.event_type_id
            ]

            if event_filter.event_type:
                event_join_conditions.append(
                    models.EventType.desc == event_filter.event_type)

            event_query = event_query.join(models.EventType,
                                           sa.and_(*event_join_conditions))

            # Build up the where conditions
            event_filter_conditions = []
            if event_filter.message_id:
                event_filter_conditions.append(
                    models.Event.message_id == event_filter.message_id)
            if start:
                event_filter_conditions.append(models.Event.generated >= start)
            if end:
                event_filter_conditions.append(models.Event.generated <= end)

            if event_filter_conditions:
                event_query = (event_query.filter(
                    sa.and_(*event_filter_conditions)))

            event_models_dict = {}
            if event_filter.traits_filter:
                for trait_filter in event_filter.traits_filter:

                    # Build a sub query that joins Trait to TraitType
                    # where the trait name matches
                    trait_name = trait_filter.pop('key')
                    op = trait_filter.pop('op', 'eq')
                    conditions = [
                        models.Trait.trait_type_id == models.TraitType.id,
                        models.TraitType.desc == trait_name
                    ]

                    for key, value in six.iteritems(trait_filter):
                        sql_utils.trait_op_condition(conditions, key, value,
                                                     op)

                    trait_query = (session.query(models.Trait.event_id).join(
                        models.TraitType, sa.and_(*conditions)).subquery())

                    event_query = (event_query.join(
                        trait_query,
                        models.Event.id == trait_query.c.event_id))
            else:
                # If there are no trait filters, grab the events from the db
                query = (session.query(models.Event.id, models.Event.generated,
                                       models.Event.message_id,
                                       models.EventType.desc).join(
                                           models.EventType,
                                           sa.and_(*event_join_conditions)))
                if event_filter_conditions:
                    query = query.filter(sa.and_(*event_filter_conditions))
                for (id_, generated, message_id, desc_) in query.all():
                    event_models_dict[id_] = api_models.Event(
                        message_id, desc_, generated, [])

            # Build event models for the events
            event_query = event_query.subquery()
            query = (session.query(models.Trait).join(
                models.TraitType,
                models.Trait.trait_type_id == models.TraitType.id).join(
                    event_query, models.Trait.event_id == event_query.c.id))

            # Now convert the sqlalchemy objects back into Models ...
            for trait in query.all():
                event = event_models_dict.get(trait.event_id)
                if not event:
                    event = api_models.Event(trait.event.message_id,
                                             trait.event.event_type.desc,
                                             trait.event.generated, [])
                    event_models_dict[trait.event_id] = event
                trait_model = api_models.Trait(trait.trait_type.desc,
                                               trait.trait_type.data_type,
                                               trait.get_value())
                event.append_trait(trait_model)

        event_models = event_models_dict.values()
        return sorted(event_models, key=operator.attrgetter('generated'))

    def get_event_types(self):
        """Return all event types as an iterable of strings."""

        session = self._engine_facade.get_session()
        with session.begin():
            query = (session.query(models.EventType.desc).order_by(
                models.EventType.desc))
            for name in query.all():
                # The query returns a tuple with one element.
                yield name[0]

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.
        :param event_type: the type of the Event
        """
        session = self._engine_facade.get_session()

        LOG.debug(_("Get traits for %s") % event_type)
        with session.begin():
            query = (session.query(
                models.TraitType.desc, models.TraitType.data_type).join(
                    models.Trait,
                    models.Trait.trait_type_id == models.TraitType.id).join(
                        models.Event,
                        models.Event.id == models.Trait.event_id).join(
                            models.EventType,
                            sa.and_(
                                models.EventType.id == models.Event.event_type_id,
                                models.EventType.desc == event_type)).group_by(
                                    models.TraitType.desc,
                                    models.TraitType.data_type).distinct())

            for desc_, dtype in query.all():
                yield {'name': desc_, 'data_type': dtype}

    def get_traits(self, event_type, trait_type=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait type.
        :param event_type: the type of the Event to filter by
        :param trait_type: the name of the Trait to filter by
        """

        session = self._engine_facade.get_session()
        with session.begin():
            trait_type_filters = [
                models.TraitType.id == models.Trait.trait_type_id
            ]
            if trait_type:
                trait_type_filters.append(models.TraitType.desc == trait_type)

            query = (session.query(models.Trait).join(
                models.TraitType, sa.and_(*trait_type_filters)).join(
                    models.Event,
                    models.Event.id == models.Trait.event_id).join(
                        models.EventType,
                        sa.and_(
                            models.EventType.id == models.Event.event_type_id,
                            models.EventType.desc == event_type)))

            for trait in query.all():
                type = trait.trait_type
                yield api_models.Trait(name=type.desc,
                                       dtype=type.data_type,
                                       value=trait.get_value())
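    # Rough usage sketch (hypothetical, not part of the driver): store one
    # event and list the event types seen so far, assuming a configured
    # oslo.db setup and the api_models constructors used above.
    #
    #   conn = Connection('sqlite://')
    #   conn.upgrade()
    #   event = api_models.Event('msg-1', 'compute.instance.create.end',
    #                            datetime.datetime.utcnow(), [])
    #   conn.record_events([event])
    #   print(list(conn.get_event_types()))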
Ejemplo n.º 25
0
class Connection(base.Connection):
    """Base event Connection class for MongoDB and DB2 drivers."""
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       COMMON_AVAILABLE_CAPABILITIES)

    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def record_events(self, event_models):
        """Write the events to database.

        :param event_models: a list of models.Event objects.
        """
        error = None
        for event_model in event_models:
            traits = []
            if event_model.traits:
                for trait in event_model.traits:
                    traits.append({'trait_name': trait.name,
                                   'trait_type': trait.dtype,
                                   'trait_value': trait.value})
            try:
                self.db.event.insert_one(
                    {'_id': event_model.message_id,
                     'event_type': event_model.event_type,
                     'timestamp': event_model.generated,
                     'traits': traits, 'raw': event_model.raw})
            except pymongo.errors.DuplicateKeyError as ex:
                LOG.info(_LI("Duplicate event detected, skipping it: %s") % ex)
            except Exception as ex:
                LOG.exception(_LE("Failed to record event: %s") % ex)
                error = ex
        if error:
            raise error

    def get_events(self, event_filter, limit=None):
        """Return an iter of models.Event objects.

        :param event_filter: storage.EventFilter object, consists of filters
                             for events that are stored in database.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return
        q = pymongo_utils.make_events_query_from_filter(event_filter)
        if limit is not None:
            results = self.db.event.find(q, limit=limit)
        else:
            results = self.db.event.find(q)
        for event in results:
            traits = []
            for trait in event['traits']:
                traits.append(models.Trait(name=trait['trait_name'],
                                           dtype=int(trait['trait_type']),
                                           value=trait['trait_value']))
            yield models.Event(message_id=event['_id'],
                               event_type=event['event_type'],
                               generated=event['timestamp'],
                               traits=traits, raw=event.get('raw'))

    def get_event_types(self):
        """Return all event types as an iter of strings."""
        return self.db.event.distinct('event_type')

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event.
        """
        trait_names = set()
        events = self.db.event.find({'event_type': event_type})

        for event in events:
            for trait in event['traits']:
                trait_name = trait['trait_name']
                if trait_name not in trait_names:
                    # Ensure the method yields each trait type only once. It
                    # is assumed that a given trait name maps to exactly one
                    # trait type.
                    trait_names.add(trait_name)
                    yield {'name': trait_name,
                           'data_type': trait['trait_type']}

    def get_traits(self, event_type, trait_name=None):
        """Return all trait instances associated with an event_type.

        If trait_name is specified, only return instances of that trait.

        :param event_type: the type of the Event to filter by
        :param trait_name: the name of the Trait to filter by
        """
        if not trait_name:
            events = self.db.event.find({'event_type': event_type})
        else:
            # Select events that have both the given event_type and the given
            # trait_name, and project only the matching traits from each event.
            events = self.db.event.find({'$and': [{'event_type': event_type},
                                        {'traits.trait_name': trait_name}]},
                                        {'traits': {'$elemMatch':
                                                    {'trait_name': trait_name}}
                                         })
        for event in events:
            for trait in event['traits']:
                yield models.Trait(name=trait['trait_name'],
                                   dtype=trait['trait_type'],
                                   value=trait['trait_value'])
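    # Illustration only: events are stored as documents shaped like
    #   {'_id': <message_id>, 'event_type': <type>, 'timestamp': <generated>,
    #    'traits': [{'trait_name': ..., 'trait_type': ..., 'trait_value': ...}],
    #    'raw': ...}
    # so the $elemMatch projection above returns each matching event with only
    # the traits whose trait_name equals the requested name.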
Ejemplo n.º 26
0
class Connection(pymongo_base.Connection):
    """Put the data into a MongoDB database

    Collections::

        - meter
          - the raw incoming data
        - resource
          - the metadata for resources
          - { _id: uuid of resource,
              metadata: metadata dictionaries
              user_id: uuid
              project_id: uuid
              meter: [ array of {counter_name: string, counter_type: string,
                                 counter_unit: string} ]
            }
    """

    CAPABILITIES = utils.update_nested(pymongo_base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    CONNECTION_POOL = pymongo_utils.ConnectionPool()

    STANDARD_AGGREGATES = dict([(a.name, a) for a in [
        pymongo_utils.SUM_AGGREGATION,
        pymongo_utils.AVG_AGGREGATION,
        pymongo_utils.MIN_AGGREGATION,
        pymongo_utils.MAX_AGGREGATION,
        pymongo_utils.COUNT_AGGREGATION,
    ]])

    AGGREGATES = dict([(a.name, a) for a in [
        pymongo_utils.SUM_AGGREGATION,
        pymongo_utils.AVG_AGGREGATION,
        pymongo_utils.MIN_AGGREGATION,
        pymongo_utils.MAX_AGGREGATION,
        pymongo_utils.COUNT_AGGREGATION,
        pymongo_utils.STDDEV_AGGREGATION,
        pymongo_utils.CARDINALITY_AGGREGATION,
    ]])

    SORT_OPERATION_MAPPING = {
        'desc': (pymongo.DESCENDING, '$lt'),
        'asc': (pymongo.ASCENDING, '$gt')
    }

    MAP_RESOURCES = bson.code.Code("""
    function () {
        emit(this.resource_id,
             {user_id: this.user_id,
              project_id: this.project_id,
              source: this.source,
              first_timestamp: this.timestamp,
              last_timestamp: this.timestamp,
              metadata: this.resource_metadata})
    }""")

    REDUCE_RESOURCES = bson.code.Code("""
    function (key, values) {
        var merge = {user_id: values[0].user_id,
                     project_id: values[0].project_id,
                     source: values[0].source,
                     first_timestamp: values[0].first_timestamp,
                     last_timestamp: values[0].last_timestamp,
                     metadata: values[0].metadata}
        values.forEach(function(value) {
            if (merge.first_timestamp - value.first_timestamp > 0) {
                merge.first_timestamp = value.first_timestamp;
                merge.user_id = value.user_id;
                merge.project_id = value.project_id;
                merge.source = value.source;
            } else if (merge.last_timestamp - value.last_timestamp <= 0) {
                merge.last_timestamp = value.last_timestamp;
                merge.metadata = value.metadata;
            }
        });
        return merge;
      }""")

    _GENESIS = datetime.datetime(year=datetime.MINYEAR, month=1, day=1)
    _APOCALYPSE = datetime.datetime(year=datetime.MAXYEAR,
                                    month=12,
                                    day=31,
                                    hour=23,
                                    minute=59,
                                    second=59)

    def __init__(self, url):

        # NOTE(jd) Use our own connection pooling on top of the Pymongo one.
        # We need that otherwise we overflow the MongoDB instance with new
        # connection since we instantiate a Pymongo client each time someone
        # requires a new storage connection.
        self.conn = self.CONNECTION_POOL.connect(url)
        self.version = self.conn.server_info()['versionArray']
        # Require MongoDB 2.4 to use $setOnInsert
        if self.version < pymongo_utils.MINIMUM_COMPATIBLE_MONGODB_VERSION:
            raise storage.StorageBadVersion(
                "Need at least MongoDB %s" %
                pymongo_utils.MINIMUM_COMPATIBLE_MONGODB_VERSION)

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options['database'])
        if connection_options.get('username'):
            self.db.authenticate(connection_options['username'],
                                 connection_options['password'])

        # NOTE(jd) Upgrading is just about creating index, so let's do this
        # on connection to be sure at least the TTL is correctly updated if
        # needed.
        self.upgrade()

    @staticmethod
    def update_ttl(ttl, ttl_index_name, index_field, coll):
        """Update or create time_to_live indexes.

        :param ttl: time to live in seconds.
        :param ttl_index_name: name of the index we want to update or create.
        :param index_field: field with the index that we need to update.
        :param coll: collection whose indexes need to be updated.
        """
        indexes = coll.index_information()
        if ttl <= 0:
            if ttl_index_name in indexes:
                coll.drop_index(ttl_index_name)
            return

        if ttl_index_name in indexes:
            return coll.database.command('collMod',
                                         coll.name,
                                         index={
                                             'keyPattern': {
                                                 index_field: pymongo.ASCENDING
                                             },
                                             'expireAfterSeconds': ttl
                                         })

        coll.create_index([(index_field, pymongo.ASCENDING)],
                          expireAfterSeconds=ttl,
                          name=ttl_index_name)
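    # Illustration only: update_ttl(3600, 'meter_ttl', 'timestamp',
    # self.db.meter) creates a TTL index named 'meter_ttl' on 'timestamp' that
    # expires meter documents after one hour, or adjusts an existing index's
    # expireAfterSeconds via the collMod command; a ttl <= 0 drops the index.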

    def upgrade(self):
        # Establish indexes
        #
        # We need variations for user_id vs. project_id because of the
        # way the indexes are stored in b-trees. The user_id and
        # project_id values are usually mutually exclusive in the
        # queries, so the database won't take advantage of an index
        # including both.

        # create collection if not present
        if 'resource' not in self.db.conn.collection_names():
            self.db.conn.create_collection('resource')
        if 'meter' not in self.db.conn.collection_names():
            self.db.conn.create_collection('meter')

        name_qualifier = dict(user_id='', project_id='project_')
        background = dict(user_id=False, project_id=True)
        for primary in ['user_id', 'project_id']:
            name = 'meter_%sidx' % name_qualifier[primary]
            self.db.meter.create_index([
                ('resource_id', pymongo.ASCENDING),
                (primary, pymongo.ASCENDING),
                ('counter_name', pymongo.ASCENDING),
                ('timestamp', pymongo.ASCENDING),
            ],
                                       name=name,
                                       background=background[primary])

        self.db.meter.create_index([('timestamp', pymongo.DESCENDING)],
                                   name='timestamp_idx')

        # NOTE(ityaptin) This index covers sorting for get_resources requests,
        # and MongoDB can use parts of this compound index for queries based
        # on any of the user_id, project_id and last_sample_timestamp fields.
        self.db.resource.create_index(
            [('user_id', pymongo.DESCENDING),
             ('project_id', pymongo.DESCENDING),
             ('last_sample_timestamp', pymongo.DESCENDING)],
            name='resource_user_project_timestamp',
        )
        self.db.resource.create_index(
            [('last_sample_timestamp', pymongo.DESCENDING)],
            name='last_sample_timestamp_idx')

        # update or create time_to_live index
        ttl = cfg.CONF.database.metering_time_to_live
        self.update_ttl(ttl, 'meter_ttl', 'timestamp', self.db.meter)
        self.update_ttl(ttl, 'resource_ttl', 'last_sample_timestamp',
                        self.db.resource)

    def clear(self):
        self.conn.drop_database(self.db.name)
        # Connection will be reopened automatically if needed
        self.conn.close()

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        # Record the updated resource metadata - we use $setOnInsert to
        # unconditionally insert sample timestamps and resource metadata
        # (in the update case, this must be conditional on the sample not
        # being out-of-order)
        data = copy.deepcopy(data)
        data['resource_metadata'] = pymongo_utils.improve_keys(
            data.pop('resource_metadata'))
        resource = self.db.resource.find_one_and_update(
            {'_id': data['resource_id']},
            {
                '$set': {
                    'project_id': data['project_id'],
                    'user_id': data['user_id'],
                    'source': data['source'],
                },
                '$setOnInsert': {
                    'metadata': data['resource_metadata'],
                    'first_sample_timestamp': data['timestamp'],
                    'last_sample_timestamp': data['timestamp'],
                },
                '$addToSet': {
                    'meter': {
                        'counter_name': data['counter_name'],
                        'counter_type': data['counter_type'],
                        'counter_unit': data['counter_unit'],
                    },
                },
            },
            upsert=True,
            return_document=pymongo.ReturnDocument.AFTER,
        )

        # only update last sample timestamp if actually later (the usual
        # in-order case)
        last_sample_timestamp = resource.get('last_sample_timestamp')
        if (last_sample_timestamp is None
                or last_sample_timestamp <= data['timestamp']):
            self.db.resource.update_one({'_id': data['resource_id']}, {
                '$set': {
                    'metadata': data['resource_metadata'],
                    'last_sample_timestamp': data['timestamp']
                }
            })

        # only update first sample timestamp if actually earlier (the unusual
        # out-of-order case)
        # NOTE: a null first sample timestamp is not updated as this indicates
        # a pre-existing resource document dating from before we started
        # recording these timestamps in the resource collection
        first_sample_timestamp = resource.get('first_sample_timestamp')
        if (first_sample_timestamp is not None
                and first_sample_timestamp > data['timestamp']):
            self.db.resource.update_one(
                {'_id': data['resource_id']},
                {'$set': {
                    'first_sample_timestamp': data['timestamp']
                }})

        # Record the raw data for the meter. Use a copy so we do not
        # modify a data structure owned by our caller (the driver adds
        # a new key '_id').
        record = copy.copy(data)
        record['recorded_at'] = timeutils.utcnow()

        self.db.meter.insert_one(record)

    def clear_expired_metering_data(self, ttl):
        """Clear expired data from the backend storage system.

        Clearing occurs with native MongoDB time-to-live feature.
        """
        LOG.debug("Clearing expired metering data is based on native "
                  "MongoDB time to live feature and going in background.")

    @staticmethod
    def _get_marker(db_collection, marker_pairs):
        """Return the mark document according to the attribute-value pairs.

        :param db_collection: Database collection that be query.
        :param maker_pairs: Attribute-value pairs filter.
        """
        if db_collection is None:
            return
        if not marker_pairs:
            return
        ret = db_collection.find(marker_pairs, limit=2)

        if ret.count() == 0:
            raise base.NoResultFound
        elif ret.count() > 1:
            raise base.MultipleResultsFound
        else:
            _ret = ret.__getitem__(0)
            return _ret

    @classmethod
    def _recurse_sort_keys(cls, sort_keys, marker, flag):
        _first = sort_keys[0]
        value = marker[_first]
        if len(sort_keys) == 1:
            return {_first: {flag: value}}
        else:
            criteria_equ = {_first: {'eq': value}}
            criteria_cmp = cls._recurse_sort_keys(sort_keys[1:], marker, flag)
        return dict(criteria_equ, **criteria_cmp)
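    # Illustration only (hypothetical marker): with sort_keys
    # ['user_id', 'timestamp'], marker {'user_id': 'u1', 'timestamp': t} and
    # flag '$lt', the recursion yields
    # {'user_id': {'eq': 'u1'}, 'timestamp': {'$lt': t}}.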

    @classmethod
    def _build_sort_instructions(cls, sort_keys=None, sort_dir='desc'):
        """Returns a sort_instruction and paging operator.

        Sort instructions are used in the query to determine what attributes
        to sort on and what direction to use.
        :param q: The query dict passed in.
        :param sort_keys: array of attributes by which results be sorted.
        :param sort_dir: direction in which results be sorted (asc, desc).
        :return: sort instructions and paging operator
        """
        sort_keys = sort_keys or []
        sort_instructions = []
        _sort_dir, operation = cls.SORT_OPERATION_MAPPING.get(
            sort_dir, cls.SORT_OPERATION_MAPPING['desc'])

        for _sort_key in sort_keys:
            _instruction = (_sort_key, _sort_dir)
            sort_instructions.append(_instruction)

        return sort_instructions, operation
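    # Illustration only: sort_keys=['user_id', 'timestamp'] with sort_dir='asc'
    # returns ([('user_id', pymongo.ASCENDING),
    # ('timestamp', pymongo.ASCENDING)], '$gt'); an unknown sort_dir falls back
    # to the descending mapping.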

    def _get_time_constrained_resources(self, query, start_timestamp,
                                        start_timestamp_op, end_timestamp,
                                        end_timestamp_op, metaquery, resource,
                                        limit):
        """Return an iterable of models.Resource instances

        Items are constrained by sample timestamp.
        :param query: project/user/source query
        :param start_timestamp: modified timestamp start range.
        :param start_timestamp_op: start time operator, like gt, ge.
        :param end_timestamp: modified timestamp end range.
        :param end_timestamp_op: end time operator, like lt, le.
        :param metaquery: dict with metadata to match on.
        :param resource: resource filter.
        """
        if resource is not None:
            query['resource_id'] = resource

        # Add resource_ prefix so it matches the field in the db
        query.update(
            dict(('resource_' + k, v) for (k, v) in six.iteritems(metaquery)))

        # FIXME(dhellmann): This may not perform very well,
        # but doing any better will require changing the database
        # schema and that will need more thought than I have time
        # to put into it today.
        # Look for resources matching the above criteria and with
        # samples in the time range we care about, then change the
        # resource query to return just those resources by id.
        ts_range = pymongo_utils.make_timestamp_range(start_timestamp,
                                                      end_timestamp,
                                                      start_timestamp_op,
                                                      end_timestamp_op)
        if ts_range:
            query['timestamp'] = ts_range

        sort_keys = base._handle_sort_key('resource')
        sort_instructions = self._build_sort_instructions(sort_keys)[0]

        # use a unique collection name for the results collection,
        # as result post-sorting (as opposed to reduce pre-sorting)
        # is not possible on an inline M-R
        out = 'resource_list_%s' % uuid.uuid4()
        self.db.meter.map_reduce(self.MAP_RESOURCES,
                                 self.REDUCE_RESOURCES,
                                 out=out,
                                 sort={'resource_id': 1},
                                 query=query)

        try:
            if limit is not None:
                results = self.db[out].find(sort=sort_instructions,
                                            limit=limit)
            else:
                results = self.db[out].find(sort=sort_instructions)
            for r in results:
                resource = r['value']
                yield models.Resource(
                    resource_id=r['_id'],
                    user_id=resource['user_id'],
                    project_id=resource['project_id'],
                    first_sample_timestamp=resource['first_timestamp'],
                    last_sample_timestamp=resource['last_timestamp'],
                    source=resource['source'],
                    metadata=pymongo_utils.unquote_keys(resource['metadata']))
        finally:
            self.db[out].drop()

    def _get_floating_resources(self, query, metaquery, resource, limit):
        """Return an iterable of models.Resource instances

        Items are unconstrained by timestamp.
        :param query: project/user/source query
        :param metaquery: dict with metadata to match on.
        :param resource: resource filter.
        """
        if resource is not None:
            query['_id'] = resource

        query.update(dict((k, v) for (k, v) in six.iteritems(metaquery)))

        keys = base._handle_sort_key('resource')
        sort_keys = [
            'last_sample_timestamp' if i == 'timestamp' else i for i in keys
        ]
        sort_instructions = self._build_sort_instructions(sort_keys)[0]

        if limit is not None:
            results = self.db.resource.find(query,
                                            sort=sort_instructions,
                                            limit=limit)
        else:
            results = self.db.resource.find(query, sort=sort_instructions)

        for r in results:
            yield models.Resource(
                resource_id=r['_id'],
                user_id=r['user_id'],
                project_id=r['project_id'],
                first_sample_timestamp=r.get('first_sample_timestamp',
                                             self._GENESIS),
                last_sample_timestamp=r.get('last_sample_timestamp',
                                            self._APOCALYPSE),
                source=r['source'],
                metadata=pymongo_utils.unquote_keys(r['metadata']))

    def get_resources(self,
                      user=None,
                      project=None,
                      source=None,
                      start_timestamp=None,
                      start_timestamp_op=None,
                      end_timestamp=None,
                      end_timestamp_op=None,
                      metaquery=None,
                      resource=None,
                      limit=None):
        """Return an iterable of models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like gt, ge.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return
        metaquery = pymongo_utils.improve_keys(metaquery, metaquery=True) or {}

        query = {}
        if user is not None:
            query['user_id'] = user
        if project is not None:
            query['project_id'] = project
        if source is not None:
            query['source'] = source

        if start_timestamp or end_timestamp:
            return self._get_time_constrained_resources(
                query, start_timestamp, start_timestamp_op, end_timestamp,
                end_timestamp_op, metaquery, resource, limit)
        else:
            return self._get_floating_resources(query, metaquery, resource,
                                                limit)

    @staticmethod
    def _make_period_dict(period, first_ts):
        """Create a period field for _id of grouped fields.

        :param period: Period duration in seconds
        :param first_ts: First timestamp for first period
        :return:
        """
        if period >= 0:
            period_unique_dict = {
                "period_start": {
                    "$divide": [{
                        "$subtract": [{
                            "$subtract": ["$timestamp", first_ts]
                        }, {
                            "$mod": [{
                                "$subtract": ["$timestamp", first_ts]
                            }, period * 1000]
                        }]
                    }, period * 1000]
                }
            }
        else:
            # Note(ityaptin) Hack for older MongoDB versions (2.4 and older).
            # Since 2.6 we could use the $literal operator.
            period_unique_dict = {"$period_start": {"$add": [0, 0]}}
        return period_unique_dict
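    # Illustration only: for a positive period, the expression above computes
    # (inside the $group _id) the zero-based period index
    #   floor((timestamp - first_ts) / (period * 1000))
    # using millisecond date arithmetic; _stats_result_to_model() later
    # multiplies this index by the period (in seconds) to derive period_start.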

    def get_meter_statistics(self,
                             sample_filter,
                             period=None,
                             groupby=None,
                             aggregate=None):
        """Return an iterable of models.Statistics instance.

        Items are containing meter statistics described by the query
        parameters. The filter must have a meter value set.
        """

        if (groupby and set(groupby) - set([
                'user_id', 'project_id', 'resource_id', 'source',
                'resource_metadata.instance_type'
        ])):
            raise ceilometer.NotImplementedError(
                "Unable to group by these fields")
        q = pymongo_utils.make_query_from_filter(sample_filter)

        group_stage = {}
        project_stage = {
            "unit": "$_id.unit",
            "name": "$_id.name",
            "first_timestamp": "$first_timestamp",
            "last_timestamp": "$last_timestamp",
            "period_start": "$_id.period_start",
        }

        # Add timestamps to $group stage
        group_stage.update({
            "first_timestamp": {
                "$min": "$timestamp"
            },
            "last_timestamp": {
                "$max": "$timestamp"
            }
        })

        # Define an _id field for grouped documents
        unique_group_field = {"name": "$counter_name", "unit": "$counter_unit"}

        # Define a first timestamp for periods
        if sample_filter.start_timestamp:
            first_timestamp = sample_filter.start_timestamp
        else:
            first_timestamp_cursor = self.db.meter.find(limit=1,
                                                        sort=[
                                                            ('timestamp',
                                                             pymongo.ASCENDING)
                                                        ])
            if first_timestamp_cursor.count():
                first_timestamp = first_timestamp_cursor[0]['timestamp']
            else:
                first_timestamp = utils.EPOCH_TIME

        # Add a period_start field to the unique identifier of grouped documents
        if period:
            period_dict = self._make_period_dict(period, first_timestamp)
            unique_group_field.update(period_dict)

        # Add groupby fields to the unique identifier of grouped documents
        if groupby:
            unique_group_field.update(
                dict((field.replace(".", "/"), "$%s" % field)
                     for field in groupby))

        group_stage.update({"_id": unique_group_field})

        self._compile_aggregate_stages(aggregate, group_stage, project_stage)

        # Aggregation pipeline stages. They run one after another, each
        # consuming the documents produced by the previous stage.
        aggregation_query = [{
            '$match': q
        }, {
            "$sort": {
                "timestamp": 1
            }
        }, {
            "$group": group_stage
        }, {
            "$sort": {
                "_id.period_start": 1
            }
        }, {
            "$project": project_stage
        }]

        # results is a dict with pymongo<=2.6.3 and a CommandCursor with >=3.0
        results = self.db.meter.aggregate(aggregation_query,
                                          **self._make_aggregation_params())
        return [
            self._stats_result_to_model(point, groupby, aggregate, period,
                                        first_timestamp)
            for point in self._get_results(results)
        ]

    def _stats_result_aggregates(self, result, aggregate):
        stats_args = {}
        for attr in Connection.STANDARD_AGGREGATES.keys():
            if attr in result:
                stats_args[attr] = result[attr]

        if aggregate:
            stats_args['aggregate'] = {}
            for agr in aggregate:
                stats_args['aggregate'].update(
                    Connection.AGGREGATES[agr.func].finalize(
                        result, agr.param, self.version))
        return stats_args

    def _stats_result_to_model(self, result, groupby, aggregate, period,
                               first_timestamp):
        if period is None:
            period = 0
        first_timestamp = pymongo_utils.from_unix_timestamp(first_timestamp)
        stats_args = self._stats_result_aggregates(result, aggregate)

        stats_args['unit'] = result['unit']
        stats_args['duration'] = (result["last_timestamp"] -
                                  result["first_timestamp"]).total_seconds()
        stats_args['duration_start'] = result['first_timestamp']
        stats_args['duration_end'] = result['last_timestamp']
        stats_args['period'] = period
        start = result.get("period_start", 0) * period

        stats_args['period_start'] = (first_timestamp +
                                      datetime.timedelta(seconds=start))
        stats_args['period_end'] = (first_timestamp +
                                    datetime.timedelta(seconds=start + period)
                                    if period else result['last_timestamp'])

        stats_args['groupby'] = (dict(
            (g, result['_id'].get(g.replace(".", "/")))
            for g in groupby) if groupby else None)
        return models.Statistics(**stats_args)

    def _compile_aggregate_stages(self, aggregate, group_stage, project_stage):
        if not aggregate:
            for aggregation in Connection.STANDARD_AGGREGATES.values():
                group_stage.update(
                    aggregation.group(version_array=self.version))
                project_stage.update(
                    aggregation.project(version_array=self.version))
        else:
            for description in aggregate:
                aggregation = Connection.AGGREGATES.get(description.func)
                if aggregation:
                    if not aggregation.validate(description.param):
                        raise storage.StorageBadAggregate(
                            'Bad aggregate: %s.%s' %
                            (description.func, description.param))
                    group_stage.update(
                        aggregation.group(description.param,
                                          version_array=self.version))
                    project_stage.update(
                        aggregation.project(description.param,
                                            version_array=self.version))

    @staticmethod
    def _get_results(results):
        if isinstance(results, dict):
            return results.get('result', [])
        else:
            return results

    def _make_aggregation_params(self):
        if self.version >= pymongo_utils.COMPLETE_AGGREGATE_COMPATIBLE_VERSION:
            return {"allowDiskUse": True}
        return {}
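
The $subtract/$mod/$divide expression built by _make_period_dict above just computes which fixed-size period bucket each sample falls into. Below is a minimal sketch of the same arithmetic in plain Python (assuming, as the period * 1000 factor suggests, that the pipeline compares timestamps in milliseconds; the helper name is purely illustrative and not part of the driver):

def period_start_index(timestamp_ms, first_ts_ms, period_seconds):
    """Return the zero-based period bucket a sample falls into."""
    period_ms = period_seconds * 1000
    offset = timestamp_ms - first_ts_ms
    # Strip the remainder so the offset aligns on a period boundary, then
    # divide: this is what ends up in _id.period_start for each group.
    return (offset - offset % period_ms) // period_ms

# A sample 11 minutes after first_ts with a 300-second period lands in
# bucket 2; _stats_result_to_model later turns that back into
# first_timestamp + 2 * 300 seconds.
assert period_start_index(11 * 60 * 1000, 0, 300) == 2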
Example No. 27
0
class Connection(base.Connection):
    """Put the data into a SQLAlchemy database.

    Tables::

        - meter
          - meter definition
          - { id: meter id
              name: meter name
              type: meter type
              unit: meter unit
              }
        - resource
          - resource definition
          - { internal_id: resource id
              resource_id: resource uuid
              user_id: user uuid
              project_id: project uuid
              source_id: source id
              resource_metadata: metadata dictionary
              metadata_hash: metadata dictionary hash
              }
        - sample
          - the raw incoming data
          - { id: sample id
              meter_id: meter id            (->meter.id)
              resource_id: resource id      (->resource.internal_id)
              volume: sample volume
              timestamp: datetime
              recorded_at: datetime
              message_signature: message signature
              message_id: message uuid
              }
    """
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def __init__(self, url):
        self._engine_facade = db_session.EngineFacade(
            url,
            **dict(cfg.CONF.database.items())
        )

    def upgrade(self):
        path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                            'sqlalchemy', 'migrate_repo')
        migration.db_sync(self._engine_facade.get_engine(), path)

    def clear(self):
        engine = self._engine_facade.get_engine()
        for table in reversed(models.Base.metadata.sorted_tables):
            engine.execute(table.delete())
        self._engine_facade._session_maker.close_all()
        engine.dispose()

    @staticmethod
    def _create_meter(session, name, type, unit):
        # TODO(gordc): implement lru_cache to improve performance
        try:
            nested = session.connection().dialect.name != 'sqlite'
            with session.begin(nested=nested,
                               subtransactions=not nested):
                obj = (session.query(models.Meter)
                       .filter(models.Meter.name == name)
                       .filter(models.Meter.type == type)
                       .filter(models.Meter.unit == unit).first())
                if obj is None:
                    obj = models.Meter(name=name, type=type, unit=unit)
                    session.add(obj)
        except dbexc.DBDuplicateEntry:
            # retry function to pick up duplicate committed object
            obj = Connection._create_meter(session, name, type, unit)

        return obj

    @staticmethod
    def _create_resource(session, res_id, user_id, project_id, source_id,
                         rmeta):
        # TODO(gordc): implement lru_cache to improve performance
        try:
            nested = session.connection().dialect.name != 'sqlite'
            m_hash = jsonutils.dumps(rmeta, sort_keys=True)
            with session.begin(nested=nested,
                               subtransactions=not nested):
                obj = (session.query(models.Resource.internal_id)
                       .filter(models.Resource.resource_id == res_id)
                       .filter(models.Resource.user_id == user_id)
                       .filter(models.Resource.project_id == project_id)
                       .filter(models.Resource.source_id == source_id)
                       .filter(models.Resource.metadata_hash ==
                               hashlib.md5(m_hash).hexdigest()).first())
                obj_id = obj[0] if obj else None
                if obj_id is None:
                    obj = models.Resource(resource_id=res_id, user_id=user_id,
                                          project_id=project_id,
                                          source_id=source_id,
                                          resource_metadata=rmeta)
                    session.add(obj)
                    session.flush()
                    obj_id = obj.internal_id
                    if rmeta and isinstance(rmeta, dict):
                        meta_map = {}
                        for key, v in utils.dict_to_keyval(rmeta):
                            try:
                                _model = sql_utils.META_TYPE_MAP[type(v)]
                                if meta_map.get(_model) is None:
                                    meta_map[_model] = []
                                meta_map[_model].append(
                                    {'id': obj_id, 'meta_key': key,
                                     'value': v})
                            except KeyError:
                                LOG.warn(_("Unknown metadata type. Key (%s) "
                                           "will not be queryable."), key)
                        for _model in meta_map.keys():
                            session.execute(_model.__table__.insert(),
                                            meta_map[_model])

        except dbexc.DBDuplicateEntry:
            # retry function to pick up duplicate committed object
            obj_id = Connection._create_resource(session, res_id, user_id,
                                                 project_id, source_id, rmeta)

        return obj_id

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        session = self._engine_facade.get_session()
        with session.begin():
            # Record the raw data for the sample.
            meter = self._create_meter(session,
                                       data['counter_name'],
                                       data['counter_type'],
                                       data['counter_unit'])
            res_id = self._create_resource(session,
                                           data['resource_id'],
                                           data['user_id'],
                                           data['project_id'],
                                           data['source'],
                                           data['resource_metadata'])
            sample = models.Sample(
                meter_id=meter.id,
                resource_id=res_id,
                timestamp=data['timestamp'],
                volume=data['counter_volume'],
                message_signature=data['message_signature'],
                message_id=data['message_id'])
            session.add(sample)

    def clear_expired_metering_data(self, ttl):
        """Clear expired data from the backend storage system.

        Clearing occurs according to the time-to-live.
        :param ttl: Number of seconds to keep records for.
        """

        session = self._engine_facade.get_session()
        with session.begin():
            end = timeutils.utcnow() - datetime.timedelta(seconds=ttl)
            sample_q = (session.query(models.Sample)
                        .filter(models.Sample.timestamp < end))

            sample_subq = sample_q.subquery()
            for table in [models.MetaText, models.MetaBigInt,
                          models.MetaFloat, models.MetaBool]:
                (session.query(table)
                 .join(sample_subq, sample_subq.c.id == table.id)
                 .delete())

            rows = sample_q.delete()
            # remove Meter definitions with no matching samples
            (session.query(models.Meter)
             .filter(~models.Meter.samples.any())
             .delete(synchronize_session='fetch'))
            (session.query(models.Resource)
             .filter(~models.Resource.samples.any())
             .delete(synchronize_session='fetch'))
            LOG.info(_("%d samples removed from database"), rows)

    def get_resources(self, user=None, project=None, source=None,
                      start_timestamp=None, start_timestamp_op=None,
                      end_timestamp=None, end_timestamp_op=None,
                      metaquery=None, resource=None, pagination=None):
        """Return an iterable of api_models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like gt, ge.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise NotImplementedError('Pagination not implemented')

        s_filter = storage.SampleFilter(user=user,
                                        project=project,
                                        source=source,
                                        start=start_timestamp,
                                        start_timestamp_op=start_timestamp_op,
                                        end=end_timestamp,
                                        end_timestamp_op=end_timestamp_op,
                                        metaquery=metaquery,
                                        resource=resource)

        session = self._engine_facade.get_session()
        # get list of resource_ids
        res_q = session.query(distinct(models.Resource.resource_id)).join(
            models.Sample,
            models.Sample.resource_id == models.Resource.internal_id)
        res_q = make_query_from_filter(session, res_q, s_filter,
                                       require_meter=False)

        for res_id in res_q.all():
            # get latest Sample
            max_q = (session.query(models.Sample)
                     .join(models.Resource,
                           models.Resource.internal_id ==
                           models.Sample.resource_id)
                     .filter(models.Resource.resource_id == res_id[0]))
            max_q = make_query_from_filter(session, max_q, s_filter,
                                           require_meter=False)
            max_q = max_q.order_by(models.Sample.timestamp.desc(),
                                   models.Sample.id.desc()).limit(1)

            # get the min timestamp value.
            min_q = (session.query(models.Sample.timestamp)
                     .join(models.Resource,
                           models.Resource.internal_id ==
                           models.Sample.resource_id)
                     .filter(models.Resource.resource_id == res_id[0]))
            min_q = make_query_from_filter(session, min_q, s_filter,
                                           require_meter=False)
            min_q = min_q.order_by(models.Sample.timestamp.asc()).limit(1)

            sample = max_q.first()
            if sample:
                yield api_models.Resource(
                    resource_id=sample.resource.resource_id,
                    project_id=sample.resource.project_id,
                    first_sample_timestamp=min_q.first().timestamp,
                    last_sample_timestamp=sample.timestamp,
                    source=sample.resource.source_id,
                    user_id=sample.resource.user_id,
                    metadata=sample.resource.resource_metadata
                )

    def get_meters(self, user=None, project=None, resource=None, source=None,
                   metaquery=None, pagination=None):
        """Return an iterable of api_models.Meter instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional ID of the resource.
        :param source: Optional source filter.
        :param metaquery: Optional dict with metadata to match on.
        :param pagination: Optional pagination query.
        """

        if pagination:
            raise NotImplementedError('Pagination not implemented')

        s_filter = storage.SampleFilter(user=user,
                                        project=project,
                                        source=source,
                                        metaquery=metaquery,
                                        resource=resource)

        # NOTE(gordc): get latest sample of each meter/resource. we do not
        #              filter here as we want to filter only on latest record.
        session = self._engine_facade.get_session()
        subq = session.query(func.max(models.Sample.id).label('id')).join(
            models.Resource,
            models.Resource.internal_id == models.Sample.resource_id).group_by(
            models.Sample.meter_id, models.Resource.resource_id)
        if resource:
            subq = subq.filter(models.Resource.resource_id == resource)
        subq = subq.subquery()

        # get meter details for samples.
        query_sample = (session.query(models.Sample.meter_id,
                                      models.Meter.name, models.Meter.type,
                                      models.Meter.unit,
                                      models.Resource.resource_id,
                                      models.Resource.project_id,
                                      models.Resource.source_id,
                                      models.Resource.user_id).join(
            subq, subq.c.id == models.Sample.id)
            .join(models.Meter, models.Meter.id == models.Sample.meter_id)
            .join(models.Resource,
                  models.Resource.internal_id == models.Sample.resource_id))
        query_sample = make_query_from_filter(session, query_sample, s_filter,
                                              require_meter=False)

        for row in query_sample.all():
            yield api_models.Meter(
                name=row.name,
                type=row.type,
                unit=row.unit,
                resource_id=row.resource_id,
                project_id=row.project_id,
                source=row.source_id,
                user_id=row.user_id)

    def _retrieve_samples(self, query):
        samples = query.all()

        for s in samples:
            # Remove the id generated by the database when
            # the sample was inserted. It is an implementation
            # detail that should not leak outside of the driver.
            yield api_models.Sample(
                source=s.source_id,
                counter_name=s.counter_name,
                counter_type=s.counter_type,
                counter_unit=s.counter_unit,
                counter_volume=s.counter_volume,
                user_id=s.user_id,
                project_id=s.project_id,
                resource_id=s.resource_id,
                timestamp=s.timestamp,
                recorded_at=s.recorded_at,
                resource_metadata=s.resource_metadata,
                message_id=s.message_id,
                message_signature=s.message_signature,
            )

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of api_models.Samples.

        :param sample_filter: Filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return []

        session = self._engine_facade.get_session()
        query = session.query(models.Sample.timestamp,
                              models.Sample.recorded_at,
                              models.Sample.message_id,
                              models.Sample.message_signature,
                              models.Sample.volume.label('counter_volume'),
                              models.Meter.name.label('counter_name'),
                              models.Meter.type.label('counter_type'),
                              models.Meter.unit.label('counter_unit'),
                              models.Resource.source_id,
                              models.Resource.user_id,
                              models.Resource.project_id,
                              models.Resource.resource_metadata,
                              models.Resource.resource_id).join(
            models.Meter, models.Meter.id == models.Sample.meter_id).join(
            models.Resource,
            models.Resource.internal_id == models.Sample.resource_id).order_by(
            models.Sample.timestamp.desc())
        query = make_query_from_filter(session, query, sample_filter,
                                       require_meter=False)
        if limit:
            query = query.limit(limit)
        return self._retrieve_samples(query)

    def query_samples(self, filter_expr=None, orderby=None, limit=None):
        if limit == 0:
            return []

        session = self._engine_facade.get_session()
        query = session.query(models.FullSample)
        transformer = sql_utils.QueryTransformer(models.FullSample, query)
        if filter_expr is not None:
            transformer.apply_filter(filter_expr)

        transformer.apply_options(orderby, limit)
        return self._retrieve_samples(transformer.get_query())

    @staticmethod
    def _get_aggregate_functions(aggregate):
        if not aggregate:
            return [f for f in STANDARD_AGGREGATES.values()]

        functions = []

        for a in aggregate:
            if a.func in STANDARD_AGGREGATES:
                functions.append(STANDARD_AGGREGATES[a.func])
            elif a.func in UNPARAMETERIZED_AGGREGATES:
                functions.append(UNPARAMETERIZED_AGGREGATES[a.func])
            elif a.func in PARAMETERIZED_AGGREGATES['compute']:
                validate = PARAMETERIZED_AGGREGATES['validate'].get(a.func)
                if not (validate and validate(a.param)):
                    raise storage.StorageBadAggregate('Bad aggregate: %s.%s'
                                                      % (a.func, a.param))
                compute = PARAMETERIZED_AGGREGATES['compute'][a.func]
                functions.append(compute(a.param))
            else:
                raise NotImplementedError('Selectable aggregate function %s'
                                          ' is not supported' % a.func)

        return functions

    def _make_stats_query(self, sample_filter, groupby, aggregate):

        select = [
            func.min(models.Sample.timestamp).label('tsmin'),
            func.max(models.Sample.timestamp).label('tsmax'),
            models.Meter.unit
        ]
        select.extend(self._get_aggregate_functions(aggregate))

        session = self._engine_facade.get_session()

        if groupby:
            group_attributes = [getattr(models.Resource, g) for g in groupby]
            select.extend(group_attributes)

        query = (session.query(*select)
                 .join(models.Meter,
                       models.Meter.id == models.Sample.meter_id)
                 .join(
                     models.Resource,
                     models.Resource.internal_id == models.Sample.resource_id)
                 .group_by(models.Meter.unit))

        if groupby:
            query = query.group_by(*group_attributes)

        return make_query_from_filter(session, query, sample_filter)

    @staticmethod
    def _stats_result_aggregates(result, aggregate):
        stats_args = {}
        if isinstance(result.count, (int, long)):
            stats_args['count'] = result.count
        for attr in ['min', 'max', 'sum', 'avg']:
            if hasattr(result, attr):
                stats_args[attr] = getattr(result, attr)
        if aggregate:
            stats_args['aggregate'] = {}
            for a in aggregate:
                key = '%s%s' % (a.func, '/%s' % a.param if a.param else '')
                stats_args['aggregate'][key] = getattr(result, key)
        return stats_args

    @staticmethod
    def _stats_result_to_model(result, period, period_start,
                               period_end, groupby, aggregate):
        stats_args = Connection._stats_result_aggregates(result, aggregate)
        stats_args['unit'] = result.unit
        duration = (timeutils.delta_seconds(result.tsmin, result.tsmax)
                    if result.tsmin is not None and result.tsmax is not None
                    else None)
        stats_args['duration'] = duration
        stats_args['duration_start'] = result.tsmin
        stats_args['duration_end'] = result.tsmax
        stats_args['period'] = period
        stats_args['period_start'] = period_start
        stats_args['period_end'] = period_end
        stats_args['groupby'] = (dict(
            (g, getattr(result, g)) for g in groupby) if groupby else None)
        return api_models.Statistics(**stats_args)

    def get_meter_statistics(self, sample_filter, period=None, groupby=None,
                             aggregate=None):
        """Return an iterable of api_models.Statistics instances.

        Items contain meter statistics described by the query
        parameters. The filter must have a meter value set.
        """
        if groupby:
            for group in groupby:
                if group not in ['user_id', 'project_id', 'resource_id']:
                    raise NotImplementedError('Unable to group by '
                                              'these fields')

        if not period:
            for res in self._make_stats_query(sample_filter,
                                              groupby,
                                              aggregate):
                if res.count:
                    yield self._stats_result_to_model(res, 0,
                                                      res.tsmin, res.tsmax,
                                                      groupby,
                                                      aggregate)
            return

        if not sample_filter.start or not sample_filter.end:
            res = self._make_stats_query(sample_filter,
                                         None,
                                         aggregate).first()
            if not res:
                # NOTE(liusheng): 'res' may be None because no sample was
                # found matching the sample filter(s).
                return

        query = self._make_stats_query(sample_filter, groupby, aggregate)
        # HACK(jd) This is an awful method to compute stats by period, but
        # since we're trying to be SQL agnostic we have to write portable
        # code, so here it is, admire! We're going to issue one request per
        # period. We would like to use GROUP BY, but there's no
        # portable way to manipulate timestamp in SQL, so we can't.
        for period_start, period_end in base.iter_period(
                sample_filter.start or res.tsmin,
                sample_filter.end or res.tsmax,
                period):
            q = query.filter(models.Sample.timestamp >= period_start)
            q = q.filter(models.Sample.timestamp < period_end)
            for r in q.all():
                if r.count:
                    yield self._stats_result_to_model(
                        result=r,
                        period=int(timeutils.delta_seconds(period_start,
                                                           period_end)),
                        period_start=period_start,
                        period_end=period_end,
                        groupby=groupby,
                        aggregate=aggregate
                    )

    def _get_or_create_trait_type(self, trait_type, data_type, session=None):
        """Find if this trait already exists in the database.

        If it does not, create a new entry in the trait type table.
        """
        if session is None:
            session = self._engine_facade.get_session()
        with session.begin(subtransactions=True):
            tt = session.query(models.TraitType).filter(
                models.TraitType.desc == trait_type,
                models.TraitType.data_type == data_type).first()
            if not tt:
                tt = models.TraitType(trait_type, data_type)
                session.add(tt)
        return tt

    def _make_trait(self, trait_model, event, session=None):
        """Make a new Trait from a Trait model.

        Doesn't flush or add to session.
        """
        trait_type = self._get_or_create_trait_type(trait_model.name,
                                                    trait_model.dtype,
                                                    session)
        value_map = models.Trait._value_map
        values = {'t_string': None, 't_float': None,
                  't_int': None, 't_datetime': None}
        value = trait_model.value
        values[value_map[trait_model.dtype]] = value
        return models.Trait(trait_type, event, **values)

    def _get_or_create_event_type(self, event_type, session=None):
        """Check if an event type with the supplied name is already exists.

        If not, we create it and return the record. This may result in a flush.
        """
        if session is None:
            session = self._engine_facade.get_session()
        with session.begin(subtransactions=True):
            et = session.query(models.EventType).filter(
                models.EventType.desc == event_type).first()
            if not et:
                et = models.EventType(event_type)
                session.add(et)
        return et

    def _record_event(self, session, event_model):
        """Store a single Event, including related Traits."""
        with session.begin(subtransactions=True):
            event_type = self._get_or_create_event_type(event_model.event_type,
                                                        session=session)

            event = models.Event(event_model.message_id, event_type,
                                 event_model.generated)
            session.add(event)

            new_traits = []
            if event_model.traits:
                for trait in event_model.traits:
                    t = self._make_trait(trait, event, session=session)
                    session.add(t)
                    new_traits.append(t)

        # Note: we don't explicitly flush here (unless creating a new trait
        # or event type does it). Otherwise, wait until all the Events are staged.
        return event, new_traits

    def record_events(self, event_models):
        """Write the events to SQL database via sqlalchemy.

        :param event_models: a list of model.Event objects.

        Returns a list of (reason, event) tuples for the events that could
        not be saved. Reasons are enumerated in storage.model.Event.

        Flush when they're all added, unless new EventTypes or
        TraitTypes are added along the way.
        """
        session = self._engine_facade.get_session()
        events = []
        problem_events = []
        for event_model in event_models:
            event = None
            try:
                with session.begin():
                    event = self._record_event(session, event_model)
            except dbexc.DBDuplicateEntry as e:
                LOG.exception(_("Failed to record duplicated event: %s") % e)
                problem_events.append((api_models.Event.DUPLICATE,
                                       event_model))
            except Exception as e:
                LOG.exception(_('Failed to record event: %s') % e)
                problem_events.append((api_models.Event.UNKNOWN_PROBLEM,
                                       event_model))
            events.append(event)
        return problem_events

    def get_events(self, event_filter):
        """Return an iterable of model.Event objects.

        :param event_filter: EventFilter instance
        """

        start = event_filter.start_time
        end = event_filter.end_time
        session = self._engine_facade.get_session()
        LOG.debug(_("Getting events that match filter: %s") % event_filter)
        with session.begin():
            event_query = session.query(models.Event)

            # Build up the join conditions
            event_join_conditions = [models.EventType.id ==
                                     models.Event.event_type_id]

            if event_filter.event_type:
                event_join_conditions.append(models.EventType.desc ==
                                             event_filter.event_type)

            event_query = event_query.join(models.EventType,
                                           and_(*event_join_conditions))

            # Build up the where conditions
            event_filter_conditions = []
            if event_filter.message_id:
                event_filter_conditions.append(models.Event.message_id ==
                                               event_filter.message_id)
            if start:
                event_filter_conditions.append(models.Event.generated >= start)
            if end:
                event_filter_conditions.append(models.Event.generated <= end)

            if event_filter_conditions:
                event_query = (event_query.
                               filter(and_(*event_filter_conditions)))

            event_models_dict = {}
            if event_filter.traits_filter:
                for trait_filter in event_filter.traits_filter:

                    # Build a sub query that joins Trait to TraitType
                    # where the trait name matches
                    trait_name = trait_filter.pop('key')
                    op = trait_filter.pop('op', 'eq')
                    conditions = [models.Trait.trait_type_id ==
                                  models.TraitType.id,
                                  models.TraitType.desc == trait_name]

                    for key, value in six.iteritems(trait_filter):
                        sql_utils.trait_op_condition(conditions,
                                                     key, value, op)

                    trait_query = (session.query(models.Trait.event_id).
                                   join(models.TraitType,
                                        and_(*conditions)).subquery())

                    event_query = (event_query.
                                   join(trait_query, models.Event.id ==
                                        trait_query.c.event_id))
            else:
                # If there are no trait filters, grab the events from the db
                query = (session.query(models.Event.id,
                                       models.Event.generated,
                                       models.Event.message_id,
                                       models.EventType.desc).
                         join(models.EventType, and_(*event_join_conditions)))
                if event_filter_conditions:
                    query = query.filter(and_(*event_filter_conditions))
                for (id_, generated, message_id, desc_) in query.all():
                    event_models_dict[id_] = api_models.Event(message_id,
                                                              desc_,
                                                              generated,
                                                              [])

            # Build event models for the events
            event_query = event_query.subquery()
            query = (session.query(models.Trait).
                     join(models.TraitType, models.Trait.trait_type_id ==
                          models.TraitType.id).
                     join(event_query, models.Trait.event_id ==
                          event_query.c.id))

            # Now convert the sqlalchemy objects back into Models ...
            for trait in query.all():
                event = event_models_dict.get(trait.event_id)
                if not event:
                    event = api_models.Event(
                        trait.event.message_id,
                        trait.event.event_type.desc,
                        trait.event.generated, [])
                    event_models_dict[trait.event_id] = event
                trait_model = api_models.Trait(trait.trait_type.desc,
                                               trait.trait_type.data_type,
                                               trait.get_value())
                event.append_trait(trait_model)

        event_models = event_models_dict.values()
        return sorted(event_models, key=operator.attrgetter('generated'))

    def get_event_types(self):
        """Return all event types as an iterable of strings."""

        session = self._engine_facade.get_session()
        with session.begin():
            query = (session.query(models.EventType.desc).
                     order_by(models.EventType.desc))
            for name in query.all():
                # The query returns a tuple with one element.
                yield name[0]

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.
        :param event_type: the type of the Event
        """
        session = self._engine_facade.get_session()

        LOG.debug(_("Get traits for %s") % event_type)
        with session.begin():
            query = (session.query(models.TraitType.desc,
                                   models.TraitType.data_type)
                     .join(models.Trait,
                           models.Trait.trait_type_id ==
                           models.TraitType.id)
                     .join(models.Event,
                           models.Event.id ==
                           models.Trait.event_id)
                     .join(models.EventType,
                           and_(models.EventType.id ==
                                models.Event.event_type_id,
                                models.EventType.desc ==
                                event_type))
                     .group_by(models.TraitType.desc,
                               models.TraitType.data_type)
                     .distinct())

            for desc_, dtype in query.all():
                yield {'name': desc_, 'data_type': dtype}

    def get_traits(self, event_type, trait_type=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait type.
        :param event_type: the type of the Event to filter by
        :param trait_type: the name of the Trait to filter by
        """

        session = self._engine_facade.get_session()
        with session.begin():
            trait_type_filters = [models.TraitType.id ==
                                  models.Trait.trait_type_id]
            if trait_type:
                trait_type_filters.append(models.TraitType.desc == trait_type)

            query = (session.query(models.Trait)
                     .join(models.TraitType, and_(*trait_type_filters))
                     .join(models.Event,
                           models.Event.id == models.Trait.event_id)
                     .join(models.EventType,
                           and_(models.EventType.id ==
                                models.Event.event_type_id,
                                models.EventType.desc == event_type)))

            for trait in query.all():
                type = trait.trait_type
                yield api_models.Trait(name=type.desc,
                                       dtype=type.data_type,
                                       value=trait.get_value())
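
For reference, base.iter_period used by get_meter_statistics above is not shown in this example; the following is only a minimal sketch of what such a period iterator presumably does (an assumption, not the actual implementation):

import datetime

def iter_period(start, end, period):
    """Yield (period_start, period_end) slices of `period` seconds each."""
    period_start = start
    while period_start < end:
        period_end = period_start + datetime.timedelta(seconds=period)
        yield period_start, period_end
        period_start = period_end

# get_meter_statistics then re-runs the same filtered query once per slice,
# bounded by timestamp >= period_start and timestamp < period_end.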
Example No. 28
0
class Connection(base.Connection):
    """Get Ceilometer data from InfluxDB and ElasticSearch databases.

    Samples are stored in the following format in InfluxDB:
    - measurement: sample
    - tags (indexed): user_id, resource_id, project_id, source and
    configured metadata fields
    - fields (not indexed): counter_type -> type, counter_unit -> unit,
    counter_volume -> value, counter_name -> meter, message_id,
    message_signature, timestamp and recorded_at.

    Resources and meters are stored in ElasticSearch.
    Resources:
     {
      "_index": "ceilometer_resource",
      "_type": "<source>",
      "_id": "<resource_id>",
      "_source":{
          "first_sample_timestamp": "<datetime in isoformat>",
          "last_sample_timestamp": "<datetime in isoformat>",
          "project_id": "<project_id>",
          "user_id": "<user_id>",
          "metadata": {
              "foo" : "bar",
              "foofoo" : {"barbar": {"foo": "bar"}}
          },
          "meters": {"<meter_name>": {"unit": "<meter_unit>",
                                      "type": "<meter_type>"}
       }
    }

    This class implements 'record_metering_data', but it is used only for
    testing. In a real deployment, data is recorded by StackLight.

    """

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)

    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    resource_index = "ceilometer_resource"

    _refresh_on_write = False

    def __init__(self, url):
        if cfg.CONF.database.resource_connection:
            url_split = netutils.urlsplit(
                cfg.CONF.database.resource_connection)
            self.resource_connection = es.Elasticsearch(url_split.netloc)
        else:
            self.resource_connection = None

        user, pwd, host, port, self.database = influx_utils.split_url(url)
        self.sample_connection = influxdb.InfluxDBClient(
            host, port, user, pwd, self.database)

    def upgrade(self):
        self.upgrade_resource_database()
        self.upgrade_sample_database()

    def upgrade_resource_database(self):
        if not self.resource_connection:
            return

        iclient = es.client.IndicesClient(self.resource_connection)
        template = {
            'template': 'ceilometer_*',
            'mappings': {
                '_default_': {
                    'properties': {
                        'first_sample_timestamp': {
                            'type': 'date'
                        },
                        'last_sample_timestamp': {
                            'type': 'date'
                        },
                    },
                    "dynamic_templates": [{
                        "string_fields": {
                            "match": "*",
                            "match_mapping_type": "string",
                            "mapping": {
                                "type": "string",
                                "index": "not_analyzed"
                            }
                        }
                    }]
                }
            }
        }
        iclient.put_template(name='ceilometer_resource_template',
                             body=template)
        iclient.create(self.resource_index)

    def upgrade_sample_database(self):
        try:
            self.sample_connection.create_database(self.database)
        except influxdb.exceptions.InfluxDBClientError as e:
            if "database already exists" not in e.content:
                raise
        self.sample_connection.create_retention_policy(
            name=influx_utils.RETENTION_POLICY_NAME,
            duration="INF",
            replication=cfg.CONF.database.influxdb_replication,
            database=self.database,
            default=True)
        if cfg.CONF.database.metering_time_to_live > 0:
            duration = "%ss" % cfg.CONF.database.metering_time_to_live
            self.sample_connection.alter_retention_policy(
                name=influx_utils.RETENTION_POLICY_NAME,
                database=self.database,
                duration=duration,
                replication=cfg.CONF.database.influxdb_replication,
                default=True)

    def get_meters(self,
                   user=None,
                   project=None,
                   resource=None,
                   source=None,
                   metaquery=None,
                   limit=None,
                   unique=None):
        if not self.resource_connection:
            raise base.NoResultFound(
                "Resource connection url is not defined and "
                "meter requests could not be processed")

        if limit == 0:
            return

        q_args = es_utils.make_query(self.resource_index,
                                     resource=resource,
                                     user=user,
                                     project=project,
                                     source=source,
                                     metaquery=metaquery,
                                     limit=limit)
        results = self.resource_connection.search(
            fields=['_type', '_id', '_source'], **q_args)
        return es_utils.search_results_to_meters(results, limit, unique)

    def get_resources(self,
                      user=None,
                      project=None,
                      source=None,
                      start_timestamp=None,
                      start_timestamp_op=None,
                      end_timestamp=None,
                      end_timestamp_op=None,
                      metaquery=None,
                      resource=None,
                      limit=None):
        if not self.resource_connection:
            raise base.NoResultFound(
                "Resource connection url is not defined and "
                "resource requests could not be processed")

        if limit == 0:
            return

        q_args = es_utils.make_query(self.resource_index, user, project,
                                     source, start_timestamp,
                                     start_timestamp_op, end_timestamp,
                                     end_timestamp_op, metaquery, resource,
                                     limit)
        results = self.resource_connection.search(
            fields=['_type', '_id', '_source'], **q_args)
        return es_utils.search_results_to_resources(results)

    def get_meter_statistics(self,
                             sample_filter,
                             period=None,
                             groupby=None,
                             aggregate=None):

        # Note: the InfluxDB query needs an explicit lower time bound,
        # otherwise it defaults to 1970-01-01T00:00:00.
        if (groupby and set(groupby) - set([
                'user_id', 'project_id', 'resource_id', 'source',
                'resource_metadata.instance_type'
        ])):
            raise ceilometer.NotImplementedError(
                "Unable to group by these fields")
        if any([aggr.func == 'cardinality' for aggr in (aggregate or [])]):
            raise ceilometer.NotImplementedError(
                "Cardinality aggregation is not supported "
                "by StackLight backends")
        try:
            if (not sample_filter.start_timestamp
                    or not sample_filter.end_timestamp):
                first, last = self.get_time_boundary(sample_filter)
                sample_filter.start_timestamp = \
                    sample_filter.start_timestamp or first
            unit = self.get_unit(sample_filter)
        except base.NoResultFound:
            return []

        query = influx_utils.make_aggregate_query(sample_filter, period,
                                                  groupby, aggregate)
        response = self._query(query)
        stats = []
        for serie, points in response.items():
            measurement, tags = serie
            for point in points or []:
                stats.append(
                    influx_utils.point_to_stat(point, tags, period, aggregate,
                                               unit))
        return [stat for stat in stats if stat]

    def get_samples(self, sample_filter, limit=None):
        if limit == 0:
            return
        response = self._query(
            influx_utils.make_list_query(sample_filter, limit))
        for point in response.get_points(influx_utils.MEASUREMENT):
            yield influx_utils.point_to_sample(point)

    def query_samples(self, filter_expr=None, orderby=None, limit=None):
        q = influx_utils.make_complex_query(filter_expr, limit)
        response = self._query(q)
        samples = []
        for point in response.get_points(influx_utils.MEASUREMENT):
            samples.append(influx_utils.point_to_sample(point))
        return influx_utils.sort_samples(samples, orderby)

    def get_unit(self, sample_filter):
        meter = sample_filter.meter
        if meter in units.UNITS_BY_METRIC:
            return units.UNITS_BY_METRIC[meter]
        response = self._query(influx_utils.make_unit_query(sample_filter))
        try:
            point = response.get_points(influx_utils.MEASUREMENT).next()
        except StopIteration:
            raise base.NoResultFound()

        units.UNITS_BY_METRIC[meter] = point['unit']
        return point['unit']

    def get_time_boundary(self, sample_filter):
        """Find timestamp of the first matching sample in the database."""

        response = self._query(
            influx_utils.make_time_bounds_query(sample_filter))
        try:
            first_point = response.get_points(influx_utils.MEASUREMENT).next()
        except StopIteration:
            raise base.NoResultFound()

        start_timestamp = utils.sanitize_timestamp(first_point['first'])
        end_timestamp = utils.sanitize_timestamp(first_point['last'])
        return start_timestamp, end_timestamp

    def _query(self, q):
        """Make a query to InfluxDB database.

          :param q: Query string in InfluxDB query format.
          :returns a response ResultSet
        """
        LOG.debug("InfluxDB query requested: %s" % q)
        try:
            return self.sample_connection.query(q)
        except influxdb.exceptions.InfluxDBClientError as e:
            LOG.exception(_LE("Client error during the InfluxDB query: %s"), e)
            return influxdb.resultset.ResultSet({})

    def record_metering_data(self, data):
        """Records data into databases

        Method is needed for testing needs only. In real life, data will be
        written to the databases by StackLight.
        """
        data['counter_name'] = utils.decode_unicode(data['counter_name'])
        self.resource_connection.update(index=self.resource_index,
                                        doc_type='source',
                                        id=data['resource_id'],
                                        body=es_utils.sample_to_resource(data))
        self.sample_connection.write_points(
            [influx_utils.sample_to_point(data)], "n", self.database,
            influx_utils.RETENTION_POLICY_NAME)
        if self._refresh_on_write:
            self.resource_connection.indices.refresh(self.resource_index)
            while self.resource_connection.cluster.pending_tasks(
                    local=True)['tasks']:
                pass

    def clear(self):
        self.resource_connection.indices.delete(index=self.resource_index,
                                                ignore=[400, 404])
        self.sample_connection.drop_database(self.database)
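
Since _query above simply forwards an InfluxQL string to the client, a hand-written query against the layout described in the class docstring might look like the sketch below. This is illustrative only: the measurement and field names follow the docstring, `conn` stands for an already-constructed Connection, and the real query strings are produced by the influx_utils helpers, which are not shown here.

q = ("SELECT value, unit, type, message_id FROM sample "
     "WHERE resource_id = 'instance-0001' "
     "AND time >= '2015-01-01T00:00:00Z' LIMIT 10")
# _query returns an (possibly empty) InfluxDB ResultSet.
for point in conn._query(q).get_points("sample"):
    print(point["time"], point["value"])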
Example No. 29
0
 def __init__(self, conf, AVAILABLE_CAPABILITIES):
     """Constructor."""
     self._CAPABILITIES = utils.update_nested(self.DEFAULT_CAPABILITIES,
                                              AVAILABLE_CAPABILITIES)
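
utils.update_nested is used throughout these examples to overlay AVAILABLE_CAPABILITIES onto the driver defaults. Its implementation is not shown here; the following is only a minimal sketch of the deep-merge behaviour it presumably has, inferred from how it is called:

import copy

def update_nested(original, updates):
    """Recursively overlay `updates` onto a copy of `original`."""
    result = copy.deepcopy(original)
    for key, value in updates.items():
        if isinstance(value, dict) and isinstance(result.get(key), dict):
            result[key] = update_nested(result[key], value)
        else:
            result[key] = value
    return result

# e.g. flipping a single nested capability flag:
# update_nested({'statistics': {'groupby': False, 'query': {'simple': True}}},
#               {'statistics': {'groupby': True}})
# -> {'statistics': {'groupby': True, 'query': {'simple': True}}}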
Example No. 30
0
class Connection(base.Connection):
    """Put the event data into a SQLAlchemy database.

    Tables::

        - EventType
          - event definition
          - { id: event type id
              desc: description of event
              }
        - Event
          - event data
          - { id: event id
              message_id: message id
              generated = timestamp of event
              event_type_id = event type -> eventtype.id
              }
        - TraitInt
          - int trait value
          - { event_id: event -> event.id
              key: trait name
              value: integer value
              }
        - TraitDatetime
          - datetime trait value
          - { event_id: event -> event.id
              key: trait name
              value: datetime value
              }
        - TraitText
          - text trait value
          - { event_id: event -> event.id
              key: trait name
              value: text value
              }
        - TraitFloat
          - float trait value
          - { event_id: event -> event.id
              key: trait name
              value: float value
              }

    """
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def __init__(self, url):
        # Set max_retries to 0, since oslo.db in certain cases may attempt
        # to retry making the db connection retried max_retries ^ 2 times
        # in failure case and db reconnection has already been implemented
        # in storage.__init__.get_connection_from_config function
        options = dict(cfg.CONF.database.items())
        options['max_retries'] = 0
        self._engine_facade = db_session.EngineFacade(url, **options)

    def upgrade(self):
        # NOTE(gordc): to minimise memory, only import migration when needed
        from oslo_db.sqlalchemy import migration
        path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                            '..', '..', 'storage', 'sqlalchemy',
                            'migrate_repo')
        migration.db_sync(self._engine_facade.get_engine(), path)

    def clear(self):
        engine = self._engine_facade.get_engine()
        for table in reversed(models.Base.metadata.sorted_tables):
            engine.execute(table.delete())
        engine.dispose()

    def _get_or_create_event_type(self, event_type, session=None):
        """Check if an event type with the supplied name is already exists.

        If not, we create it and return the record. This may result in a flush.
        """
        if session is None:
            session = self._engine_facade.get_session()
        with session.begin(subtransactions=True):
            et = session.query(models.EventType).filter(
                models.EventType.desc == event_type).first()
            if not et:
                et = models.EventType(event_type)
                session.add(et)
        return et

    def record_events(self, event_models):
        """Write the events to SQL database via sqlalchemy.

        :param event_models: a list of model.Event objects.
        """
        session = self._engine_facade.get_session()
        error = None
        for event_model in event_models:
            event = None
            try:
                with session.begin():
                    event_type = self._get_or_create_event_type(
                        event_model.event_type, session=session)
                    event = models.Event(event_model.message_id, event_type,
                                         event_model.generated,
                                         event_model.raw)
                    session.add(event)
                    session.flush()

                    if event_model.traits:
                        trait_map = {}
                        for trait in event_model.traits:
                            if trait_map.get(trait.dtype) is None:
                                trait_map[trait.dtype] = []
                            trait_map[trait.dtype].append(
                                {'event_id': event.id,
                                 'key': trait.name,
                                 'value': trait.value})
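                        # Bulk-insert the traits grouped by data type so that
                        # each trait table gets a single executemany() call
                        # instead of one INSERT per trait.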
                        for dtype in trait_map.keys():
                            model = TRAIT_ID_TO_MODEL[dtype]
                            session.execute(model.__table__.insert(),
                                            trait_map[dtype])
            except dbexc.DBDuplicateEntry as e:
                LOG.info(_LI("Duplicate event detected, skipping it: %s") % e)
            except KeyError as e:
                LOG.exception(_LE('Failed to record event: %s') % e)
            except Exception as e:
                LOG.exception(_LE('Failed to record event: %s') % e)
                error = e
        if error:
            raise error

    def get_events(self, event_filter, limit=None):
        """Return an iterable of model.Event objects.

        :param event_filter: EventFilter instance
        """
        if limit == 0:
            return []
        session = self._engine_facade.get_session()
        with session.begin():
            # Build up the join conditions
            event_join_conditions = [models.EventType.id ==
                                     models.Event.event_type_id]

            if event_filter.event_type:
                event_join_conditions.append(models.EventType.desc ==
                                             event_filter.event_type)

            # Build up the where conditions
            event_filter_conditions = []
            if event_filter.message_id:
                event_filter_conditions.append(
                    models.Event.message_id == event_filter.message_id)
            if event_filter.start_timestamp:
                event_filter_conditions.append(
                    models.Event.generated >= event_filter.start_timestamp)
            if event_filter.end_timestamp:
                event_filter_conditions.append(
                    models.Event.generated <= event_filter.end_timestamp)

            trait_subq = None
            # Build trait filter
            if event_filter.traits_filter:
                filters = list(event_filter.traits_filter)
                trait_filter = filters.pop()
                key = trait_filter.pop('key')
                op = trait_filter.pop('op', 'eq')
                trait_type, value = list(trait_filter.items())[0]
                trait_subq = _build_trait_query(session, trait_type,
                                                key, value, op)
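                # Additional trait filters are ANDed in by matching each new
                # trait subquery against the previous one on event id.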
                for trait_filter in filters:
                    key = trait_filter.pop('key')
                    op = trait_filter.pop('op', 'eq')
                    trait_type, value = list(trait_filter.items())[0]
                    q = _build_trait_query(session, trait_type,
                                           key, value, op)
                    trait_subq = trait_subq.filter(
                        trait_subq.subquery().c.ev_id == q.subquery().c.ev_id)
                trait_subq = trait_subq.subquery()

            query = (session.query(models.Event.id)
                     .join(models.EventType,
                           sa.and_(*event_join_conditions)))
            if trait_subq is not None:
                query = query.join(trait_subq,
                                   trait_subq.c.ev_id == models.Event.id)
            if event_filter.admin_proj:
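                # Scope the query for an admin: keep events that either have
                # no project_id trait at all, or whose project_id trait
                # matches the admin project.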
                no_proj_q = session.query(models.TraitText.event_id).filter(
                    models.TraitText.key == 'project_id')
                admin_q = (session.query(models.TraitText.event_id).filter(
                    ~sa.exists().where(models.TraitText.event_id ==
                                       no_proj_q.subquery().c.event_id)).union(
                    session.query(models.TraitText.event_id).filter(sa.and_(
                        models.TraitText.key == 'project_id',
                        models.TraitText.value == event_filter.admin_proj,
                        models.Event.id == models.TraitText.event_id))))
                query = query.filter(sa.exists().where(
                    models.Event.id ==
                    admin_q.subquery().c.trait_text_event_id))
            if event_filter_conditions:
                query = query.filter(sa.and_(*event_filter_conditions))

            query = query.order_by(models.Event.generated).limit(limit)
            event_list = {}
            # get a list of all events that match filters
            for (id_, generated, message_id,
                 desc, raw) in query.add_columns(
                     models.Event.generated, models.Event.message_id,
                     models.EventType.desc, models.Event.raw).all():
                event_list[id_] = api_models.Event(message_id, desc,
                                                   generated, [], raw)
            # Query all traits related to events.
            # NOTE (gordc): cast is done because pgsql defaults to TEXT when
            #               handling unknown values such as null.
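            # Each row of the union below has the shape
            # (event_id, key, datetime, int, float, text) with at most one
            # non-null value column, which lets the loop that follows infer
            # the trait's data type.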
            trait_q = (
                session.query(
                    models.TraitDatetime.event_id,
                    models.TraitDatetime.key, models.TraitDatetime.value,
                    sa.cast(sa.null(), sa.Integer),
                    sa.cast(sa.null(), sa.Float(53)),
                    sa.cast(sa.null(), sa.String(255)))
                .filter(sa.exists().where(
                    models.TraitDatetime.event_id == query.subquery().c.id))
            ).union_all(
                session.query(
                    models.TraitInt.event_id,
                    models.TraitInt.key, sa.null(),
                    models.TraitInt.value, sa.null(), sa.null())
                .filter(sa.exists().where(
                    models.TraitInt.event_id == query.subquery().c.id)),
                session.query(
                    models.TraitFloat.event_id,
                    models.TraitFloat.key, sa.null(), sa.null(),
                    models.TraitFloat.value, sa.null())
                .filter(sa.exists().where(
                    models.TraitFloat.event_id == query.subquery().c.id)),
                session.query(
                    models.TraitText.event_id,
                    models.TraitText.key, sa.null(), sa.null(), sa.null(),
                    models.TraitText.value)
                .filter(sa.exists().where(
                    models.TraitText.event_id == query.subquery().c.id)))

            for id_, key, t_date, t_int, t_float, t_text in (
                    trait_q.order_by(models.TraitDatetime.key)).all():
                if t_int is not None:
                    dtype = api_models.Trait.INT_TYPE
                    val = t_int
                elif t_float is not None:
                    dtype = api_models.Trait.FLOAT_TYPE
                    val = t_float
                elif t_date is not None:
                    dtype = api_models.Trait.DATETIME_TYPE
                    val = t_date
                else:
                    dtype = api_models.Trait.TEXT_TYPE
                    val = t_text

                try:
                    trait_model = api_models.Trait(key, dtype, val)
                    event_list[id_].append_trait(trait_model)
                except KeyError:
                    # NOTE(gordc): this is expected as we do not set REPEATABLE
                    # READ (bug 1506717). if query is run while recording new
                    # event data, trait query may return more data than event
                    # query. they can be safely discarded.
                    pass

            return event_list.values()

    def get_event_types(self):
        """Return all event types as an iterable of strings."""

        session = self._engine_facade.get_session()
        with session.begin():
            query = (session.query(models.EventType.desc).
                     order_by(models.EventType.desc))
            for name in query.all():
                # The query returns a tuple with one element.
                yield name[0]

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.
        :param event_type: the type of the Event
        """
        session = self._engine_facade.get_session()

        with session.begin():
            for trait_model in [models.TraitText, models.TraitInt,
                                models.TraitFloat, models.TraitDatetime]:
                query = (session.query(trait_model.key)
                         .join(models.Event,
                               models.Event.id == trait_model.event_id)
                         .join(models.EventType,
                               sa.and_(models.EventType.id ==
                                       models.Event.event_type_id,
                                       models.EventType.desc == event_type))
                         .distinct())

                dtype = TRAIT_MODEL_TO_ID.get(trait_model)
                for row in query.all():
                    yield {'name': row[0], 'data_type': dtype}

    def get_traits(self, event_type, trait_type=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait type.
        :param event_type: the type of the Event to filter by
        :param trait_type: the name of the Trait to filter by
        """

        session = self._engine_facade.get_session()
        with session.begin():
            for trait_model in [models.TraitText, models.TraitInt,
                                models.TraitFloat, models.TraitDatetime]:
                query = (session.query(trait_model.key, trait_model.value)
                         .join(models.Event,
                               models.Event.id == trait_model.event_id)
                         .join(models.EventType,
                               sa.and_(models.EventType.id ==
                                       models.Event.event_type_id,
                                       models.EventType.desc == event_type))
                         .order_by(trait_model.key))
                if trait_type:
                    query = query.filter(trait_model.key == trait_type)

                dtype = TRAIT_MODEL_TO_ID.get(trait_model)
                for k, v in query.all():
                    yield api_models.Trait(name=k,
                                           dtype=dtype,
                                           value=v)

    def clear_expired_event_data(self, ttl):
        """Clear expired data from the backend storage system.

        Clearing occurs according to the time-to-live.

        :param ttl: Number of seconds to keep records for.
        """
        session = self._engine_facade.get_session()
        with session.begin():
            end = timeutils.utcnow() - datetime.timedelta(seconds=ttl)
            event_q = (session.query(models.Event.id)
                       .filter(models.Event.generated < end))

            event_subq = event_q.subquery()
            for trait_model in [models.TraitText, models.TraitInt,
                                models.TraitFloat, models.TraitDatetime]:
                (session.query(trait_model)
                 .filter(trait_model.event_id.in_(event_subq))
                 .delete(synchronize_session="fetch"))
            event_rows = event_q.delete()

            # remove EventType rows that no longer have any matching events
            (session.query(models.EventType)
             .filter(~models.EventType.events.any())
             .delete(synchronize_session="fetch"))
            LOG.info(_LI("%d events are removed from database"), event_rows)
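
# A minimal, hypothetical usage sketch for the event Connection above, kept
# entirely in comments. The 'sqlite://' URL, the api_models.Event constructor
# arguments and the storage.EventFilter keyword are assumptions based on this
# snippet rather than a verified Ceilometer API:
#
#     conn = Connection('sqlite://')
#     conn.upgrade()
#     conn.record_events([api_models.Event('msg-1',
#                                          'compute.instance.create.end',
#                                          timeutils.utcnow(), [], {})])
#     for ev in conn.get_events(storage.EventFilter(
#             event_type='compute.instance.create.end')):
#         print(ev.message_id, ev.generated)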
Example No. 31
class Connection(base.Connection):
    """Base Connection class for MongoDB and DB2 drivers."""
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       COMMON_AVAILABLE_CAPABILITIES)

    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def get_meters(self, user=None, project=None, resource=None, source=None,
                   metaquery=None, pagination=None):
        """Return an iterable of models.Meter instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param resource: Optional resource filter.
        :param source: Optional source filter.
        :param metaquery: Optional dict with metadata to match on.
        :param pagination: Optional pagination query.
        """

        if pagination:
            raise ceilometer.NotImplementedError('Pagination not implemented')

        metaquery = metaquery or {}

        q = {}
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if resource is not None:
            q['_id'] = resource
        if source is not None:
            q['source'] = source
        q.update(metaquery)
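        # The final query document simply merges the identity/source filters
        # with whatever metadata query keys the caller supplied.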

        for r in self.db.resource.find(q):
            for r_meter in r['meter']:
                yield models.Meter(
                    name=r_meter['counter_name'],
                    type=r_meter['counter_type'],
                    # Return an empty string if 'counter_unit' is missing,
                    # for backward compatibility.
                    unit=r_meter.get('counter_unit', ''),
                    resource_id=r['_id'],
                    project_id=r['project_id'],
                    source=r['source'],
                    user_id=r['user_id'],
                )

    def get_samples(self, sample_filter, limit=None):
        """Return an iterable of model.Sample instances.

        :param sample_filter: Filter.
        :param limit: Maximum number of results to return.
        """
        if limit == 0:
            return []
        q = pymongo_utils.make_query_from_filter(sample_filter,
                                                 require_meter=False)

        return self._retrieve_samples(q,
                                      [("timestamp", pymongo.DESCENDING)],
                                      limit)

    def record_events(self, event_models):
        """Write the events to database.

        Return a list of events of type models.Event.DUPLICATE in case of
        trying to write an already existing event to the database, or
        models.Event.UNKONW_PROBLEM in case of any failures with recording the
        event in the database.

        :param event_models: a list of models.Event objects.
        """
        problem_events = []
        for event_model in event_models:
            traits = []
            if event_model.traits:
                for trait in event_model.traits:
                    traits.append({'trait_name': trait.name,
                                   'trait_type': trait.dtype,
                                   'trait_value': trait.value})
            try:
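                # message_id is used as the Mongo _id, so inserting a
                # duplicate event surfaces as a DuplicateKeyError below.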
                self.db.event.insert(
                    {'_id': event_model.message_id,
                     'event_type': event_model.event_type,
                     'timestamp': event_model.generated,
                     'traits': traits})
            except pymongo.errors.DuplicateKeyError as ex:
                LOG.exception(_("Failed to record duplicated event: %s") % ex)
                problem_events.append((ev_models.Event.DUPLICATE,
                                       event_model))
            except Exception as ex:
                LOG.exception(_("Failed to record event: %s") % ex)
                problem_events.append((ev_models.Event.UNKNOWN_PROBLEM,
                                       event_model))
        return problem_events

    def get_events(self, event_filter):
        """Return an iter of models.Event objects.

        :param event_filter: storage.EventFilter object, consists of filters
                             for events that are stored in database.
        """
        q = pymongo_utils.make_events_query_from_filter(event_filter)
        for event in self.db.event.find(q):
            traits = []
            for trait in event['traits']:
                traits.append(
                    ev_models.Trait(name=trait['trait_name'],
                                    dtype=int(trait['trait_type']),
                                    value=trait['trait_value']))
            yield ev_models.Event(message_id=event['_id'],
                                  event_type=event['event_type'],
                                  generated=event['timestamp'],
                                  traits=traits)

    def get_event_types(self):
        """Return all event types as an iter of strings."""
        event_types = set()
        events = self.db.event.find()

        for event in events:
            event_type = event['event_type']
            if event_type not in event_types:
                event_types.add(event_type)
                yield event_type

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event.
        """
        trait_names = set()
        events = self.db.event.find({'event_type': event_type})

        for event in events:
            for trait in event['traits']:
                trait_name = trait['trait_name']
                if trait_name not in trait_names:
                    # Yield each trait name only once; a given trait name is
                    # assumed to map to a single trait type.
                    trait_names.add(trait_name)
                    yield {'name': trait_name,
                           'data_type': trait['trait_type']}

    def get_traits(self, event_type, trait_name=None):
        """Return all trait instances associated with an event_type.

        If trait_name is specified, only return instances of that trait.

        :param event_type: the type of the Event to filter by
        :param trait_name: the name of the Trait to filter by
        """
        if not trait_name:
            events = self.db.event.find({'event_type': event_type})
        else:
            # Select events that have both the given event_type and the given
            # trait_name, projecting only the matching trait from each event.
            events = self.db.event.find({'$and': [{'event_type': event_type},
                                        {'traits.trait_name': trait_name}]},
                                        {'traits': {'$elemMatch':
                                                    {'trait_name': trait_name}}
                                         })
        for event in events:
            for trait in event['traits']:
                yield ev_models.Trait(name=trait['trait_name'],
                                      dtype=trait['trait_type'],
                                      value=trait['trait_value'])

    def query_samples(self, filter_expr=None, orderby=None, limit=None):
        if limit == 0:
            return []
        query_filter = {}
        orderby_filter = [("timestamp", pymongo.DESCENDING)]
        transformer = pymongo_utils.QueryTransformer()
        if orderby is not None:
            orderby_filter = transformer.transform_orderby(orderby)
        if filter_expr is not None:
            query_filter = transformer.transform_filter(filter_expr)
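            # For instance, a complex-query expression roughly of the form
            # {'=': {'counter_name': 'cpu_util'}} is translated into the
            # equivalent Mongo filter document here.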

        return self._retrieve_samples(query_filter, orderby_filter, limit)

    def _retrieve_samples(self, query, orderby, limit):
        if limit is not None:
            samples = self.db.meter.find(query,
                                         limit=limit,
                                         sort=orderby)
        else:
            samples = self.db.meter.find(query,
                                         sort=orderby)

        for s in samples:
            # Remove the ObjectId generated by the database when
            # the sample was inserted. It is an implementation
            # detail that should not leak outside of the driver.
            del s['_id']
            # Backward compatibility for samples without units
            s['counter_unit'] = s.get('counter_unit', '')
            # Tolerate absence of recorded_at in older datapoints
            s['recorded_at'] = s.get('recorded_at')
            yield models.Sample(**s)
Example No. 32
class Connection(base.Connection):
    """Base event Connection class for MongoDB and DB2 drivers."""
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       COMMON_AVAILABLE_CAPABILITIES)

    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def record_events(self, event_models):
        """Write the events to database.

        Return a list of events of type models.Event.DUPLICATE in case of
        trying to write an already existing event to the database, or
        models.Event.UNKONW_PROBLEM in case of any failures with recording the
        event in the database.

        :param event_models: a list of models.Event objects.
        """
        problem_events = []
        for event_model in event_models:
            traits = []
            if event_model.traits:
                for trait in event_model.traits:
                    traits.append({
                        'trait_name': trait.name,
                        'trait_type': trait.dtype,
                        'trait_value': trait.value
                    })
            try:
                self.db.event.insert({
                    '_id': event_model.message_id,
                    'event_type': event_model.event_type,
                    'timestamp': event_model.generated,
                    'traits': traits
                })
            except pymongo.errors.DuplicateKeyError as ex:
                LOG.exception(_("Failed to record duplicated event: %s") % ex)
                problem_events.append((models.Event.DUPLICATE, event_model))
            except Exception as ex:
                LOG.exception(_("Failed to record event: %s") % ex)
                problem_events.append(
                    (models.Event.UNKNOWN_PROBLEM, event_model))
        return problem_events

    def get_events(self, event_filter):
        """Return an iter of models.Event objects.

        :param event_filter: storage.EventFilter object, consists of filters
                             for events that are stored in database.
        """
        q = pymongo_utils.make_events_query_from_filter(event_filter)
        for event in self.db.event.find(q):
            traits = []
            for trait in event['traits']:
                traits.append(
                    models.Trait(name=trait['trait_name'],
                                 dtype=int(trait['trait_type']),
                                 value=trait['trait_value']))
            yield models.Event(message_id=event['_id'],
                               event_type=event['event_type'],
                               generated=event['timestamp'],
                               traits=traits)

    def get_event_types(self):
        """Return all event types as an iter of strings."""
        event_types = set()
        events = self.db.event.find()

        for event in events:
            event_type = event['event_type']
            if event_type not in event_types:
                event_types.add(event_type)
                yield event_type

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event.
        """
        trait_names = set()
        events = self.db.event.find({'event_type': event_type})

        for event in events:
            for trait in event['traits']:
                trait_name = trait['trait_name']
                if trait_name not in trait_names:
                    # Yield each trait name only once; a given trait name is
                    # assumed to map to a single trait type.
                    trait_names.add(trait_name)
                    yield {
                        'name': trait_name,
                        'data_type': trait['trait_type']
                    }

    def get_traits(self, event_type, trait_name=None):
        """Return all trait instances associated with an event_type.

        If trait_name is specified, only return instances of that trait.

        :param event_type: the type of the Event to filter by
        :param trait_name: the name of the Trait to filter by
        """
        if not trait_name:
            events = self.db.event.find({'event_type': event_type})
        else:
            # Select events that have both the given event_type and the given
            # trait_name, projecting only the matching trait from each event.
            events = self.db.event.find(
                {
                    '$and': [{
                        'event_type': event_type
                    }, {
                        'traits.trait_name': trait_name
                    }]
                }, {'traits': {
                    '$elemMatch': {
                        'trait_name': trait_name
                    }
                }})
        for event in events:
            for trait in event['traits']:
                yield models.Trait(name=trait['trait_name'],
                                   dtype=trait['trait_type'],
                                   value=trait['trait_value'])