コード例 #1
0
    def __init__(self, fsid, cluster_name):
        """Initialize a new (not yet submitted) request.

        Requiring cluster_name and fsid is redundant (ideally everything would
        speak in terms of fsid) but convenient, because the librados interface
        wants a cluster name when you create a client, and otherwise we would
        have to look up via ceph.conf.

        :param fsid: Ceph cluster fsid (uuid string).
        :param cluster_name: librados cluster name (e.g. "ceph").
        """
        # getChild isn't in 2.6
        self.requested_at = now()
        self.completed_at = None

        # This is actually kind of overkill compared with having a counter,
        # somewhere but it's easy.
        # Idiom fix: str(...) rather than calling __str__() directly.
        self.id = str(uuid.uuid4())

        self._minion_id = None
        self.fsid = fsid
        self._cluster_name = cluster_name

        # Salt job id of the currently outstanding job, if any.
        self.jid = None

        # self.NEW / self.COMPLETE are presumably class-level state constants
        # declared outside this view -- TODO confirm.
        self.state = self.NEW
        self.result = None
        self.error = False
        self.error_message = ""

        # Time at which we last believed the current JID to be really running
        self.alive_at = None
コード例 #2
0
    def on_sync_object(self, data):
        """Handle a newly received cluster sync object (map/status blob).

        Persists the object if it advances our local version, and for osd
        maps additionally persists per-pool utilization records.

        :param data: dict with keys 'fsid', 'type', 'version', 'data'.
        """
        assert data['fsid'] == self.fsid

        sync_object = data['data']

        sync_type = SYNC_OBJECT_STR_TYPE[data['type']]
        new_object = self.inject_sync_object(data['type'], data['version'],
                                             sync_object)
        if new_object:
            self._persister.update_sync_object(
                str(time.time()), self.fsid, self.name,
                sync_type.str, new_object.version if isinstance(
                    new_object.version, int) else None, now(), sync_object,
                self._manager.cluster_id)
            if sync_type.str == "osd_map":
                util_data = self._get_utilization_data()
                for raw_pool in sync_object.get('pools', []):
                    LOG.info("Updating Pool %s" % raw_pool['pool_name'])
                    # Fix: initialize defaults so we don't hit a NameError
                    # when no utilization entry matches this pool name
                    # (mirrors the sibling implementation of this method).
                    pool_used = 0
                    pcnt = 0
                    for pool in util_data['pools']:
                        if pool['name'] == raw_pool['pool_name']:
                            pool_used = pool['used']
                            pcnt = pool['pcnt_used']
                    pool = Pool(updated=str(time.time()),
                                cluster_id=self._manager.cluster_id,
                                pool_id=raw_pool['pool'],
                                poolname=raw_pool['pool_name'],
                                pg_num=raw_pool['pg_num'],
                                min_size=raw_pool['min_size'],
                                used=pool_used,
                                percent_used=pcnt)
                    self._persister.update_pool(pool)
        else:
            LOG.warn("ClusterMonitor.on_sync_object: stale object"
                     " received for %s" % data['type'])
コード例 #3
0
    def fetch(self, sync_type):
        """Kick off retrieval of the latest copy of ``sync_type``'s map."""
        LOG.debug("SyncObjects.fetch: %s" % sync_type)
        # Record when the fetch started so on_version can expire stale ones.
        self._fetching_at[sync_type] = now()
        # TODO(Rohan) clean up unused 'since' argument
        return ceph.get_cluster_object(
            self._cluster_name, sync_type.str, None)
コード例 #4
0
    def fetch(self, sync_type):
        """Kick off retrieval of the latest copy of ``sync_type``'s map."""
        debug_text = "SyncObjects.fetch: %s" % sync_type
        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={"message": debug_text}))
        # Record when the fetch started so on_version can expire stale ones.
        self._fetching_at[sync_type] = now()
        # TODO(Rohan) clean up unused 'since' argument
        return ceph.get_cluster_object(self._cluster_name, sync_type.str)
コード例 #5
0
    def complete(self):
        """Mark this request finished.

        Must be called exactly once, and only after any outstanding salt
        job (jid) has been cleared.
        """
        assert self.state != self.COMPLETE
        assert self.jid is None

        LOG.info("Request %s completed with error=%s (%s)" %
                 (self.id, self.error, self.error_message))
        self.completed_at = now()
        self.state = self.COMPLETE
コード例 #6
0
    def on_sync_object(self, data):
        """Handle a newly received cluster sync object (map/status blob).

        Saves the object if it advances our local version, and for osd maps
        additionally saves per-pool records.

        :param data: dict with keys 'fsid', 'type', 'version', 'data'.
        """
        assert data['fsid'] == self.fsid

        sync_object = data['data']

        sync_type = SYNC_OBJECT_STR_TYPE[data['type']]
        new_object = self.inject_sync_object(data['type'], data['version'],
                                             sync_object)
        if new_object:
            tendrl_ns.ceph_integration.objects.SyncObject(
                updated=now(),
                sync_type=sync_type.str,
                version=new_object.version if isinstance(
                    new_object.version, int) else None,
                when=now(),
                data=sync_object).save()

            if sync_type.str == "osd_map":
                util_data = self._get_utilization_data()
                for raw_pool in sync_object.get('pools', []):
                    LOG.info("Updating Pool %s" % raw_pool['pool_name'])
                    # Fix: initialize defaults so we don't hit a NameError
                    # when no utilization entry matches this pool name
                    # (mirrors the sibling implementation of this method).
                    pool_used = 0
                    pcnt = 0
                    for pool in util_data['pools']:
                        if pool['name'] == raw_pool['pool_name']:
                            pool_used = pool['used']
                            pcnt = pool['pcnt_used']
                    tendrl_ns.ceph_integration.objects.Pool(
                        pool_id=raw_pool['pool'],
                        pool_name=raw_pool['pool_name'],
                        pg_num=raw_pool['pg_num'],
                        min_size=raw_pool['min_size'],
                        used=pool_used,
                        percent_used=pcnt).save()
        else:
            LOG.warn("ClusterMonitor.on_sync_object: stale object"
                     " received for %s" % data['type'])
コード例 #7
0
    def complete(self):
        """Mark this request finished.

        Must be called exactly once, and only after any outstanding salt
        job (jid) has been cleared.
        """
        assert self.state != self.COMPLETE
        assert self.jid is None

        info_text = ("Request %s completed with error=%s (%s)" %
                     (self.id, self.error, self.error_message))
        Event(
            Message(priority="info",
                    publisher=NS.publisher_id,
                    payload={"message": info_text}))

        self.state = self.COMPLETE
        self.completed_at = now()
コード例 #8
0
    def on_version(self, sync_type, new_version):
        """Notify me that a particular version of a particular map exists.

        I may choose to initiate RPC to retrieve the map
        """
        LOG.debug(
            "SyncObjects.on_version %s/%s" % (sync_type.str, new_version)
        )
        old_version = self.get_version(sync_type)
        # Guard clause: ignore anything not newer than what we already hold.
        if sync_type.cmp(new_version, old_version) <= 0:
            return

        known_version = self._known_versions[sync_type]
        if sync_type.cmp(new_version, known_version) > 0:
            # We are out of date: request an up to date copy
            LOG.info("Advanced known version %s/%s %s->%s" % (
                self._cluster_name, sync_type.str, known_version,
                new_version))
            self._known_versions[sync_type] = new_version
        else:
            LOG.info(
                "on_version: %s is newer than %s" % (
                    new_version, old_version
                )
            )

        # If we already have a request out for this type of map,
        # then consider cancelling it if we've already waited for
        # a while.
        fetch_started = self._fetching_at[sync_type]
        if fetch_started is not None:
            if now() - fetch_started < self.FETCH_TIMEOUT:
                LOG.info("Fetch already underway for %s" % sync_type.str)
                return
            LOG.warn("Abandoning fetch for %s started at %s" % (
                sync_type.str, fetch_started))

        LOG.info(
            "on_version: fetching %s/%s , "
            "currently got %s, know %s" % (
                sync_type, new_version, old_version, known_version
            )
        )
        return self.fetch(sync_type)
コード例 #9
0
    def _emit(self, severity, message, **associations):
        """Queue a new event for later persistence.

        :param severity: One of the defined severity values
        :param message: One line human readable string
        :param associations: Optional extra attributes to associate
                             the event with a particular cluster/server/service
        """
        timestamp = now()
        LOG.info("Eventer._emit: %s/%s/%s" %
                 (timestamp, severity_str(severity), message))

        event = Event(id=str(uuid.uuid4()),
                      when=timestamp,
                      message=message,
                      severity=severity,
                      **associations)
        self._events.append(event)
コード例 #10
0
    def on_server_heartbeat(self, fqdn, server_heartbeat):
        """Call back for when a ceph.service message is received from a salt
        minion.

        This is actually a fairly simple operation of updating the in-memory
        ServerState to reflect what is in the message, but it's convoluted
        because we may be seeing a new server, a known server, or a server
        which was known but unmanaged.

        :param fqdn: Fully qualified domain name of the reporting minion.
        :param server_heartbeat: dict; this code reads the keys 'services'
            (mapping of service name -> dict with 'fsid', 'type', 'id',
            'status'), 'boot_time' (unix timestamp) and 'ceph_version'.
        """
        LOG.debug("ServerMonitor.on_server_heartbeat: %s" % fqdn)
        new_server = True
        newly_managed_server = False
        try:
            server_state = self.servers[fqdn]
            new_server = False
        except KeyError:
            # Look up the grains for this server, we need to know its
            # hostname in order to resolve this vs. the OSD map.
            hostname = fqdn

            if hostname in self.hostname_to_server:
                server_state = self.hostname_to_server[hostname]
                if not server_state.managed:
                    # Take over a ServerState that was created from OSD map
                    server_state.managed = True
                    old_fqdn = server_state.fqdn
                    # OSD map servers would have faked up FQDN as hostname,
                    # so clear that out
                    del self.servers[old_fqdn]
                    server_state.fqdn = fqdn
                    self.servers[server_state.fqdn] = server_state
                    # NOTE(review): the 'break' below means only ONE service's
                    # fsid is used to persist the server record -- presumably
                    # any service's fsid will do; confirm.
                    for service_name, service in server_heartbeat[
                            'services'].items():
                        self._persister.create_server(
                            Server(
                                fsid=service['fsid'],
                                fqdn=server_state.fqdn,
                                managed=True,
                            ))
                        break
                    new_server = False
                    LOG.info("Server %s went from unmanaged to managed" % fqdn)
                    newly_managed_server = True
                else:
                    # We will go on to treat these as distinct servers even
                    # though they have the same hostname
                    LOG.warn("Hostname clash: FQDNs '%s' and"
                             " '%s' both have hostname %s" %
                             (fqdn, server_state.fqdn, hostname))
        else:
            # The case where hostname == FQDN, we may already have this
            # FQDN in our map from an unmanaged server being reported by
            # hostname.
            if not server_state.managed:
                newly_managed_server = True
                server_state.managed = True
                for service_name, service in server_heartbeat[
                        'services'].items():
                    self._persister.create_server(
                        Server(
                            fsid=service['fsid'],
                            fqdn=server_state.fqdn,
                            managed=True,
                        ))
                    LOG.info("Server %s went from unmanaged to managed" % fqdn)
                    break

        # Heartbeat boot_time is a unix timestamp; normalize to aware UTC.
        boot_time = datetime.datetime.fromtimestamp(
            server_heartbeat['boot_time'], tz=tz.tzutc())
        if new_server:
            hostname = fqdn
            server_state = ServerState(
                fqdn,
                hostname,
                managed=True,
                last_contact=now(),
                boot_time=boot_time,
                ceph_version=server_heartbeat['ceph_version'])
            self.inject_server(server_state)
            for service_name, service in server_heartbeat['services'].items():
                self._persister.create_server(
                    Server(fsid=service['fsid'],
                           fqdn=server_state.fqdn,
                           hostname=server_state.hostname,
                           managed=server_state.managed,
                           last_contact=server_state.last_contact,
                           boot_time=boot_time,
                           ceph_version=server_heartbeat['ceph_version']))
                LOG.info("Saw server %s for the first time" % server_state)
                break

        # Every heartbeat refreshes last_contact and re-persists the server.
        server_state.last_contact = now()
        for service_name, service in server_heartbeat['services'].items():
            self._persister.create_server(
                Server(
                    fsid=service['fsid'],
                    fqdn=server_state.fqdn,
                    last_contact=server_state.last_contact,
                ))
            break

        if server_state.boot_time != boot_time:
            LOG.warn("{0} boot time changed, old {1} new {2}".format(
                server_state.fqdn, server_state.boot_time, boot_time))
            old_boot_time = server_state.boot_time
            server_state.boot_time = boot_time
            for service_name, service in server_heartbeat['services'].items():
                self._persister.create_server(
                    Server(
                        fsid=service['fsid'],
                        fqdn=server_state.fqdn,
                        boot_time=server_state.boot_time,
                    ))
                break

            if old_boot_time is not None:
                # i.e. a reboot, not an unmanaged->managed transition
                if server_state.boot_time < old_boot_time:
                    LOG.warn("Server boot time went backwards")
                elif server_state.boot_time - old_boot_time < REBOOT_THRESHOLD:
                    LOG.warn("Server boot time changed, but only a little")
                else:
                    # A substantial forward change in boot time, that's a
                    # reboot: emit a user visible event
                    LOG.warn("{0} rebooted!".format(fqdn))
                    self._eventer.on_reboot(server_state, False)

        if server_state.ceph_version != server_heartbeat['ceph_version']:
            # Interpret "no package installed but some services running" as
            # meaning we're in the process of upgrading.
            # NOTE(review): the `is None` test below is redundant given how
            # `upgrading` is computed (it already implies version is None).
            upgrading = server_heartbeat[
                'ceph_version'] is None and server_heartbeat['services']
            if server_heartbeat['ceph_version'] is None and upgrading:
                # Ignore version=None while upgrading to avoid generating
                # spurious "ceph uninstalled" events
                pass
            else:
                server_state.ceph_version = server_heartbeat['ceph_version']
                for service_name, service in server_heartbeat[
                        'services'].items():
                    self._persister.create_server(
                        Server(
                            fsid=service['fsid'],
                            fqdn=server_state.fqdn,
                            ceph_version=server_state.ceph_version,
                        ))
                    break

                if not (new_server or newly_managed_server):
                    self._eventer.on_new_version(server_state)

        seen_id_tuples = set()
        for service_name, service in server_heartbeat['services'].items():
            id_tuple = ServiceId(service['fsid'], service['type'],
                                 service['id'])
            seen_id_tuples.add(id_tuple)
            self._register_service(server_state,
                                   id_tuple,
                                   running=True,
                                   status=service['status'],
                                   fsid=service['fsid'],
                                   fqdn=fqdn)

        # For any service which was last reported on this server but
        # is now gone, mark it as not running
        # NOTE(review): `^` is a symmetric difference, so this set also
        # contains newly seen ids -- presumably those were just registered
        # above and are present in self.services; confirm they are always
        # marked running=True so this branch is a no-op for them.
        for unseen_id_tuple in set(
                server_state.services.keys()) ^ seen_id_tuples:
            service_state = self.services[unseen_id_tuple]
            if service_state.running:
                LOG.info("Service %s stopped on server %s" %
                         (service_state, server_state))
                service_state.running = False

        if new_server or newly_managed_server:
            # We do this at the end so that by the time we emit the event
            # the ServiceState objects have been created
            self._eventer.on_server(server_state)
コード例 #11
0
ファイル: __init__.py プロジェクト: fbalak/ceph-integration
    def on_sync_object(self, data):
        """Handle a newly received cluster sync object (map/status blob).

        Saves the raw object if it advances our local version; for health
        objects records overall status, and for osd maps records cluster
        utilization plus one Pool object per pool.

        :param data: dict with keys 'fsid', 'type', 'version', 'data'.
        """
        assert data['fsid'] == self.fsid

        # Deep copy: inject_sync_object may keep/mutate the structure, but
        # we persist the pristine data['data'] below.
        sync_object = copy.deepcopy(data['data'])

        sync_type = SYNC_OBJECT_STR_TYPE[data['type']]
        new_object = self.inject_sync_object(data['type'], data['version'],
                                             sync_object)
        if new_object:
            NS.ceph.objects.SyncObject(
                updated=now(),
                sync_type=sync_type.str,
                version=new_object.version if isinstance(
                    new_object.version, int) else None,
                when=now(),
                data=data['data']).save()

            if sync_type.str == "health":
                NS.ceph.objects.GlobalDetails(
                    status=sync_object['overall_status']).save()
            if sync_type.str == "osd_map":
                util_data = self._get_utilization_data()
                NS.ceph.objects.Utilization(
                    total=util_data['cluster']['total'],
                    used=util_data['cluster']['used'],
                    available=util_data['cluster']['available'],
                    pcnt_used=util_data['cluster']['pcnt_used']).save()

                for raw_pool in sync_object.get('pools', []):
                    Event(
                        Message(priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Updating Pool %s" % raw_pool['pool_name']
                                }))
                    # Fix: initialize defaults so we don't hit a NameError
                    # when no utilization entry matches this pool name
                    # (mirrors the sibling implementation of this method).
                    pool_used = 0
                    pcnt = 0
                    for pool in util_data['pools']:
                        if pool['name'] == raw_pool['pool_name']:
                            pool_used = pool['used']
                            pcnt = pool['pcnt_used']
                    pool_type = 'replicated'
                    if 'erasure_code_profile' in raw_pool and \
                        raw_pool['erasure_code_profile'] != "":
                        pool_type = 'erasure_coded'
                    quota_enabled = False
                    if ('quota_max_objects' in raw_pool and
                        raw_pool['quota_max_objects'] > 0) or \
                        ('quota_max_bytes' in raw_pool and
                         raw_pool['quota_max_bytes'] > 0):
                        quota_enabled = True
                    NS.ceph.objects.Pool(
                        pool_id=raw_pool['pool'],
                        pool_name=raw_pool['pool_name'],
                        pg_num=raw_pool['pg_num'],
                        type=pool_type,
                        erasure_code_profile=raw_pool.get(
                            'erasure_code_profile'),
                        min_size=raw_pool['min_size'],
                        size=raw_pool.get('size', None),
                        quota_enabled=quota_enabled,
                        quota_max_objects=raw_pool['quota_max_objects'],
                        quota_max_bytes=raw_pool['quota_max_bytes'],
                        used=pool_used,
                        percent_used=pcnt).save()
        else:
            Event(
                Message(priority="warning",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "ClusterMonitor.on_sync_object: "
                            "stale object received for %s" % data['type']
                        }))
コード例 #12
0
    def on_sync_object(self, data):
        """Handle a newly received cluster sync object (map/status blob).

        Feeds every map to the request collection, saves the raw object if
        it advances our local version; for health objects records overall
        status, and for osd maps records cluster utilization plus one Pool
        and one Osd object per entry.

        :param data: dict with keys 'fsid', 'type', 'version', 'data'.
        """
        assert data['fsid'] == self.fsid

        # Deep copy: inject_sync_object may keep/mutate the structure, but
        # we persist the pristine data['data'] below.
        sync_object = copy.deepcopy(data['data'])

        sync_type = SYNC_OBJECT_STR_TYPE[data['type']]
        new_object = self.inject_sync_object(data['type'], data['version'],
                                             sync_object)
        # Let in-flight user requests react to the new map (even stale ones).
        self._request_coll.on_map(sync_type, new_object)
        if new_object:
            NS.ceph.objects.SyncObject(
                updated=now(),
                sync_type=sync_type.str,
                version=new_object.version if isinstance(
                    new_object.version, int) else None,
                when=now(),
                data=data['data']).save(update=False)

            if sync_type.str == "health":
                NS.ceph.objects.GlobalDetails(
                    status=sync_object['overall_status']).save()
            if sync_type.str == "osd_map":
                util_data = self._get_utilization_data()
                NS.ceph.objects.Utilization(
                    total=util_data['cluster']['total'],
                    used=util_data['cluster']['used'],
                    available=util_data['cluster']['available'],
                    pcnt_used=util_data['cluster']['pcnt_used']).save()

                for raw_pool in sync_object.get('pools', []):
                    Event(
                        Message(priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Updating Pool %s" % raw_pool['pool_name']
                                }))
                    # Defaults in case no utilization entry matches this
                    # pool's name (avoids an unbound-variable error).
                    pool_used = 0
                    pcnt = 0
                    for pool in util_data['pools']:
                        if pool['name'] == raw_pool['pool_name']:
                            pool_used = pool['used']
                            pcnt = pool['pcnt_used']
                    # A non-empty erasure_code_profile marks the pool as
                    # erasure coded; otherwise it is replicated.
                    pool_type = 'replicated'
                    if 'erasure_code_profile' in raw_pool and \
                        raw_pool['erasure_code_profile'] != "":
                        pool_type = 'erasure_coded'
                    # Quota is "enabled" if either object or byte quota > 0.
                    quota_enabled = False
                    if ('quota_max_objects' in raw_pool and
                        raw_pool['quota_max_objects'] > 0) or \
                        ('quota_max_bytes' in raw_pool and
                         raw_pool['quota_max_bytes'] > 0):
                        quota_enabled = True
                    NS.ceph.objects.Pool(
                        pool_id=raw_pool['pool'],
                        pool_name=raw_pool['pool_name'],
                        pg_num=raw_pool['pg_num'],
                        type=pool_type,
                        erasure_code_profile=raw_pool.get(
                            'erasure_code_profile'),
                        min_size=raw_pool['min_size'],
                        size=raw_pool.get('size', None),
                        quota_enabled=quota_enabled,
                        quota_max_objects=raw_pool['quota_max_objects'],
                        quota_max_bytes=raw_pool['quota_max_bytes'],
                        used=pool_used,
                        percent_used=pcnt).save()
                # Persist one Osd record per OSD in the map.
                for raw_osd in sync_object.get('osds', []):
                    Event(
                        Message(priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Updating OSD %s" % raw_osd['osd']
                                }))
                    NS.ceph.objects.Osd(
                        id=raw_osd['osd'],
                        uuid=raw_osd['uuid'],
                        public_addr=raw_osd['public_addr'],
                        cluster_addr=raw_osd['cluster_addr'],
                        heartbeat_front_addr=raw_osd['heartbeat_front_addr'],
                        heartbeat_back_addr=raw_osd['heartbeat_back_addr'],
                        down_at=raw_osd['down_at'],
                        up_from=raw_osd['up_from'],
                        lost_at=raw_osd['lost_at'],
                        osd_up=raw_osd['up'],
                        osd_in=raw_osd['in'],
                        up_thru=raw_osd['up_thru'],
                        weight=str(raw_osd['weight']),
                        primary_affinity=str(raw_osd['primary_affinity']),
                        state=raw_osd['state'],
                        last_clean_begin=raw_osd['last_clean_begin'],
                        last_clean_end=raw_osd['last_clean_end']).save()
        else:
            Event(
                Message(priority="warning",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "ClusterMonitor.on_sync_object: "
                            "stale object received for %s" % data['type']
                        }))
コード例 #13
0
    def on_version(self, sync_type, new_version):
        """Notify me that a particular version of a particular map exists.

        I may choose to initiate RPC to retrieve the map.

        :param sync_type: SyncObject type whose version was advertised.
        :param new_version: The advertised version (comparable via
            ``sync_type.cmp``).
        :returns: The result of ``self.fetch`` when a fetch is initiated,
            otherwise None.
        """
        Event(
            Message(
                priority="debug",
                publisher=NS.publisher_id,
                payload={"message": "SyncObjects.on_version %s/%s" %
                                    (sync_type.str, new_version)
                         }
            )
        )
        old_version = self.get_version(sync_type)
        # Only act when the advertised version is newer than what we hold.
        if sync_type.cmp(new_version, old_version) > 0:
            known_version = self._known_versions[sync_type]
            if sync_type.cmp(new_version, known_version) > 0:
                # We are out of date: request an up to date copy
                Event(
                    Message(
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "Advanced known version %s/%s "
                                            "%s->%s" % (self._cluster_name,
                                                        sync_type.str,
                                                        known_version,
                                                        new_version
                                                        )
                                 }
                    )
                )
                self._known_versions[sync_type] = new_version
            else:
                Event(
                    Message(
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "on_version: %s is newer than %s"
                                            % (new_version, old_version)
                                 }
                    )
                )

            # If we already have a request out for this type of map,
            # then consider cancelling it if we've already waited for
            # a while.
            if self._fetching_at[sync_type] is not None:
                if now() - self._fetching_at[sync_type] < self.FETCH_TIMEOUT:
                    # Recent fetch still in flight: don't start another.
                    Event(
                        Message(
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={"message": "Fetch already underway for %s"
                                                % sync_type.str
                                     }
                        )
                    )
                    return
                else:
                    # Fetch timed out: abandon it and fall through to retry.
                    Event(
                        Message(
                            priority="debug",
                            publisher=NS.publisher_id,
                            payload={"message": "Abandoning fetch for %s "
                                                "started at %s"
                                                % (sync_type.str,
                                                   self._fetching_at[sync_type]
                                                   )
                                     }
                        )
                    )

            Event(
                Message(
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={"message": "on_version: fetching %s/%s , "
                                        "currently got %s, know %s"
                                        % (sync_type, new_version,
                                           old_version, known_version
                                           )
                             }
                )
            )
            return self.fetch(sync_type)
コード例 #14
0
ファイル: test_util.py プロジェクト: anmolbabu/ceph_bridge
def test_now():
    """util.now() must hand back a plain datetime.datetime (not a subclass)."""
    result = util.now()
    assert type(result) is datetime.datetime