Code example #1
File: __init__.py  Project: nthomas-redhat/notifier
 def __init__(self):
     super(NotificationPluginManager, self).__init__()
     self.daemon = True
     try:
         self.load_plugins()
         notification_medium = []
         self.complete = threading.Event()
         for plugin in NotificationPlugin.plugins:
             notification_medium.append(plugin.name)
         NS.notifier.objects.NotificationMedia(
             media=notification_medium).save()
     except (AttributeError, SyntaxError, ValueError, KeyError, ImportError,
             etcd.EtcdException) as ex:
         Event(
             ExceptionMessage(priority="debug",
                              publisher="notifier",
                              payload={
                                  "message":
                                   'Failed to initialize notification '
                                  'manager',
                                  "exception":
                                  ex
                              }))
         raise ex
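
Every example on this page follows the same pattern: build a Message (or an ExceptionMessage wrapping a caught exception) and hand it to Event for publishing. The sketch below shows that pattern in isolation; it assumes the classes live in tendrl.commons.event and tendrl.commons.message as in the Tendrl sources, and that a Tendrl namespace (NS) and logging socket have already been set up by the running agent.

from tendrl.commons.event import Event
from tendrl.commons.message import ExceptionMessage, Message

# Plain informational event: priority and publisher are required,
# and the payload carries at least a "message" key.
Event(
    Message(priority="info",
            publisher="notifier",
            payload={"message": "notification manager started"}))

# Exception event: the caught exception travels in the payload
# alongside a human readable message.
try:
    raise ValueError("example failure")
except ValueError as ex:
    Event(
        ExceptionMessage(priority="debug",
                         publisher="notifier",
                         payload={"message": "example failure caught",
                                  "exception": ex}))
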
Code example #2
 def _run(self):
     while not self._complete.is_set():
         cluster_summaries = []
         clusters = central_store_util.get_cluster_ids()
         for clusterid in clusters:
             gevent.sleep(0.1)
             try:
                 cluster_summary = self.parse_cluster(clusterid)
                 cluster_summaries.append(cluster_summary.copy())
                 cluster_summary.save(update=False)
             except EtcdKeyNotFound:
                 pass
             except (EtcdException, AttributeError) as ex:
                 Event(
                     ExceptionMessage(priority="debug",
                                      publisher=NS.publisher_id,
                                      payload={
                                          "message":
                                          'Error caught computing summary.',
                                          "exception": ex
                                      }))
                 continue
         NS.sds_monitoring_manager.compute_system_summary(cluster_summaries)
         gevent.sleep(60)
Code example #3
 def init_monitoring(self):
     try:
         node_dets = central_store_util.get_nodes_details()
         for node_det in node_dets:
             if (
                 node_det['node_id'] not in
                 self.monitoring_config_init_nodes
             ):
                 self.init_monitoring_on_node(node_det)
                 self.monitoring_config_init_nodes.append(
                     node_det['node_id']
                 )
     except TendrlPerformanceMonitoringException as ex:
         Event(
             ExceptionMessage(
                 priority="debug",
                 publisher=NS.publisher_id,
                 payload={"message": 'Failed to intialize monitoring '
                                     'configuration on nodes. ',
                          "exception": ex
                          }
             )
         )
         raise ex
Code example #4
    def osds_by_pool(self):
        """Get the OSDS which may be used in this pool

        :return dict of pool ID to OSD IDs in the pool

        """

        result = {}
        for pool_id, pool in self.pools_by_id.items():
            osds = None
            for rule in [
                    r for r in self.data[
                        'crush'
                    ]['rules'] if r['ruleset'] == pool['crush_ruleset']
            ]:
                if rule['min_size'] <= pool['size'] <= rule['max_size']:
                    osds = self.osds_by_rule_id[rule['rule_id']]

            if osds is None:
                # Fallthrough, the pool size didn't fall within any of the
                # rules in its ruleset, Calamari doesn't understand.
                # Just report all OSDs instead of failing horribly.
                Event(
                    Message(
                        priority="error",
                        publisher=NS.publisher_id,
                        payload={"message": "Cannot determine OSDS for pool %s"
                                            % pool_id
                                 }
                    )
                )
                osds = self.osds_by_id.keys()

            result[pool_id] = osds

        return result
Code example #5
def ceph_create_ssh_setup_jobs(parameters):
    node_list = parameters['Node[]']
    ssh_job_ids = []
    ssh_setup_script = NS.ceph_provisioner.get_plugin().setup()
    if len(node_list) > 0:
        for node in node_list:
            if NS.node_context.node_id != node:
                new_params = parameters.copy()
                new_params['Node[]'] = [node]
                new_params['ssh_setup_script'] = ssh_setup_script
                # create same flow for each node in node list except $this
                payload = {
                    "tags": ["tendrl/node_%s" % node],
                    "run": "tendrl.flows.SetupSsh",
                    "status": "new",
                    "parameters": new_params,
                    "parent": parameters['job_id'],
                    "type": "node"
                }
                _job_id = str(uuid.uuid4())
                Job(job_id=_job_id,
                    status="new",
                    payload=payload).save()
                ssh_job_ids.append(_job_id)
                Event(
                    Message(
                        job_id=parameters['job_id'],
                        flow_id=parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "Created SSH setup job %s for node"
                                            " %s" % (_job_id, node)
                                 }
                    )
                )
    return ssh_job_ids
Code example #6
def log(log_priority,
        publisher_id,
        log_payload,
        job_id=None,
        flow_id=None,
        parent_id=None,
        cluster_id=None):
    """Function used for logging errors/output/info.

    Args:
        log_priority [Type: String]: Priority of the log message (error/info)
        publisher_id [Type: String]: Id of the publisher (mandatory)
        log_payload [Type: Dict]: Payload dict; can contain parameters such
                                  as the message that is to be logged
    """
    caller_details = getframeinfo(stack()[1][0])
    caller_details = {
        "filename": caller_details.filename,
        "line_no": caller_details.lineno,
        "function": caller_details.function
    }
    try:
        Event(
            Message(log_priority,
                    publisher_id,
                    log_payload,
                    job_id,
                    flow_id,
                    parent_id,
                    cluster_id,
                    caller=caller_details))
    except Exception:
        if log_priority.lower() == "error":
            sys.stderr.write(log_payload.get("message"))
        else:
            sys.stdout.write(log_payload.get("message"))
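
For illustration, a call to the log() helper above might look like the following; the publisher id and message text are placeholder values, and the keyword arguments simply mirror the signature shown above.

# Hypothetical usage of log(); "node_agent" and the message are
# illustrative, not fixed constants.
log("error",
    "node_agent",
    {"message": "Failed to read configuration file"},
    job_id=None,
    flow_id=None)
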
Code example #7
    def run(self):
        retry_count = 0
        while True:
            volumes = None
            try:
                volumes = NS._int.client.read("clusters/%s/Volumes" %
                                              NS.tendrl_context.integration_id)
            except etcd.EtcdKeyNotFound:
                # ignore as no volumes available till now
                pass

            if volumes:
                for entry in volumes.leaves:
                    volume = Volume(
                        vol_id=entry.key.split("Volumes/")[-1]).load()
                    if volume.name == self.parameters['Volume.volname']:
                        return True

            retry_count += 1
            time.sleep(1)
            if retry_count == 600:
                Event(
                    Message(priority="error",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Volume %s not reflected in tendrl"
                                " yet. Timing out" %
                                self.parameters['Volume.volname']
                            },
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            cluster_id=NS.tendrl_context.integration_id))
                raise AtomExecutionFailedError(
                    "Volume %s not reflected in tendrl yet. Timing out" %
                    self.parameters['Volume.volname'])
Code example #8
 def __init__(self):
     super(NotificationPluginManager, self).__init__()
     try:
         self.load_plugins()
         notification_medium = []
         for plugin in NotificationPlugin.plugins:
             notification_medium.append(plugin.name)
         NS.notification_medium = notification_medium
         NotificationMedia(media=notification_medium).save()
         self.save_alertnotificationconfig()
     except (SyntaxError, ValueError, KeyError, etcd.EtcdKeyNotFound,
             etcd.EtcdConnectionFailed, etcd.EtcdException,
             NotificationPluginError) as ex:
         Event(
             ExceptionMessage(priority="error",
                              publisher="alerting",
                              payload={
                                  "message":
                                   'Failed to initialize notification '
                                  'manager',
                                  "exception":
                                  ex
                              }))
         raise AlertingError(str(ex))
Code example #9
    def _application(self, env, start_response):
        try:
            if env['PATH_INFO'] != '/grafana_callback':
                start_response('404 Not Found',
                               [('Content-Type', 'text/html')])
                response = [b'<h1>Alert Not Found</h1>']
            else:
                data = env['wsgi.input'].read()
                data = json.loads(data)
                self.alert_handler.handle_alert(data["ruleId"])
                start_response('200 OK', [('Content-Type', 'text/html')])
                response = [b'<h1>Alert Received</h1>']
        except (IOError, AssertionError) as ex:
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message":
                                     "Unable to read alert from socket",
                                     "exception": ex
                                 }))
            response = [b'<h1>Error in reading alert from socket</h1>']

        return response
Code example #10
File: __init__.py  Project: rishubhjain/commons
 def _create_node_id(self):
     node_id = str(uuid.uuid4())
     index_key = "/indexes/machine_id/%s" % self.machine_id
     NS._int.wclient.write(index_key, node_id, prevExist=False)
     try:
         Event(
             Message(priority="debug",
                     publisher=NS.publisher_id,
                     payload={
                         "message":
                         "Registered Node (%s) with "
                         "machine_id==%s" % (node_id, self.machine_id)
                     }))
     except KeyError:
         sys.stdout.write("message: Registered Node (%s) with "
                          "machine_id==%s" % (node_id, self.machine_id))
     local_node_id = "/var/lib/tendrl/node_id"
     if not os.path.exists(os.path.dirname(local_node_id)):
         os.makedirs(os.path.dirname(local_node_id))
     with open(local_node_id, 'wb+') as f:
         f.write(node_id)
     global NODE_ID
     NODE_ID = node_id
     return node_id
Code example #11
    def run(self):
        retry_count = 0
        while True:
            _cluster = None
            try:
                _cluster = NS.tendrl.objects.Cluster(
                    integration_id=self.parameters[
                        "TendrlContext.integration_id"
                    ]
                ).load()
            except etcd.EtcdKeyNotFound:
                # pass and continue the time out below
                pass

            if _cluster and _cluster.is_managed == "yes":
                return True

            retry_count += 1
            time.sleep(1)
            if retry_count == 600:
                Event(
                    Message(
                        priority="error",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Cluster data sync still incomplete. "
                                       "Timing out"
                        },
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        cluster_id=NS.tendrl_context.integration_id,
                    )
                )
                raise AtomExecutionFailedError(
                    "Cluster data sync still incomplete. Timing out"
                )
Code example #12
    def complete_jid(self, result):
        """Call this when remote execution is done.

        Implementations must always update .jid appropriately

        here: either to the jid of a new job, or to None.

        """
        self.result = result
        Event(
            Message(priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "Request %s JID %s completed with result="
                        "%s" % (self.id, self.jid, self.result)
                    }))

        self.jid = None

        # This is a default behaviour for UserRequests which don't
        # override this method: assume completion of a JID means the
        # job is now done.
        self.complete()
Code example #13
    def on_map(self, sync_type, sync_object):
        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "PgCreatingRequest %s %s" %
                        (sync_type.str, self._phase)
                    }))
        if self._phase == self.PG_MAP_WAIT:
            if sync_type == PgSummary:
                # Count the PGs in this pool which are not in state 'creating'
                pg_summary = sync_object
                pgs_not_creating = 0

                for state_tuple, count in pg_summary.data['by_pool'][
                        self._pool_id].items():
                    states = state_tuple.split("+")
                    if 'creating' not in states:
                        pgs_not_creating += count

                self._pg_progress.set_created_pg_count(pgs_not_creating)
                Event(
                    Message(priority="debug",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "PgCreatingRequest.on_map: "
                                "pg_counter=%s/%s (final %s)" %
                                (pgs_not_creating, self._pg_progress.goal,
                                 self._pg_progress.final)
                            }))
                if pgs_not_creating >= self._pg_progress.goal:
                    if self._pg_progress.is_final_block():
                        Event(
                            Message(priority="debug",
                                    publisher=NS.publisher_id,
                                    payload={
                                        "message":
                                        "PgCreatingRequest.on_map "
                                        "Creations complete"
                                    }))
                        if self._post_create_commands:
                            Event(
                                Message(priority="debug",
                                        publisher=NS.publisher_id,
                                        payload={
                                            "message":
                                            "PgCreatingRequest."
                                            "on_map Issuing "
                                            "post-create commands"
                                        }))
                            self._submit(self._post_create_commands)
                            self._phase = self.JID_WAIT
                        else:
                            Event(
                                Message(priority="debug",
                                        publisher=NS.publisher_id,
                                        payload={
                                            "message":
                                            "PgCreatingRequest.on_"
                                            "map All done"
                                        }))
                            self.complete()
                    else:
                        Event(
                            Message(priority="debug",
                                    publisher=NS.publisher_id,
                                    payload={
                                        "message":
                                        "PgCreatingREQUEST.on_map "
                                        "Issuing more creates"
                                    }))
                        self._pg_progress.advance_goal()
                        # Request another tranche of PGs up to _block_size
                        self._submit([('osd pool set', {
                            'pool': self._pool_name,
                            'var': 'pg_num',
                            'val': self._pg_progress.goal
                        })])
                        self._phase = self.JID_WAIT
            elif sync_type == OsdMap:
                # Keep an eye on the OsdMap to check that pg_num is what we
                # expect: otherwise if forces of darkness changed pg_num
                # then our PG creation check could get confused and fail
                # to complete.
                osd_map = sync_object
                pool = osd_map.pools_by_id[self._pool_id]
                if pool['pg_num'] != self._pg_progress.expected_count():
                    self.set_error(
                        "PG creation interrupted (unexpected change to pg_num)"
                    )
                    self.complete()
                    return
            else:
                raise NotImplementedError(
                    "Unexpected map {1} in state {2}".format(
                        sync_type, self._phase))

        elif self._phase == self.OSD_MAP_WAIT:
            # Read back the pg_num for my pool from the OSD map
            osd_map = sync_object
            pool = osd_map.pools_by_id[self._pool_id]

            # In Ceph <= 0.67.7, "osd pool set pg_num" will return
            # success even if it hasn't really increased pg_num,
            # so we must examine the OSD map to see if it really succeeded
            if pool['pg_num'] != self._pg_progress.expected_count():
                self.set_error("PG creation failed (check that there"
                               " aren't already PGs in 'creating' state)")
                self.complete()
                return

            assert self._await_version
            ready = osd_map.version >= self._await_version
            if ready:
                # OSD map advancement either means a PG creation round
                # completed, or that the post_create_commands completed.
                # Distinguish by looking at pg_progress.
                if self._pg_progress.is_complete():
                    # This was the OSD map update from the
                    # post_create_commands, so we're all done!
                    self.complete()
                else:
                    # This was the OSD map update from a PG creation command,
                    # so start waiting for the pgs
                    self._phase = self.PG_MAP_WAIT
        else:
            raise NotImplementedError("Unexpected {0} in phase {1}".format(
                sync_type, self._phase))
Code example #14
File: __init__.py  Project: Tendrl/ceph-integration
    def _get_utilization_data(self):
        from ceph_argparse import json_command
        import rados
        _conf_file = os.path.join("/etc/ceph",
                                  NS.tendrl_context.cluster_name + ".conf")
        # TODO(shtripat) use ceph.ceph_command instead of rados/json_command
        cluster_handle = rados.Rados(
            name=ceph.RADOS_NAME,
            clustername=NS.tendrl_context.cluster_name,
            conffile=_conf_file)
        cluster_handle.connect()
        prefix = 'df'
        ret, outbuf, outs = json_command(cluster_handle,
                                         prefix=prefix,
                                         argdict={},
                                         timeout=ceph.RADOS_TIMEOUT)
        if ret != 0:
            cluster_handle.shutdown()
            raise rados.Error(outs)
        else:
            outbuf = outbuf.replace('RAW USED', 'RAW_USED')
            outbuf = outbuf.replace('%RAW USED', '%RAW_USED')
            outbuf = outbuf.replace('MAX AVAIL', 'MAX_AVAIL')
            lines = outbuf.split('\n')
            index = 0
            cluster_stat = {}
            pool_stat = {}
            pool_stat_available = False
            cluster_handle.shutdown()

            while index < len(lines):
                line = lines[index]
                if line == "" or line == '\n':
                    index += 1
                    continue
                if "GLOBAL" in line:
                    index += 1
                    if len(lines) < 3:
                        raise rados.Error("Failed to parse pool stats data")
                    cluster_fields = lines[index].split()
                    cluster_size_idx = self._idx_in_list(
                        cluster_fields, 'SIZE')
                    cluster_avail_idx = self._idx_in_list(
                        cluster_fields, 'AVAIL')
                    cluster_used_idx = self._idx_in_list(
                        cluster_fields, 'RAW_USED')
                    cluster_pcnt_used_idx = self._idx_in_list(
                        cluster_fields, '%RAW_USED')
                    if cluster_size_idx == -1 or cluster_avail_idx == -1 or \
                        cluster_used_idx == -1 or cluster_pcnt_used_idx == -1:
                        raise rados.Error("Missing fields in cluster stat")
                    index += 1
                    if index >= len(lines):
                        Event(
                            Message(priority="debug",
                                    publisher=NS.publisher_id,
                                    payload={
                                        "message": "No cluster stats to parse"
                                    }))
                        return {'cluster': cluster_stat, 'pools': {}}
                    line = lines[index]
                    cluster_fields = line.split()
                    if len(cluster_fields) < 4:
                        Event(
                            Message(priority="debug",
                                    publisher=NS.publisher_id,
                                    payload={
                                        "message":
                                        "Missing fields in cluster"
                                        " stat"
                                    }))
                        return {'cluster': cluster_stat, 'pools': {}}
                    cluster_stat['total'] = self._to_bytes(
                        cluster_fields[cluster_size_idx])
                    cluster_stat['used'] = self._to_bytes(
                        cluster_fields[cluster_used_idx])
                    cluster_stat['available'] = self._to_bytes(
                        cluster_fields[cluster_avail_idx])
                    cluster_stat['pcnt_used'] = cluster_fields[
                        cluster_pcnt_used_idx]
                if "POOLS" in line:
                    pool_stat_available = True
                    index += 1
                    if index >= len(lines):
                        Event(
                            Message(
                                priority="debug",
                                publisher=NS.publisher_id,
                                payload={"message": "No pool stats to parse"}))
                        return {'cluster': cluster_stat, 'pools': {}}
                    pool_fields = lines[index].split()
                    pool_name_idx = self._idx_in_list(pool_fields, 'NAME')
                    pool_id_idx = self._idx_in_list(pool_fields, 'ID')
                    pool_used_idx = self._idx_in_list(pool_fields, 'USED')
                    pool_pcnt_used_idx = self._idx_in_list(
                        pool_fields, '%USED')
                    pool_max_avail_idx = self._idx_in_list(
                        pool_fields, 'MAX_AVAIL')
                    if pool_name_idx == -1 or pool_id_idx == -1 or \
                        pool_used_idx == -1 or pool_pcnt_used_idx == -1 or \
                        pool_max_avail_idx == -1:
                        Event(
                            Message(priority="debug",
                                    publisher=NS.publisher_id,
                                    payload={
                                        "message": "Missing fields in pool "
                                        "stat"
                                    }))
                        return {'cluster': cluster_stat, 'pools': {}}
                    index += 1
                if pool_stat_available is True:
                    line = lines[index]
                    pool_fields = line.split()
                    if len(pool_fields) < 5:
                        Event(
                            Message(priority="debug",
                                    publisher=NS.publisher_id,
                                    payload={
                                        "message": "Missing fields in pool"
                                        " stat"
                                    }))
                        return {'cluster': cluster_stat, 'pools': {}}

                    loc_dict = {}
                    loc_dict['available'] = self._to_bytes(
                        pool_fields[pool_max_avail_idx])
                    loc_dict['used'] = self._to_bytes(
                        pool_fields[pool_used_idx])
                    loc_dict['pcnt_used'] = pool_fields[pool_pcnt_used_idx]
                    pool_stat[pool_fields[pool_name_idx]] = loc_dict
                index += 1

            return {'cluster': cluster_stat, 'pools': pool_stat}
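
The helper _idx_in_list used above is not shown on this page; from the way it is called, it appears to return the position of a column header within the split field list, or -1 when the header is missing. A minimal sketch under that assumption:

def _idx_in_list(fields, name):
    # Return the index of name in fields, or -1 if it is absent.
    try:
        return fields.index(name)
    except ValueError:
        return -1
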
Code example #15
File: __init__.py  Project: Tendrl/ceph-integration
    def on_sync_object(self, data):

        assert data['fsid'] == self.fsid

        sync_object = copy.deepcopy(data['data'])

        sync_type = SYNC_OBJECT_STR_TYPE[data['type']]
        new_object = self.inject_sync_object(data['type'], data['version'],
                                             sync_object)
        self._request_coll.on_map(sync_type, new_object)
        if new_object:
            # Check and raise any alerts if required

            # TODO(team) Enabled the below if condition as when
            # alerting needed for cluster health, mon status, pool
            # status etc

            # if sync_type.str == "health":
            #    self._on_health(sync_object)
            # if sync_type.str == "mon_status":
            #    self._on_mon_status(sync_object)
            if sync_type.str == "osd_map":
                # self._on_pool_status(sync_object)
                self._on_osd_map(sync_object)

            NS.ceph.objects.SyncObject(
                updated=now(),
                sync_type=sync_type.str,
                version=new_object.version if isinstance(
                    new_object.version, int) else None,
                when=now(),
                data=data['data']).save(update=False)

            if sync_type.str == "health":
                NS.ceph.objects.GlobalDetails(
                    status=sync_object['overall_status']).save()
            if sync_type.str == "osd_map":
                # Pool out of band deletion handling
                try:
                    pools = NS._int.client.read(
                        "clusters/%s/Pools" % NS.tendrl_context.integration_id)
                    old_pool_ids = []
                    for pool in pools.leaves:
                        old_pool_ids.append(int(pool.key.split("/")[-1]))
                    new_pool_ids = []
                    for raw_pool in sync_object.get('pools', []):
                        new_pool_ids.append(raw_pool['pool'])
                    delete_pool_ids = set(old_pool_ids) - set(new_pool_ids)
                    for id in delete_pool_ids:
                        NS._int.client.delete(
                            "clusters/%s/Pools/%s" %
                            (NS.tendrl_context.integration_id, id),
                            recursive=True)
                except etcd.EtcdKeyNotFound as ex:
                    Event(
                        ExceptionMessage(priority="debug",
                                         publisher=NS.publisher_id,
                                         payload={
                                             "message":
                                             "No pools found \
                                     for ceph cluster %s" %
                                             NS.tendrl_context.integration_id,
                                             "exception":
                                             ex
                                         }))
                for raw_pool in sync_object.get('pools', []):
                    Event(
                        Message(priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Updating Pool %s" % raw_pool['pool_name']
                                }))
                    pool_type = 'replicated'
                    if 'erasure_code_profile' in raw_pool and \
                        raw_pool['erasure_code_profile'] != "":
                        pool_type = 'erasure_coded'
                    quota_enabled = False
                    if ('quota_max_objects' in raw_pool and
                        raw_pool['quota_max_objects'] > 0) or \
                        ('quota_max_bytes' in raw_pool and
                         raw_pool['quota_max_bytes'] > 0):
                        quota_enabled = True
                    NS.ceph.objects.Pool(
                        pool_id=raw_pool['pool'],
                        pool_name=raw_pool['pool_name'],
                        pg_num=raw_pool['pg_num'],
                        type=pool_type,
                        erasure_code_profile=raw_pool.get(
                            'erasure_code_profile'),
                        min_size=raw_pool['min_size'],
                        size=raw_pool.get('size', None),
                        quota_enabled=quota_enabled,
                        quota_max_objects=raw_pool['quota_max_objects'],
                        quota_max_bytes=raw_pool['quota_max_bytes'],
                    ).save()
                # Osd out of band deletion handling
                try:
                    osds = NS._int.client.read(
                        "clusters/%s/Osds" % NS.tendrl_context.integration_id)
                    old_osds = []
                    for osd in osds.leaves:
                        old_osds.append(str(osd.key.split("/")[-1]))
                    new_osds = []
                    for raw_osd in sync_object.get('osds', []):
                        new_osds.append(raw_osd['uuid'])
                    delete_osds = set(old_osds) - set(new_osds)
                    for id in delete_osds:
                        NS._int.client.delete(
                            "clusters/%s/Osds/%s" %
                            (NS.tendrl_context.integration_id, id),
                            recursive=True)
                except etcd.EtcdKeyNotFound as ex:
                    Event(
                        ExceptionMessage(priority="debug",
                                         publisher=NS.publisher_id,
                                         payload={
                                             "message":
                                             "key not found in etcd",
                                             "exception": ex
                                         }))
                for raw_osd in sync_object.get('osds', []):
                    Event(
                        Message(priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Updating OSD %s" % raw_osd['osd']
                                }))
                    osd_host = socket.gethostbyaddr(
                        raw_osd['public_addr'].split(':')[0])[0]
                    NS.ceph.objects.Osd(
                        id=raw_osd['osd'],
                        uuid=raw_osd['uuid'],
                        hostname=osd_host,
                        public_addr=raw_osd['public_addr'],
                        cluster_addr=raw_osd['cluster_addr'],
                        heartbeat_front_addr=raw_osd['heartbeat_front_addr'],
                        heartbeat_back_addr=raw_osd['heartbeat_back_addr'],
                        down_at=raw_osd['down_at'],
                        up_from=raw_osd['up_from'],
                        lost_at=raw_osd['lost_at'],
                        osd_up=raw_osd['up'],
                        osd_in=raw_osd['in'],
                        up_thru=raw_osd['up_thru'],
                        weight=str(raw_osd['weight']),
                        primary_affinity=str(raw_osd['primary_affinity']),
                        state=raw_osd['state'],
                        last_clean_begin=raw_osd['last_clean_begin'],
                        last_clean_end=raw_osd['last_clean_end']).save()
        else:
            Event(
                Message(priority="debug",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "ClusterMonitor.on_sync_object: "
                            "stale object received for %s" % data['type']
                        }))
Code example #16
def get_node_network():
    """return

           [{"ipv4": ["ipv4address", ...],
             "ipv6": ["ipv6address, ..."],
             "netmask": ["subnet", ...],
             "subnet": "subnet",
             "status":"up/down",
             "interface_id": "",
             "sysfs_id": "",
             "device_link": "",
             "interface_type": "",
             "model": "",
             "driver_modules": "",
             "drive": "",
             "hw_address": "",
             "link_detected": ""
             }, ...
          ]
    """
    rv = []
    network_interfaces = get_node_interface()
    cmd = cmd_utils.Command('hwinfo --network')
    out, err, rc = cmd.run()
    if not err or "vdsmdummy: command not found" in err:
        for interface in out.split('\n\n'):
            devlist = {
                "interface_id": "",
                "sysfs_id": "",
                "device_link": "",
                "interface_type": "",
                "model": "",
                "driver_modules": "",
                "drive": "",
                "interface": "",
                "hw_address": "",
                "link_detected": ""
            }
            for line in interface.split('\n'):
                if "Unique ID" in line:
                    devlist['interface_id'] = \
                        line.split(':')[1].lstrip()
                elif "SysFS ID" in line:
                    devlist['sysfs_id'] = \
                        line.split(':')[1].lstrip()
                elif "SysFS Device Link" in line:
                    devlist['device_link'] = \
                        line.split(':')[1].lstrip()
                elif "Hardware Class" in line:
                    devlist['interface_type'] = \
                        line.split(':')[1].lstrip()
                elif "Model" in line:
                    devlist['model'] = \
                        line.split(':')[1].lstrip().replace('"', "")
                elif "Driver Modules" in line:
                    devlist['driver_modules'] = \
                        line.split(':')[1].lstrip().replace('"', "")
                elif "Driver" in line:
                    devlist['driver'] = \
                        line.split(':')[1].lstrip().replace('"', "")
                elif "Device File" in line:
                    devlist['interface'] = \
                        line.split(':')[1].lstrip()
                elif "HW Address" in line:
                    devlist['hw_address'] = \
                        line.split(':')[1].lstrip()
                elif "Link detected" in line:
                    devlist['link_detected'] = \
                        line.split(':')[1].lstrip()
            if devlist["interface"] in network_interfaces:
                interface_name = devlist["interface"]
                network_interfaces[interface_name].update(devlist)
                rv.append(network_interfaces[interface_name])
    else:
        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={"message": err}))

    return rv
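
A hypothetical call of get_node_network(); the field names come straight from the docstring above, and the module path needed to import it is not shown on this page.

# Iterate the merged interface records returned by get_node_network().
for iface in get_node_network():
    print(iface["interface"], iface["hw_address"], iface["link_detected"])
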
Code example #17
def process_job(job):
    jid = job.key.split('/')[-1]
    job_status_key = "/queue/%s/status" % jid
    job_lock_key = "/queue/%s/locked_by" % jid
    NS.node_context = NS.node_context.load()
    # Check job not already locked by some agent
    try:
        _locked_by = etcd_utils.read(job_lock_key).value
        if _locked_by:
            return
    except etcd.EtcdKeyNotFound:
        pass

    # Check job not already "finished", or "processing"
    try:
        _status = etcd_utils.read(job_status_key).value
        if _status in ["finished", "processing"]:
            return
    except etcd.EtcdKeyNotFound:
        pass

    try:
        _job_timeout_key = "/queue/%s/timeout" % jid
        _timeout = None
        _timeout = etcd_utils.read(_job_timeout_key).value
        if _timeout:
            _timeout = _timeout.lower()
    except etcd.EtcdKeyNotFound:
        pass

    # tendrl-node-agent tagged as tendrl/monitor will ensure
    # >10 min old "new" jobs are timed out and marked as
    # "failed" (the parent job of these jobs will also be
    # marked as "failed")
    if "tendrl/monitor" in NS.node_context.tags and \
        _timeout == "yes":
        _job_valid_until_key = "/queue/%s/valid_until" % jid
        _valid_until = None
        try:
            _valid_until = etcd_utils.read(
                _job_valid_until_key).value
        except etcd.EtcdKeyNotFound:
            pass

        if _valid_until:
            _now_epoch = (time_utils.now() -
                          datetime.datetime(1970, 1,
                                            1).replace(
                              tzinfo=utc)).total_seconds()
            if int(_now_epoch) >= int(_valid_until):
                # Job has "new" status since 10 minutes,
                # mark status as "failed" and Job.error =
                # "Timed out"
                try:
                    etcd_utils.write(job_status_key,
                                     "failed",
                                     prevValue="new")
                except etcd.EtcdCompareFailed:
                    pass
                else:
                    job = NS.tendrl.objects.Job(job_id=jid).load()
                    _msg = str("Timed-out (>10min as 'new')")
                    job.errors = _msg
                    job.save()
                    if job.payload.get('parent') is None:
                        alert_utils.alert_job_status(
                            "failed",
                            "Job timed out (job_id: %s)" % jid,
                            integration_id=NS.tendrl_context.integration_id or
                            job.payload['parameters'].get(
                                'TendrlContext.integration_id'
                            ),
                            cluster_name=NS.tendrl_context.cluster_name or
                            job.payload['parameters'].get(
                                'TendrlContext.cluster_name'
                            )
                        )
                    return
        else:
            _now_plus_10 = time_utils.now() + datetime.timedelta(minutes=10)
            _epoch_start = datetime.datetime(1970, 1, 1).replace(tzinfo=utc)

            # noinspection PyTypeChecker
            _now_plus_10_epoch = (_now_plus_10 -
                                  _epoch_start).total_seconds()
            etcd_utils.write(_job_valid_until_key,
                             int(_now_plus_10_epoch))

    job = NS.tendrl.objects.Job(job_id=jid).load()
    if job.payload["type"] == NS.type and \
            job.status == "new":
        # Job routing
        # Flows created by tendrl-api use 'tags' from flow
        # definition to target jobs
        _tag_match = False
        if job.payload.get("tags", []):
            for flow_tag in job.payload['tags']:
                if flow_tag in NS.node_context.tags:
                    _tag_match = True

        if not _tag_match:
            _job_tags = ", ".join(job.payload.get("tags", []))
            _msg = "Node (%s)(type: %s)(tags: %s) will not " \
                   "process job-%s (tags: %s)" % \
                   (NS.node_context.node_id, NS.type,
                    NS.node_context.tags, jid,
                    _job_tags)
            logger.log(
                "info",
                NS.publisher_id,
                {"message": _msg}
            )
            return

        job_status_key = "/queue/%s/status" % job.job_id
        job_lock_key = "/queue/%s/locked_by" % job.job_id
        try:
            lock_info = dict(node_id=NS.node_context.node_id,
                             fqdn=NS.node_context.fqdn,
                             tags=NS.node_context.tags,
                             type=NS.type)
            etcd_utils.write(job_status_key, "processing",
                             prevValue="new")
            etcd_utils.write(job_lock_key,
                             json.dumps(lock_info))
        except etcd.EtcdCompareFailed:
            # job is already being processed by some tendrl
            # agent
            return

        the_flow = None
        try:
            current_ns, flow_name, obj_name = \
                _extract_fqdn(job.payload['run'])

            if obj_name:
                runnable_flow = current_ns.ns.get_obj_flow(
                    obj_name, flow_name)
            else:
                runnable_flow = current_ns.ns.get_flow(flow_name)

            the_flow = runnable_flow(parameters=job.payload[
                'parameters'], job_id=job.job_id)
            logger.log(
                "info",
                NS.publisher_id,
                {"message": "Processing Job %s" %
                            job.job_id},
                job_id=job.job_id,
                flow_id=the_flow.parameters['flow_id']
            )

            logger.log(
                "info",
                NS.publisher_id,
                {"message": "Running Flow %s" %
                            job.payload['run']},
                job_id=job.job_id,
                flow_id=the_flow.parameters['flow_id']
            )
            the_flow.run()
            try:
                etcd_utils.write(job_status_key,
                                 "finished",
                                 prevValue="processing")
            except etcd.EtcdCompareFailed:
                # This should not happen!
                _msg = "Cannot mark job as 'finished', " \
                       "current job status invalid"
                raise FlowExecutionFailedError(_msg)

            logger.log(
                "info",
                NS.publisher_id,
                {"message": "Job (%s):  Finished "
                            "Flow %s" % (
                                job.job_id,
                                job.payload['run'])},
                job_id=job.job_id,
                flow_id=the_flow.parameters['flow_id'],
            )
            if job.payload.get('parent') is None:
                alert_utils.alert_job_status(
                    "finished",
                    "Job finished successfully (job_id: %s)" % job.job_id,
                    integration_id=NS.tendrl_context.integration_id or
                    job.payload['parameters'].get(
                        'TendrlContext.integration_id'
                    ),
                    cluster_name=NS.tendrl_context.cluster_name or
                    job.payload['parameters'].get(
                        'TendrlContext.cluster_name'
                    )
                )
        except (FlowExecutionFailedError,
                AtomExecutionFailedError,
                Exception) as e:
            _trace = traceback.format_exc()
            _msg = "Failure in Job %s Flow %s with error:" % \
                   (job.job_id, job.payload['run'])
            Event(
                ExceptionMessage(
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={"message": _msg + _trace,
                             "exception": e
                             }
                )
            )
            if the_flow:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": _msg + "\n" + _trace},
                    job_id=job.job_id,
                    flow_id=the_flow.parameters['flow_id']
                )
            else:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": _msg + "\n" + _trace}
                )

            try:
                etcd_utils.write(job_status_key,
                                 "failed",
                                 prevValue="processing")
            except etcd.EtcdCompareFailed:
                # This should not happen!
                _msg = "Cannot mark job as 'failed', current" \
                       "job status invalid"
                raise FlowExecutionFailedError(_msg)
            else:
                job = job.load()
                job.errors = _trace
                if job.payload.get('parent') is None:
                    alert_utils.alert_job_status(
                        "failed",
                        "Job failed (job_id: %s)" % job.job_id,
                        integration_id=NS.tendrl_context.integration_id or
                        job.payload['parameters'].get(
                            'TendrlContext.integration_id'
                        ),
                        cluster_name=NS.tendrl_context.cluster_name or
                        job.payload['parameters'].get(
                            'TendrlContext.cluster_name'
                        )
                    )
                job.save()
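
The timeout bookkeeping in process_job() stores valid_until as Unix-epoch seconds and compares the current time against it before failing a stale "new" job. Below is a standalone sketch of that arithmetic, assuming pytz supplies the utc tzinfo as the code above suggests; the helper name is illustrative, not a Tendrl API.

import datetime

from pytz import utc

_EPOCH = datetime.datetime(1970, 1, 1).replace(tzinfo=utc)


def epoch_seconds(dt):
    # Seconds since the Unix epoch for a timezone-aware datetime.
    return int((dt - _EPOCH).total_seconds())


now = datetime.datetime.now(utc)
valid_until = epoch_seconds(now + datetime.timedelta(minutes=10))
# A job that is still "new" once epoch_seconds(now) >= valid_until
# has its status flipped to "failed".
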
Code example #18
def main():
    ceph_integration.CephIntegrationNS()
    TendrlNS()

    NS.type = "sds"
    NS.publisher_id = "ceph_integration"

    from tendrl.ceph_integration import sds_sync

    NS.state_sync_thread = sds_sync.CephIntegrationSdsSyncStateThread()

    NS.node_context.save()

    # Check if Integration is part of any Tendrl imported/created sds cluster
    try:
        NS.tendrl_context = NS.tendrl_context.load()
        Event(
            Message(priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "Integration %s is part of sds cluster" %
                        NS.tendrl_context.integration_id
                    }))

    except etcd.EtcdKeyNotFound:
        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "Node %s is not part of any sds cluster" %
                        NS.node_context.node_id
                    }))
        raise Exception("Integration cannot be started, "
                        "please Import or Create sds cluster in Tendrl "
                        "and include Node %s" % NS.node_context.node_id)
    if NS.tendrl_context.integration_id is None:
        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "Node %s is not part of any sds cluster" %
                        NS.node_context.node_id
                    }))
        raise Exception("Integration cannot be started, "
                        "please Import or Create sds cluster in Tendrl "
                        "and include Node %s" % NS.node_context.node_id)

    NS.tendrl_context.save()
    NS.ceph.definitions.save()
    NS.ceph.config.save()

    if NS.config.data.get("with_internal_profiling", False):
        from tendrl.commons import profiler
        profiler.start()

    m = CephIntegrationManager()
    m.start()

    complete = gevent.event.Event()

    def shutdown():
        Event(
            Message(priority="info",
                    publisher=NS.publisher_id,
                    payload={"message": "Signal handler: stopping"}))
        complete.set()

    gevent.signal(signal.SIGTERM, shutdown)
    gevent.signal(signal.SIGINT, shutdown)

    while not complete.is_set():
        complete.wait(timeout=1)
Code example #19
File: __init__.py  Project: rishubhjain/commons
    def load(self):
        if "Message" not in self.__class__.__name__:
            try:
                # Generate current in memory object hash
                self.hash = self._hash()
                _hash_key = "/{0}/hash".format(self.value)
                _stored_hash = None
                try:
                    _stored_hash = NS._int.client.read(_hash_key).value
                except (etcd.EtcdConnectionFailed, etcd.EtcdException) as ex:
                    if type(ex) != etcd.EtcdKeyNotFound:
                        NS._int.reconnect()
                        _stored_hash = NS._int.client.read(_hash_key).value
                if self.hash == _stored_hash:
                    # No changes in stored object and current object,
                    # dont save current object to central store
                    return self
            except TypeError:
                # no hash for this object, save the current hash as is
                pass

        _copy = self._copy_vars()

        for item in _copy.render():
            try:
                Event(
                    Message(priority="debug",
                            publisher=NS.publisher_id,
                            payload={"message": "Reading %s" % item['key']}))
            except KeyError:
                sys.stdout.write("Reading %s" % item['key'])

            try:
                etcd_resp = NS._int.client.read(item['key'], quorum=True)
            except (etcd.EtcdConnectionFailed, etcd.EtcdException) as ex:
                if type(ex) == etcd.EtcdKeyNotFound:
                    continue
                else:
                    NS._int.reconnect()
                    etcd_resp = NS._int.client.read(item['key'], quorum=True)

            value = etcd_resp.value
            if item['dir']:
                key = item['key'].split('/')[-1]
                dct = dict(key=value)
                if hasattr(_copy, item['name']):
                    dct = getattr(_copy, item['name'])
                    if type(dct) == dict:
                        dct[key] = value
                    else:
                        setattr(_copy, item['name'], dct)
                else:
                    setattr(_copy, item['name'], dct)
                continue

            # convert list, dict (json) to python based on definitions
            _type = self._defs.get("attrs", {}).get(item['name'],
                                                    {}).get("type")
            if _type:
                if _type.lower() in ['json', 'list']:
                    if value:
                        try:
                            value = json.loads(value.decode('utf-8'))
                        except ValueError as ex:
                            _msg = "Error load() attr %s for object %s" % \
                                   (item['name'], type(self).__name__)
                            Event(
                                ExceptionMessage(priority="debug",
                                                 publisher=NS.publisher_id,
                                                 payload={
                                                     "message": _msg,
                                                     "exception": ex
                                                 }))
                    else:
                        if _type.lower() == "list":
                            value = list()
                        if _type.lower() == "json":
                            value = dict()

            setattr(_copy, item['name'], value)
        return _copy
Code example #20
    def run(self):
        Event(
            Message(
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message":
                    "Checking if volume %s stopped" %
                    self.parameters['Volume.volname']
                },
                job_id=self.parameters["job_id"],
                flow_id=self.parameters["flow_id"],
                cluster_id=NS.tendrl_context.integration_id,
            ))
        try:
            fetched_volume = Volume(
                vol_id=self.parameters['Volume.vol_id']).load()
        except etcd.EtcdKeyNotFound:
            Event(
                Message(
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "Volume %s does not exist" %
                        self.parameters['Volume.volname']
                    },
                    job_id=self.parameters["job_id"],
                    flow_id=self.parameters["flow_id"],
                    cluster_id=NS.tendrl_context.integration_id,
                ))
            return False

        if fetched_volume.status == "Stopped":
            Event(
                Message(
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "Volume %s is stopped" %
                        self.parameters['Volume.volname']
                    },
                    job_id=self.parameters["job_id"],
                    flow_id=self.parameters["flow_id"],
                    cluster_id=NS.tendrl_context.integration_id,
                ))
            return True
        else:
            Event(
                Message(
                    priority="warning",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "Volume %s is already started" %
                        self.parameters['Volume.volname']
                    },
                    job_id=self.parameters["job_id"],
                    flow_id=self.parameters["flow_id"],
                    cluster_id=NS.tendrl_context.integration_id,
                ))
            return False
Code example #21
    def run(self):
        try:
            # Lock nodes
            flow_utils.acquire_node_lock(self.parameters)
            integration_id = self.parameters['TendrlContext.integration_id']
            if integration_id is None:
                raise FlowExecutionFailedError(
                    "TendrlContext.integration_id cannot be empty")

            supported_sds = NS.compiled_definitions.get_parsed_defs(
            )['namespace.tendrl']['supported_sds']
            sds_name = self.parameters["TendrlContext.sds_name"]
            if sds_name not in supported_sds:
                raise FlowExecutionFailedError("SDS (%s) not supported" %
                                               sds_name)

            ssh_job_ids = []
            ssh_job_ids = \
                flow_utils.gluster_create_ssh_setup_jobs(
                    self.parameters,
                    skip_current_node=True
                )

            while True:
                time.sleep(3)
                all_status = {}
                for job_id in ssh_job_ids:
                    job = NS.tendrl.objects.Job(job_id=job_id).load()
                    all_status[job_id] = job.status

                _failed = {
                    _jid: status
                    for _jid, status in all_status.iteritems()
                    if status == "failed"
                }
                if _failed:
                    raise FlowExecutionFailedError(
                        "SSH setup failed for jobs %s cluster %s" %
                        (str(_failed), integration_id))
                if all(
                    [status == "finished" for status in all_status.values()]):
                    logger.log("info",
                               NS.publisher_id, {
                                   "message":
                                   "SSH setup completed for all "
                                   "nodes in cluster %s" % integration_id
                               },
                               job_id=self.parameters['job_id'],
                               flow_id=self.parameters['flow_id'])

                    break

            # SSH setup jobs finished above, now install sds
            # bits and expand the cluster
            logger.log("info",
                       NS.publisher_id, {
                           "message":
                           "Expanding Gluster Storage"
                           " Cluster %s" % integration_id
                       },
                       job_id=self.parameters['job_id'],
                       flow_id=self.parameters['flow_id'])
            gluster_help.expand_gluster(self.parameters)
            logger.log(
                "info",
                NS.publisher_id, {
                    "message":
                    "SDS install/config completed on newly "
                    "expanded nodes, Please wait while "
                    "tendrl-node-agents detect sds details on the newly "
                    "expanded nodes %s" % self.parameters['Node[]']
                },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'])

            # Wait till detected cluster is populated for the nodes
            while True:
                time.sleep(3)
                all_status = []
                detected_cluster = ""
                different_cluster_id = False
                dc = ""
                for node in self.parameters['Node[]']:
                    try:
                        dc = NS.tendrl.objects.DetectedCluster(
                            node_id=node).load()
                        if not detected_cluster:
                            detected_cluster = dc.detected_cluster_id
                        else:
                            if detected_cluster != dc.detected_cluster_id:
                                all_status.append(False)
                                different_cluster_id = True
                                break
                        all_status.append(True)
                    except etcd.EtcdKeyNotFound:
                        all_status.append(False)
                if different_cluster_id:
                    raise FlowExecutionFailedError(
                        "Seeing different detected cluster id in"
                        " different nodes. %s and %s" %
                        (detected_cluster, dc.detected_cluster_id))

                if all_status:
                    if all(all_status):
                        break

            # Create the params list for import cluster flow
            new_params = dict()
            new_params['Node[]'] = self.parameters['Node[]']
            new_params['TendrlContext.integration_id'] = integration_id

            # Get detected cluster details from one of the nodes in the list
            dc = NS.tendrl.objects.DetectedCluster(
                node_id=self.parameters['Node[]'][0]).load()
            sds_pkg_name = dc.sds_pkg_name
            new_params['import_after_expand'] = True
            sds_pkg_version = dc.sds_pkg_version
            new_params['DetectedCluster.sds_pkg_name'] = \
                sds_pkg_name
            new_params['DetectedCluster.sds_pkg_version'] = \
                sds_pkg_version

            tags = []
            for node in self.parameters['Node[]']:
                tags.append("tendrl/node_%s" % node)
            payload = {
                "tags": tags,
                "run": "tendrl.flows.ImportCluster",
                "status": "new",
                "parameters": new_params,
                "parent": self.parameters['job_id'],
                "type": "node"
            }
            _job_id = str(uuid.uuid4())
            # release lock before import cluster
            flow_utils.release_node_lock(self.parameters)

            NS.tendrl.objects.Job(job_id=_job_id,
                                  status="new",
                                  payload=payload).save()
            logger.log(
                "info",
                NS.publisher_id, {
                    "message":
                    "Please wait while Tendrl imports ("
                    "job_id: %s) newly expanded "
                    "%s storage nodes in cluster %s" %
                    (_job_id, sds_pkg_name,
                     NS.tendrl.objects.Cluster(
                         integration_id=integration_id).load().short_name)
                },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'])
        except Exception as ex:
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message": ex.message,
                                     "exception": ex
                                 }))
            # raising exception to mark job as failed
            raise ex
        finally:
            # always release the node lock, even if an exception occurred
            flow_utils.release_node_lock(self.parameters)
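
The flow above blocks until every SSH setup job either finishes or fails. A self-contained sketch of just that decision step, with a plain dict standing in for the Job objects loaded from etcd and RuntimeError standing in for FlowExecutionFailedError:

def ssh_jobs_done(all_status):
    # all_status maps job_id -> status, mirroring the dict built in the flow
    failed = {jid: status for jid, status in all_status.items()
              if status == "failed"}
    if failed:
        raise RuntimeError("SSH setup failed for jobs %s" % failed)
    # True only once every job has reached "finished"
    return all(status == "finished" for status in all_status.values())

print(ssh_jobs_done({"job-1": "finished", "job-2": "new"}))       # False
print(ssh_jobs_done({"job-1": "finished", "job-2": "finished"}))  # True
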
Code example #22
def sync_volumes(
    volumes, index,
    vol_options,
    sync_ttl,
    cluster_short_name,
    devicetree
):
    NS.node_context = NS.tendrl.objects.NodeContext().load()
    tag_list = NS.node_context.tags
    # Raise alerts for volume state change.
    cluster_provisioner = "provisioner/%s" % NS.tendrl_context.integration_id
    if cluster_provisioner in tag_list:
        try:
            _volume = NS.tendrl.objects.GlusterVolume(
                NS.tendrl_context.integration_id,
                vol_id=volumes['volume%s.id' % index]
            ).load()
            if _volume.locked_by and 'job_id' in _volume.locked_by and \
                _volume.current_job.get('status', '') == 'in_progress':
                # There is a job active on volume. skip the sync
                return
            stored_volume_status = _volume.status
            current_status = volumes['volume%s.status' % index]
            if stored_volume_status not in [None, ""] and \
                current_status != stored_volume_status:
                msg = ("Status of volume: %s in cluster %s "
                       "changed from %s to %s") % (
                           volumes['volume%s.name' % index],
                           cluster_short_name,
                           stored_volume_status,
                           current_status)
                instance = "volume_%s" % volumes[
                    'volume%s.name' % index
                ]
                event_utils.emit_event(
                    "volume_status",
                    current_status,
                    msg,
                    instance,
                    'WARNING' if current_status == 'Stopped'
                    else 'INFO',
                    tags={"entity_type": RESOURCE_TYPE_VOLUME,
                          "volume_name": volumes['volume%s.name' % index]
                          }
                )
        except (KeyError, etcd.EtcdKeyNotFound) as ex:
            if isinstance(ex, KeyError):
                raise ex
            pass

        volume = NS.tendrl.objects.GlusterVolume(
            NS.tendrl_context.integration_id,
            vol_id=volumes['volume%s.id' % index]
        ).load()
        volume.vol_type = "arbiter" \
            if int(volumes['volume%s.arbiter_count' % index]) > 0 \
            else volumes['volume%s.type' % index]
        volume.name = volumes['volume%s.name' % index]
        volume.transport_type = volumes['volume%s.transport_type' % index]
        volume.status = volumes['volume%s.status' % index]
        volume.brick_count = volumes['volume%s.brickcount' % index]
        volume.snap_count = volumes['volume%s.snap_count' % index]
        volume.stripe_count = volumes['volume%s.stripe_count' % index]
        volume.replica_count = volumes['volume%s.replica_count' % index]
        volume.subvol_count = volumes['volume%s.subvol_count' % index]
        volume.arbiter_count = volumes['volume%s.arbiter_count' % index]
        volume.disperse_count = volumes['volume%s.disperse_count' % index]
        volume.redundancy_count = volumes['volume%s.redundancy_count' % index]
        volume.quorum_status = volumes['volume%s.quorum_status' % index]
        volume.snapd_status = volumes[
            'volume%s.snapd_svc.online_status' % index]
        volume.snapd_inited = volumes['volume%s.snapd_svc.inited' % index]
        if NS.tendrl.objects.GlusterVolume(
            NS.tendrl_context.integration_id,
            vol_id=volumes['volume%s.id' % index]
        ).exists():
            existing_vol = NS.tendrl.objects.GlusterVolume(
                NS.tendrl_context.integration_id,
                vol_id=volumes['volume%s.id' % index]
            ).load()
            volume_profiling_old_value = existing_vol.profiling_enabled
        else:
            volume_profiling_old_value = volume.profiling_enabled
        if ('volume%s.profile_enabled' % index) in volumes:
            value = int(volumes['volume%s.profile_enabled' % index])
            if value == 1:
                volume_profiling_new_value = "yes"
            else:
                volume_profiling_new_value = "no"
        else:
            volume_profiling_new_value = None
        volume.profiling_enabled = volume_profiling_new_value
        if volume_profiling_old_value not in [None, ""] and \
            volume_profiling_old_value != volume_profiling_new_value:
            # Raise alert for the same value change
            msg = ("Value of volume profiling for volume: %s "
                   "of cluster %s changed from %s to %s" % (
                       volumes['volume%s.name' % index],
                       cluster_short_name,
                       volume_profiling_old_value,
                       volume_profiling_new_value))
            instance = "volume_%s" % \
                volumes['volume%s.name' % index]
            event_utils.emit_event(
                "volume_profiling_status",
                volume_profiling_new_value,
                msg,
                instance,
                'INFO',
                tags={
                    "entity_type": RESOURCE_TYPE_BRICK,
                    "volume_name": volumes[
                        'volume%s.name' % index
                    ]
                }
            )
        volume.save(ttl=sync_ttl)
        # Save the default values of volume options
        vol_opt_dict = {}
        for opt_count in \
            range(1, int(vol_options['volume%s.options.count' % index])):
            vol_opt_dict[
                vol_options[
                    'volume%s.options.key%s' % (index, opt_count)
                ]
            ] = vol_options[
                'volume%s.options.value%s' % (index, opt_count)
            ]
        volume.options = vol_opt_dict
        volume.save()

    rebal_det = NS.gluster.objects.RebalanceDetails(
        vol_id=volumes['volume%s.id' % index],
        rebal_id=volumes['volume%s.rebalance.id' % index],
        rebal_status=volumes['volume%s.rebalance.status' % index],
        rebal_failures=volumes['volume%s.rebalance.failures' % index],
        rebal_skipped=volumes['volume%s.rebalance.skipped' % index],
        rebal_lookedup=volumes['volume%s.rebalance.lookedup' % index],
        rebal_files=volumes['volume%s.rebalance.files' % index],
        rebal_data=volumes['volume%s.rebalance.data' % index],
        time_left=volumes.get('volume%s.rebalance.time_left' % index),
    )
    rebal_det.save(ttl=sync_ttl)
    georep_details.save_georep_details(volumes, index)

    b_index = 1
    # ipv4 address of current node
    try:
        network_ip = []
        networks = NS.tendrl.objects.NodeNetwork().load_all()
        for network in networks:
            if network.ipv4:
                network_ip.extend(network.ipv4)
    except etcd.EtcdKeyNotFound as ex:
        Event(
            ExceptionMessage(
                priority="debug",
                publisher=NS.publisher_id,
                payload={
                    "message": "Could not find "
                    "any ipv4 networks for node"
                    " %s" % NS.node_context.node_id,
                    "exception": ex
                }
            )
        )
    while True:
        try:
            # Update brick node wise
            hostname = volumes[
                'volume%s.brick%s.hostname' % (index, b_index)
            ]
            ip = socket.gethostbyname(hostname)
            try:
                node_id = etcd_utils.read("indexes/ip/%s" % ip).value
                fqdn = NS.tendrl.objects.ClusterNodeContext(
                    node_id=node_id
                ).load().fqdn
                cluster_node_ids = etcd_utils.read(
                    "indexes/tags/tendrl/integration/%s" %
                    NS.tendrl_context.integration_id
                ).value
                cluster_node_ids = json.loads(cluster_node_ids)
                if NS.node_context.fqdn != fqdn or \
                        node_id not in cluster_node_ids:
                    b_index += 1
                    continue
            except(TypeError, etcd.EtcdKeyNotFound):
                b_index += 1
                continue
            sub_vol_size = (int(
                volumes['volume%s.brickcount' % index]
            )) / int(
                volumes['volume%s.subvol_count' % index]
            )
            brick_name = NS.node_context.fqdn
            brick_name += ":"
            brick_name += volumes['volume%s.brick%s' '.path' % (
                index,
                b_index
            )].split(":")[-1].replace("/", "_")

            # Raise alerts if the brick path changes
            try:
                stored_brick = NS.tendrl.objects.GlusterBrick(
                    NS.tendrl_context.integration_id,
                    NS.node_context.fqdn,
                    brick_dir=brick_name.split(":_")[-1]
                ).load()
                current_status = volumes.get(
                    'volume%s.brick%s.status' % (index, b_index)
                )
                if stored_brick.status and \
                    current_status != stored_brick.status:
                    msg = ("Brick:%s in volume:%s has %s"
                           ) % (
                               volumes['volume%s.brick%s' '.path' % (
                                   index,
                                   b_index
                               )],
                               volumes['volume%s.' 'name' % index],
                               current_status)
                    instance = "volume_%s|brick_%s" % (
                        volumes['volume%s.name' % index],
                        volumes['volume%s.brick%s.path' % (
                            index,
                            b_index
                        )]
                    )
                    event_utils.emit_event(
                        "brick_status",
                        current_status,
                        msg,
                        instance,
                        'WARNING' if current_status == 'Stopped'
                        else 'INFO',
                        tags={"entity_type": RESOURCE_TYPE_BRICK,
                              "volume_name": volumes[
                                  'volume%s.' 'name' % index]
                              }
                    )

            except etcd.EtcdKeyNotFound:
                pass

            brk_pth = "clusters/%s/Volumes/%s/Bricks/subvolume%s/%s"

            vol_brick_path = brk_pth % (
                NS.tendrl_context.integration_id,
                volumes['volume%s.id' % index],
                str((b_index - 1) / sub_vol_size),
                brick_name
            )

            etcd_utils.write(vol_brick_path, "")
            brick = NS.tendrl.objects.GlusterBrick(
                NS.tendrl_context.integration_id,
                NS.node_context.fqdn,
                brick_dir=brick_name.split(":_")[-1]
            ).load()
            brick.integration_id = NS.tendrl_context.integration_id
            brick.fqdn = NS.node_context.fqdn
            brick.brick_dir = brick_name.split(":_")[-1]
            brick.name = brick_name
            brick.vol_id = volumes['volume%s.id' % index]
            brick.sequence_number = b_index
            brick.brick_path = volumes[
                'volume%s.brick%s.path' % (index, b_index)
            ]
            brick.hostname = volumes.get(
                'volume%s.brick%s.hostname' % (index, b_index)
            )
            brick.port = volumes.get(
                'volume%s.brick%s.port' % (index, b_index)
            )
            brick.vol_name = volumes['volume%s.name' % index]
            brick.used = True
            brick.node_id = NS.node_context.node_id
            brick.status = volumes.get(
                'volume%s.brick%s.status' % (index, b_index)
            )
            brick.filesystem_type = volumes.get(
                'volume%s.brick%s.filesystem_type' % (index, b_index)
            )
            brick.mount_opts = volumes.get(
                'volume%s.brick%s.mount_options' % (index, b_index)
            )
            brick.utilization = brick_utilization.brick_utilization(
                volumes['volume%s.brick%s.path' % (index, b_index)]
            )
            brick.client_count = volumes.get(
                'volume%s.brick%s.client_count' % (index, b_index)
            )
            brick.is_arbiter = volumes.get(
                'volume%s.brick%s.is_arbiter' % (index, b_index)
            )
            brick.save(ttl=sync_ttl)
            # sync brick device details
            brick_device_details.\
                update_brick_device_details(
                    brick_name,
                    volumes[
                        'volume%s.brick%s.path' % (
                            index, b_index)
                    ],
                    devicetree,
                    sync_ttl
                )

            # Sync the brick client details
            c_index = 1
            if volumes.get(
                'volume%s.brick%s.client_count' % (index, b_index)
            ) > 0:
                while True:
                    try:
                        NS.gluster.objects.ClientConnection(
                            brick_name=brick_name,
                            fqdn=NS.node_context.fqdn,
                            brick_dir=brick_name.split(":_")[-1],
                            hostname=volumes[
                                'volume%s.brick%s.client%s.hostname' % (
                                    index, b_index, c_index
                                )
                            ],
                            bytesread=volumes[
                                'volume%s.brick%s.client%s.bytesread' % (
                                    index, b_index, c_index
                                )
                            ],
                            byteswrite=volumes[
                                'volume%s.brick%s.client%s.byteswrite' % (
                                    index, b_index, c_index
                                )
                            ],
                            opversion=volumes[
                                'volume%s.brick%s.client%s.opversion' % (
                                    index, b_index, c_index
                                )
                            ]
                        ).save(ttl=sync_ttl)
                    except KeyError:
                        break
                    c_index += 1
            sync_ttl += 4
            b_index += 1
        except KeyError:
            break
    return b_index
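
sync_volumes consumes a flat dict keyed like 'volume<N>.<field>' and 'volume<N>.brick<M>.<field>'. A minimal sketch of that assumed shape and of the subvolume arithmetic used to build the brick path (all values are hypothetical):

volumes = {
    "volume1.id": "c9a1d2b4-0f4e-4a6b-9d2c-8e7f6a5b4c3d",
    "volume1.name": "vol1",
    "volume1.status": "Started",
    "volume1.brickcount": "4",
    "volume1.subvol_count": "2",
    "volume1.brick1.path": "host1:/gluster/bricks/b1",
    "volume1.brick1.status": "Started",
}

index, b_index = 1, 1
sub_vol_size = int(volumes["volume%s.brickcount" % index]) // int(
    volumes["volume%s.subvol_count" % index])
# bricks 1-2 fall in subvolume0, bricks 3-4 in subvolume1
print("subvolume%s" % ((b_index - 1) // sub_vol_size))  # subvolume0
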
Code example #23
File: __init__.py  Project: rishubhjain/commons
    def save(self, update=True, ttl=None):
        self.render()
        if "Message" not in self.__class__.__name__:
            try:
                # Generate current in memory object hash
                self.hash = self._hash()
                _hash_key = "/{0}/hash".format(self.value)
                _stored_hash = None
                try:
                    _stored_hash = NS._int.client.read(_hash_key).value
                except (etcd.EtcdConnectionFailed, etcd.EtcdException) as ex:
                    if type(ex) != etcd.EtcdKeyNotFound:
                        NS._int.reconnect()
                        _stored_hash = NS._int.client.read(_hash_key).value
                if self.hash == _stored_hash:
                    # No change between the stored and the current object,
                    # so don't save the current object to the central store
                    if ttl:
                        etcd_utils.refresh(self.value, ttl)
                    return
            except TypeError:
                # no hash for this object, save the current hash as is
                pass

        if update:
            current_obj = self.load()
            for attr, val in vars(self).iteritems():
                if isinstance(val, (types.FunctionType,
                                    types.BuiltinFunctionType,
                                    types.MethodType, types.BuiltinMethodType,
                                    types.UnboundMethodType)) or \
                        attr.startswith("_") or attr in ['value', 'list']:
                    continue

                if val is None and hasattr(current_obj, attr):
                    # if self.attr is None, use attr value from central
                    # store (i.e. current_obj.attr)
                    if getattr(current_obj, attr):
                        setattr(self, attr, getattr(current_obj, attr))

        self.updated_at = str(time_utils.now())
        for item in self.render():
            '''
                Note: Log messages in this file are wrapped in
                try-except blocks so that they still work when the
                node_agent has not been started yet and the
                namespaces are still being created.
            '''
            try:
                Event(
                    Message(priority="debug",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Writing %s to %s" %
                                (item['key'], item['value'])
                            }))
            except KeyError:
                sys.stdout.write("Writing %s to %s" %
                                 (item['key'], item['value']))
            # serialize list/dict values to JSON strings based on definitions
            _type = self._defs.get("attrs", {}).get(item['name'],
                                                    {}).get("type")
            if _type:
                if _type.lower() in ['json', 'list']:
                    if item['value']:
                        try:
                            item['value'] = json.dumps(item['value'])
                        except ValueError as ex:
                            _msg = "Error save() attr %s for object %s" % \
                                   (item['name'], self.__name__)
                            Event(
                                ExceptionMessage(priority="debug",
                                                 publisher=NS.publisher_id,
                                                 payload={
                                                     "message": _msg,
                                                     "exception": ex
                                                 }))
            try:
                NS._int.wclient.write(item['key'], item['value'], quorum=True)
            except (etcd.EtcdConnectionFailed, etcd.EtcdException):
                NS._int.wreconnect()
                NS._int.wclient.write(item['key'], item['value'], quorum=True)
        if ttl:
            etcd_utils.refresh(self.value, ttl)
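
The save() above skips the write when the freshly computed hash matches the stored one and only refreshes the TTL. A stand-alone sketch of that idea, with a plain dict in place of the central store (an illustration, not the Tendrl implementation):

import hashlib
import json

store = {}  # stands in for etcd

def save(key, obj):
    new_hash = hashlib.md5(
        json.dumps(obj, sort_keys=True).encode("utf-8")).hexdigest()
    if store.get(key + "/hash") == new_hash:
        return "skipped"      # unchanged object: no write, TTL refresh only
    store[key + "/hash"] = new_hash
    store[key] = obj
    return "written"

print(save("/nodes/n1", {"fqdn": "node1.example.com"}))  # written
print(save("/nodes/n1", {"fqdn": "node1.example.com"}))  # skipped
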
Code example #24
def brick_status_alert(hostname):
    try:
        # fetching brick details of disconnected node
        lock = None
        path = "clusters/%s/Bricks/all/%s" % (
            NS.tendrl_context.integration_id,
            hostname
        )
        lock = etcd.Lock(
            NS._int.client,
            path
        )
        lock.acquire(
            blocking=True,
            lock_ttl=60
        )
        if lock.is_acquired:
            bricks = NS.tendrl.objects.GlusterBrick(
                NS.tendrl_context.integration_id,
                fqdn=hostname
            ).load_all()
            for brick in bricks:
                if brick.status.lower() == BRICK_STARTED:
                    # raise an alert for brick
                    msg = (
                        "Brick:%s in volume:%s has %s") % (
                            brick.brick_path,
                            brick.vol_name,
                            BRICK_STOPPED.title()
                        )
                    instance = "volume_%s|brick_%s" % (
                        brick.vol_name,
                        brick.brick_path,
                    )
                    event_utils.emit_event(
                        "brick_status",
                        BRICK_STOPPED.title(),
                        msg,
                        instance,
                        'WARNING',
                        tags={"entity_type": RESOURCE_TYPE_BRICK,
                              "volume_name": brick.vol_name,
                              "node_id": brick.node_id,
                              "fqdn": brick.hostname
                              }
                    )
                    # Update brick status as stopped
                    brick.status = BRICK_STOPPED.title()
                    brick.save()
                    lock.release()
    except (
        etcd.EtcdException,
        KeyError,
        ValueError,
        AttributeError
    ) as ex:
        Event(
            ExceptionMessage(
                priority="error",
                publisher=NS.publisher_id,
                payload={
                    "message": "Unable to raise an brick status "
                               "alert for host %s" % hostname,
                    "exception": ex
                }
            )
        )
    finally:
        if isinstance(lock, etcd.lock.Lock) and lock.is_acquired:
            lock.release()
Code example #25
def shutdown():
    Event(
        Message(priority="info",
                publisher=NS.publisher_id,
                payload={"message": "Signal handler: stopping"}))
    complete.set()
Code example #26
File: sds_detect.py  Project: brainfunked/node_agent
def sync():
    try:
        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={"message": "Running SDS detection"}))
        try:
            sds_discovery_manager = sds_manager.SDSDiscoveryManager()
        except ValueError as ex:
            Event(
                ExceptionMessage(priority="debug",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message":
                                     "Failed to init SDSDiscoveryManager.",
                                     "exception": ex
                                 }))
            return

        # Execute the SDS discovery plugins and tag the nodes with data
        for plugin in sds_discovery_manager.get_available_plugins():
            sds_details = plugin.discover_storage_system()
            if ('detected_cluster_id' in sds_details
                    and sds_details['detected_cluster_id'] != ""):
                if sds_details:
                    try:
                        dc = NS.tendrl.objects.DetectedCluster().load()
                        dc_changed = False
                        if dc.detected_cluster_id:
                            if dc.detected_cluster_id != sds_details.get(
                                    'detected_cluster_id'):
                                dc_changed = True
                        else:
                            time.sleep(3)

                        integration_index_key = \
                            "indexes/detected_cluster_id_to_integration_id/" \
                            "%s" % sds_details['detected_cluster_id']
                        try:
                            if dc_changed:
                                integration_id = \
                                    NS.tendrl_context.integration_id
                                NS._int.wclient.write(integration_index_key,
                                                      integration_id)
                            else:
                                integration_id = str(uuid.uuid4())
                                NS._int.wclient.write(integration_index_key,
                                                      integration_id,
                                                      prevExist=False)
                        except etcd.EtcdAlreadyExist:
                            if not dc_changed:
                                integration_id = NS._int.client.read(
                                    integration_index_key).value
                        finally:
                            NS.tendrl_context.integration_id = integration_id
                            NS.tendrl_context.cluster_id = sds_details.get(
                                'detected_cluster_id')
                            NS.tendrl_context.cluster_name = sds_details.get(
                                'detected_cluster_name')
                            NS.tendrl_context.sds_name = sds_details.get(
                                'pkg_name')
                            NS.tendrl_context.sds_version = sds_details.get(
                                'pkg_version')
                            NS.tendrl_context.save()

                        NS.node_context = NS.node_context.load()
                        integration_tag = "tendrl/integration/%s" % \
                                          integration_id
                        detected_cluster_tag = "detected_cluster/%s" % \
                                               sds_details[
                                                   'detected_cluster_id']
                        NS.node_context.tags += [
                            detected_cluster_tag, integration_tag
                        ]
                        NS.node_context.tags = list(set(NS.node_context.tags))
                        NS.node_context.save()
                        _cluster = NS.tendrl.objects.Cluster(
                            integration_id=NS.tendrl_context.integration_id
                        ).load()

                        NS.tendrl.objects.DetectedCluster(
                            detected_cluster_id=sds_details.get(
                                'detected_cluster_id'),
                            detected_cluster_name=sds_details.get(
                                'detected_cluster_name'),
                            sds_pkg_name=sds_details.get('pkg_name'),
                            sds_pkg_version=sds_details.get('pkg_version'),
                        ).save()

                        if _cluster.is_managed == "yes":
                            continue
                        else:
                            _cluster.is_managed = "no"
                            _cluster.save()

                    except (etcd.EtcdException, KeyError) as ex:
                        Event(
                            ExceptionMessage(priority="debug",
                                             publisher=NS.publisher_id,
                                             payload={
                                                 "message":
                                                 "Failed SDS detection",
                                                 "exception": ex
                                             }))
                    break
    except Exception as ex:
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={
                                 "message":
                                 "node_sync "
                                 "SDS detection failed: " + ex.message,
                                 "exception":
                                 ex
                             }))
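
The detection code above reserves an integration_id for a detected_cluster_id by writing the index key with prevExist=False and falling back to the stored value when the key already exists. A simplified sketch of that reservation pattern, with a dict standing in for etcd:

import uuid

index = {}  # detected_cluster_id -> integration_id, stands in for etcd

def integration_id_for(detected_cluster_id):
    candidate = str(uuid.uuid4())
    # setdefault mimics "write with prevExist=False, else read the existing value"
    return index.setdefault(detected_cluster_id, candidate)

first = integration_id_for("dc-123")
second = integration_id_for("dc-123")
assert first == second  # the first writer wins; later callers reuse its value
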
Code example #27
def sync():
    try:
        _keep_alive_for = int(NS.config.data.get("sync_interval", 10)) + 250
        interfaces = get_node_network()
        if len(interfaces) > 0:
            for interface in interfaces:
                NS.tendrl.objects.NodeNetwork(**interface).save(
                    ttl=_keep_alive_for)
                if interface['ipv4']:
                    for ipv4 in interface['ipv4']:
                        index_key = "/indexes/ip/%s" % ipv4
                        try:
                            NS._int.wclient.write(index_key,
                                                  NS.node_context.node_id,
                                                  prevExist=False)
                        except etcd.EtcdAlreadyExist:
                            pass
                            # TODO(team) add ipv6 support
                            # if interface['ipv6']:
                            #    for ipv6 in interface['ipv6']:
                            #        index_key = "/indexes/ip/%s/%s" % (ipv6,
                            #
                            # NS.node_context.node_id)
                            #        NS._int.wclient.write(index_key, 1)

        # global network
        if len(interfaces) > 0:
            for interface in interfaces:
                if interface["subnet"] is not "":
                    NS.node_agent.objects.GlobalNetwork(**interface).save(
                        ttl=_keep_alive_for)
        try:
            networks = NS._int.client.read("/networks")
            for network in networks.leaves:
                try:
                    # delete this node's entry under the subnet when its
                    # network detail is empty; if an entry exists the
                    # delete never happens
                    NS._int.wclient.delete(
                        "%s/%s" % (network.key, NS.node_context.node_id),
                        dir=True)
                    # delete the subnet dir itself when it is empty;
                    # if an entry exists the delete never happens
                    NS._int.wclient.delete(network.key, dir=True)
                except (etcd.EtcdKeyNotFound, etcd.EtcdDirNotEmpty):
                    continue
        except etcd.EtcdKeyNotFound as ex:
            Event(
                ExceptionMessage(priority="debug",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message": "Given key is not present in "
                                     "etcd .",
                                     "exception": ex
                                 }))
    except Exception as ex:
        _msg = "node_sync networks sync failed: " + ex.message
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={
                                 "message": _msg,
                                 "exception": ex
                             }))
Code example #28
File: __init__.py  Project: Tendrl/ceph-integration
    def _sync_rbds(self):
        try:
            pools = NS._int.client.read("clusters/%s/Pools" %
                                        NS.tendrl_context.integration_id,
                                        recursive=True)
            for child in pools._children:
                pool_id = child['key'].split('/')[-1]
                pool_name = NS._int.client.read(
                    "clusters/%s/Pools/%s/pool_name" %
                    (NS.tendrl_context.integration_id, pool_id)).value
                rbd_details = self._get_rbds(pool_name)
                # Rbd out of band delete handling
                try:
                    rbds = NS._int.client.read(
                        "clusters/%s/Pools/%s/Rbds" %
                        (NS.tendrl_context.integration_id, pool_id))
                    old_rbds = []
                    for rbd in rbds.leaves:
                        old_rbds.append(rbd.key.split("/")[-1])
                    new_rbds = []
                    for k, v in rbd_details.iteritems():
                        new_rbds.append(k)
                    delete_rbds = set(old_rbds) - set(new_rbds)
                    for id in delete_rbds:
                        NS._int.client.delete(
                            "clusters/%s/Pools/%s/Rbds/%s" %
                            (NS.tendrl_context.integration_id, pool_id, id),
                            recursive=True)
                except etcd.EtcdKeyNotFound as ex:
                    Event(
                        ExceptionMessage(
                            priority="debug",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "No rbds found for ceph cluster %s" %
                                NS.tendrl_context.integration_id,
                                "exception":
                                ex
                            }))
                for k, v in rbd_details.iteritems():
                    NS.ceph.objects.Rbd(
                        name=k,
                        size=v['size'],
                        pool_id=pool_id,
                        flags=v['flags'],
                        provisioned=self._to_bytes(v['provisioned'])
                        if v.get("provisioned") else None,
                        used=self._to_bytes(v['used'])).save()
                try:
                    rbds = NS._int.client.read(
                        "clusters/%s/Pools/%s/Rbds" %
                        (NS.tendrl_context.integration_id, pool_id))
                except etcd.EtcdKeyNotFound:
                    # no rbds for pool, continue
                    continue

                for entry in rbds.leaves:
                    fetched_rbd = NS.ceph.objects.Rbd(
                        pool_id=pool_id,
                        name=entry.key.split("Rbds/")[-1]).load()
                    if fetched_rbd.name not in rbd_details.keys():
                        NS._int.client.delete(
                            "clusters/%s/Pools/%s/Rbds/%s" %
                            (NS.tendrl_context.integration_id, pool_id,
                             fetched_rbd.name),
                            recursive=True)
        except etcd.EtcdKeyNotFound:
            pass
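
The out-of-band delete handling above boils down to a set difference between the RBD names already stored and the names just fetched. The core of that, as a tiny self-contained sketch with hypothetical names:

stored_rbds = {"rbd1", "rbd2", "rbd3"}   # keys currently under Pools/<id>/Rbds
fetched_rbds = {"rbd1", "rbd3"}          # names reported by the latest scan

for name in stored_rbds - fetched_rbds:
    print("would delete clusters/<cid>/Pools/<pid>/Rbds/%s" % name)  # rbd2
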
Code example #29
def sync(sync_ttl=None):
    try:
        tags = []
        # update node agent service details
        logger.log("debug", NS.publisher_id,
                   {"message": "node_sync, Updating Service data"})
        for service in TENDRL_SERVICES:
            s = NS.tendrl.objects.Service(service=service)
            if s.running:
                service_tag = NS.compiled_definitions.get_parsed_defs(
                )['namespace.tendrl']['tags'][service.strip("@*")]
                tags.append(service_tag)

                if service_tag == "tendrl/server":
                    tags.append("tendrl/monitor")
            s.save()

        if "tendrl/monitor" not in tags and \
            NS.tendrl_context.integration_id:
            _cluster = NS.tendrl.objects.Cluster(
                integration_id=NS.tendrl_context.integration_id).load()
            # Try to claim orphan "provisioner_%integration_id" tag
            _tag = "provisioner/%s" % _cluster.integration_id
            _is_new_provisioner = False
            NS.node_context = NS.tendrl.objects.NodeContext().load()
            if _tag not in NS.node_context.tags:
                try:
                    _index_key = "/indexes/tags/%s" % _tag
                    _node_id = json.dumps([NS.node_context.node_id])
                    etcd_utils.write(_index_key, _node_id, prevExist=False)
                    etcd_utils.refresh(_index_key, sync_ttl + 50)
                    tags.append(_tag)
                    _is_new_provisioner = True
                except etcd.EtcdAlreadyExist:
                    pass

        # updating node context with latest tags
        logger.log(
            "debug", NS.publisher_id,
            {"message": "node_sync, updating node context "
             "data with tags"})
        NS.node_context = NS.tendrl.objects.NodeContext().load()
        current_tags = list(NS.node_context.tags)
        tags += current_tags
        NS.node_context.tags = list(set(tags))
        NS.node_context.tags.sort()
        current_tags.sort()
        if NS.node_context.tags != current_tags:
            NS.node_context.save()

        if "tendrl/monitor" not in tags and \
            NS.tendrl_context.integration_id:
            _cluster = _cluster.load()
            if _is_new_provisioner and _cluster.is_managed == "yes":
                _msg = "node_sync, NEW provisioner node found! "\
                    "re-configuring monitoring (job-id: %s) on this node"
                payload = {
                    "tags": ["tendrl/node_%s" % NS.node_context.node_id],
                    "run": "tendrl.flows.ConfigureMonitoring",
                    "status": "new",
                    "parameters": {
                        'TendrlContext.integration_id':
                        NS.tendrl_context.integration_id
                    },
                    "type": "node"
                }
                _job_id = str(uuid.uuid4())
                NS.tendrl.objects.Job(job_id=_job_id,
                                      status="new",
                                      payload=payload).save()
                logger.log("debug", NS.publisher_id,
                           {"message": _msg % _job_id})

        # Update /indexes/tags/:tag = [node_ids]
        for tag in NS.node_context.tags:

            index_key = "/indexes/tags/%s" % tag
            _node_ids = []
            try:
                _node_ids = etcd_utils.read(index_key).value
                _node_ids = json.loads(_node_ids)
            except etcd.EtcdKeyNotFound:
                pass

            if _node_ids:
                if "provisioner" in tag:
                    # Check if this is a stale provisioner
                    if NS.node_context.node_id != _node_ids[0]:
                        NS.node_context.tags.remove(tag)
                        NS.node_context.save()
                        continue
                if NS.node_context.node_id in _node_ids:
                    if sync_ttl and len(_node_ids) == 1:
                        etcd_utils.refresh(index_key, sync_ttl + 50)

                    continue
                else:
                    _node_ids += [NS.node_context.node_id]
            else:
                _node_ids = [NS.node_context.node_id]
            _node_ids = list(set(_node_ids))

            etcd_utils.write(index_key, json.dumps(_node_ids))
            if sync_ttl and len(_node_ids) == 1:
                etcd_utils.refresh(index_key, sync_ttl + 50)
        logger.log("debug", NS.publisher_id,
                   {"message": "node_sync, Updating detected "
                    "platform"})
    except Exception as ex:
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={
                                 "message":
                                 "node_sync service and indexes "
                                 "sync failed: " + ex.message,
                                 "exception":
                                 ex
                             }))
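
The tag-index loop above keeps /indexes/tags/<tag> as a JSON-encoded list of node ids, adding the local node only when it is missing. A small sketch of that update step (a dict replaces etcd; the ids are hypothetical):

import json

index = {"/indexes/tags/tendrl/monitor": json.dumps(["node-a"])}

def add_node_to_tag(tag, node_id):
    key = "/indexes/tags/%s" % tag
    node_ids = json.loads(index.get(key, "[]"))
    if node_id not in node_ids:
        node_ids = sorted(set(node_ids + [node_id]))
        index[key] = json.dumps(node_ids)
    return node_ids

print(add_node_to_tag("tendrl/monitor", "node-b"))  # ['node-a', 'node-b']
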
Code example #30
File: __init__.py  Project: Tendrl/ceph-integration
    def _sync_ec_profiles(self):
        """Invokes the below CLI commands

        1.
        ```ceph osd erasure-code-profile ls```

        and required output format is a list of ec profiles separated with new
        lines as below

        ```
           default
           k4m2
        ```
        2.
        ```ceph osd erasure-code-profile get {name}```

        and the required output format is '=' separated values in multiple
        lines

        ```
           k=2
           m=1
           plugin=jerasure
           directory={dir}
        ```

        """
        required_ec_profiles = [(2, 1), (4, 2), (6, 3), (8, 4)]
        ec_profile_details = {}

        commands = ['osd', 'erasure-code-profile', 'ls']
        cmd_out = ceph.ceph_command(NS.tendrl_context.cluster_name, commands)
        if cmd_out['err'] == "":
            ec_profile_list = []
            for item in cmd_out['out'].split('\n'):
                if item != "":
                    ec_profile_list.append(item)

            for ec_profile in ec_profile_list:
                commands = ['osd', 'erasure-code-profile', 'get', ec_profile]
                cmd_out = ceph.ceph_command(NS.tendrl_context.cluster_name,
                                            commands)
                if cmd_out['err'] == "":
                    info = {}
                    for item in cmd_out['out'].split('\n'):
                        if item != "":
                            info[item.split('=')[0]] = \
                                item.split('=')[1].strip()
                            ec_profile_details[ec_profile] = info
            # EC profile out-of-band delete handling
            try:
                ec_profiles = NS._int.client.read(
                    "clusters/%s/ECProfiles" %
                    (NS.tendrl_context.integration_id))
                old_ec_profiles = []
                for ec_profile in ec_profiles.leaves:
                    old_ec_profiles.append(ec_profile.key.split("/")[-1])
                new_ec_profiles = []
                for k, v in ec_profile_details.iteritems():
                    new_ec_profiles.append(k)
                delete_ec_profiles = set(old_ec_profiles) - set(
                    new_ec_profiles)
                for id in delete_ec_profiles:
                    NS._int.client.delete(
                        "clusters/%s/ECProfiles/%s" %
                        (NS.tendrl_context.integration_id, id),
                        recursive=True)
            except etcd.EtcdKeyNotFound as ex:
                Event(
                    ExceptionMessage(priority="debug",
                                     publisher=NS.publisher_id,
                                     payload={
                                         "message": "key not found in etcd",
                                         "exception": ex
                                     }))
        available_ec_profiles = []
        for k, v in ec_profile_details.iteritems():
            NS.ceph.objects.ECProfile(
                name=k,
                k=v['k'],
                m=v['m'],
                plugin=v.get('plugin'),
                directory=v.get('directory'),
                ruleset_failure_domain=v.get('ruleset_failure_domain')).save()
            available_ec_profiles.append((int(v['k']), int(v['m'])))

        # Create the missing ec_profile_details
        missing_ec_profiles = [
            item for item in required_ec_profiles
            if item not in available_ec_profiles
        ]
        for item in missing_ec_profiles:
            attrs = dict(name="k%sm%s" % (item[0], item[1]),
                         k=item[0],
                         m=item[1],
                         plugin='jerasure',
                         directory='/usr/lib/ceph/erasure-code')
            crud = Crud()
            crud.create("ec_profile", attrs)
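
Given the 'ceph osd erasure-code-profile get {name}' output format described in the docstring ('=' separated values, one per line), the parsing step can be reproduced in isolation; the sample output below is hypothetical:

sample_out = """k=2
m=1
plugin=jerasure
directory=/usr/lib/ceph/erasure-code"""

info = {}
for item in sample_out.split("\n"):
    if item != "":
        key, value = item.split("=", 1)
        info[key] = value.strip()

print(info["k"], info["m"], info["plugin"])  # 2 1 jerasure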