Exemple #1
0
    def run(self):
        """Wait until the cluster is reported as managed.

        Loads the ``Cluster`` object for
        ``self.parameters["TendrlContext.integration_id"]`` once per second.

        Returns:
            True as soon as the cluster exists and its ``is_managed``
            attribute equals ``"yes"``.

        Raises:
            AtomExecutionFailedError: after 600 unsuccessful attempts.
        """
        retry_count = 0
        while True:
            _cluster = None
            try:
                _cluster = NS.tendrl.objects.Cluster(
                    integration_id=self.parameters[
                        "TendrlContext.integration_id"
                    ]
                ).load()
            except etcd.EtcdKeyNotFound:
                # Nothing stored yet; fall through to the retry/time-out
                # handling below.
                pass

            # _cluster is still None when the key was not found, so it has
            # to be checked before any method is called on it.
            if (_cluster is not None and _cluster.exists() and
                    _cluster.is_managed == "yes"):
                return True

            retry_count += 1
            time.sleep(1)
            if retry_count == 600:
                logger.log("error",
                           NS.publisher_id, {
                               "message":
                               "Cluster data sync still incomplete. "
                               "Timing out"
                           },
                           job_id=self.parameters['job_id'],
                           flow_id=self.parameters['flow_id'],
                           integration_id=NS.tendrl_context.integration_id)
                raise AtomExecutionFailedError(
                    "Cluster data sync still incomplete. Timing out")
Exemple #2
0
 def update_clusters_alert_count(self):
     """Recompute and save the alert counters of every cluster.

     For each id returned by ``central_store_util.get_cluster_ids()`` the
     critical and warning alerts are parsed and their counts are saved in
     a ``ClusterAlertCounters`` object. An ``AlertingError`` raised for one
     cluster is logged and the remaining clusters are still processed.
     """
     for cid in central_store_util.get_cluster_ids():
         try:
             critical, warning = parse_resource_alerts(
                 None, alerting_consts.CLUSTER, cluster_id=cid)
             counters = ClusterAlertCounters(
                 warn_count=len(warning),
                 crit_count=len(critical),
                 cluster_id=cid)
             counters.save(update=False)
         except AlertingError as ex:
             # Log and move on to the next cluster.
             logger.log(
                 "error",
                 NS.get("publisher_id", None),
                 {"message": 'Failed to update cluster alert counter.'
                             ' Exception %s' % str(ex)})
 def __generate_executable_module(self):
     """Build the executable data for the configured ansible module.

     Returns:
         The module data returned by ``module_common.modify_module``.

     Raises:
         AnsibleExecutableGenerationFailed: if ``modify_module`` (or the
             unpacking of its result) raises any exception; the text of
             the original error is passed on as ``err``.
     """
     # The module name is the file name without directory and extension.
     file_name = os.path.basename(self.module_path)
     modname = os.path.splitext(file_name)[0]
     try:
         module_data, _style, _shebang = module_common.modify_module(
             modname,
             self.module_path,
             self.argument_dict,
             None,
             task_vars={})
     except Exception as e:
         details = {"message": "Could not generate ansible "
                               "executable data "
                               "for module  : %s. Error: %s" %
                               (self.module_path, str(e))}
         logger.log(
             "debug", self.publisher_id, details, node_id=self.node_id)
         raise AnsibleExecutableGenerationFailed(
             module_path=self.module_path, err=str(e))
     return module_data
 def __init__(
     self,
     module_path,
     publisher_id=None,
     node_id=None,
     **kwargs
 ):
     """Resolve the module file and store the module arguments.

     Args:
         module_path: path of the module relative to the first entry of
             ``modules.__path__``.
         publisher_id: id used for log messages; falls back to
             ``NS.publisher_id`` when falsy.
         node_id: node id used for log messages; falls back to
             ``NS.node_context.node_id`` when falsy.
         **kwargs: arguments for the module; must not be empty.

     Raises:
         AnsibleModuleNotFound: if the resolved path is not a file.
         ValueError: if no keyword arguments were supplied.
     """
     self.module_path = modules.__path__[0] + "/" + module_path
     self.publisher_id = publisher_id or NS.publisher_id
     self.node_id = node_id or NS.node_context.node_id

     if not os.path.isfile(self.module_path):
         logger.log(
             "debug",
             self.publisher_id,
             {"message": "Module path: %s does not exist" %
                 self.module_path},
             node_id=self.node_id)
         raise AnsibleModuleNotFound(module_path=self.module_path)

     if not kwargs:
         logger.log(
             "debug",
             self.publisher_id,
             {"message": "Empty argument dictionary"},
             node_id=self.node_id)
         raise ValueError

     self.argument_dict = kwargs
     self.argument_dict['_ansible_selinux_special_fs'] = [
         'nfs', 'vboxsf', 'fuse', 'ramfs'
     ]
Exemple #5
0
def node_wise_brick_count(cluster_detail):
    """Build the per-node brick count metric names for a cluster.

    Args:
        cluster_detail: mapping with an ``"integration_id"`` entry and a
            ``"Node"`` list; each node is a mapping with ``"fqdn"`` and the
            ``brick_total_count`` / ``brick_down_count`` /
            ``brick_up_count`` values.

    Returns:
        A list of metric strings, grouped by metric kind (total, down, up)
        and, within each kind, ordered like ``cluster_detail["Node"]``.
        Nodes for which a value cannot be resolved are logged and skipped.
    """
    templates = (
        "clusters.$integration_id.nodes.$node_name.brick_count."
        "total.$brick_total_count",
        "clusters.$integration_id.nodes.$node_name.brick_count."
        "down.$brick_down_count",
        "clusters.$integration_id.nodes.$node_name.brick_count."
        "up.$brick_up_count",
    )
    metrics = []
    for template in templates:
        metric = template.replace(
            "$integration_id", str(cluster_detail["integration_id"]))
        for node in cluster_detail["Node"]:
            try:
                # Dots in the fqdn would otherwise split the metric path.
                node_name = node["fqdn"].replace(".", "_")
                local_metric = metric.replace("$node_name", node_name)
                # The last path segment is the "$<key>" placeholder that
                # names the node attribute to substitute.
                placeholder = local_metric.rsplit(".", 1)[1]
                value = str(node[str(placeholder.replace("$", ""))])
                metrics.append(local_metric.replace(placeholder, value))
            except (AttributeError, KeyError) as ex:
                logger.log(
                    "debug", NS.get("publisher_id", None), {
                        'message':
                        "Failed to create brick metric "
                        "for Node: {0} "
                        "Metric: {1}".format(node, metric) + str(ex)
                    })
    return metrics
Exemple #6
0
 def update_nodes_alert_count(self):
     """Recompute and save the alert counters of every node.

     For each id returned by ``central_store_util.get_node_ids()`` the
     critical and warning alerts are parsed and their counts are saved in
     a ``NodeAlertCounters`` object. An ``AlertingError`` raised for one
     node is logged and the remaining nodes are still processed.
     """
     for nid in central_store_util.get_node_ids():
         try:
             critical, warning = parse_resource_alerts(
                 None, alerting_consts.NODE, node_id=nid)
             counters = NodeAlertCounters(
                 warn_count=len(warning),
                 crit_count=len(critical),
                 node_id=nid)
             counters.save(update=False)
         except AlertingError as ex:
             # Log and move on to the next node.
             logger.log(
                 "error",
                 NS.get("publisher_id", None),
                 {"message": 'Failed to update node alert counter.'
                             ' Exception %s' % str(ex)})
    def _derive_cluster_id(self):
        """Derive a cluster id from the output of ``gluster pool list``.

        Returns:
            A ``(cluster_id, peers)`` tuple. ``cluster_id`` is the SHA-256
            hex digest of the sorted, concatenated first columns of the
            command output (header line skipped); ``peers`` maps each first
            column value to its ``"connected"`` (last column) and
            ``"hostname"`` (second to last column) values.
            ``("", {})`` is returned when the command wrote to stderr,
            produced no output, or reported "Connection failed".
        """
        proc = subprocess.Popen(
            "gluster pool list",
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        out, err = proc.communicate()
        if err or out is None or "Connection failed" in out:
            logger.log(
                "debug",
                NS.publisher_id,
                {"message": "Could not detect SDS:Gluster installation"})
            return "", {}

        peer_uuids = []
        gfs_peer_data = {}
        # The first line of the output is the column header.
        for row in out.split('\n')[1:]:
            if row == '':
                continue
            columns = row.split()
            # Use the gluster generated pool UUID as unique key
            uuid = columns[0]
            peer_uuids.append(uuid)
            gfs_peer_data[uuid] = {
                "connected": columns[-1],
                "hostname": columns[-2]
            }

        # Sorting makes the digest independent of the listing order.
        digest = hashlib.sha256("".join(sorted(peer_uuids))).hexdigest()
        return (digest, gfs_peer_data)
Exemple #8
0
    def run(self):
        """Poll ``/queue`` and process every job that is not locked.

        Runs until ``self._complete`` is set. Each cycle reloads the node
        context, sleeps for the sync interval (3 seconds on nodes tagged
        ``tendrl/monitor``, 5 seconds otherwise), reads the queue and hands
        each unlocked job id to ``process_job`` in a thread that is joined
        before the next job is started.
        """
        logger.log(
            "debug",
            NS.publisher_id,
            {"message": "%s running" % self.__class__.__name__}
        )
        while not self._complete.is_set():
            _job_sync_interval = 5
            NS.node_context = NS.node_context.load()
            if "tendrl/monitor" in NS.node_context.tags:
                _job_sync_interval = 3

            time.sleep(_job_sync_interval)
            try:
                jobs = etcd_utils.read("/queue")
            except etcd.EtcdKeyNotFound:
                # No queue yet; try again on the next cycle.
                continue

            for job in jobs.leaves:
                # Check job not already locked by some agent
                jid = job.key.split('/')[-1]
                job_lock_key = "/queue/%s/locked_by" % jid
                try:
                    if etcd_utils.read(job_lock_key).value:
                        continue
                except etcd.EtcdKeyNotFound:
                    # No lock key means nobody has claimed the job.
                    pass

                # args must be a tuple: a bare string would be unpacked
                # character by character into process_job's arguments.
                _job_thread = threading.Thread(
                    target=process_job, args=(jid,)
                )
                _job_thread.daemon = True
                _job_thread.start()
                _job_thread.join()
Exemple #9
0
    def __init__(self, ns_name="tendrl", ns_src="tendrl.commons"):
        """Create the namespace and register its contents.

        Installs a global ``NS`` object on ``__builtin__`` when it does not
        exist yet, then creates the namespace, registers subclasses, sets
        up and validates the definitions and sets up the common objects.

        Args:
            ns_name: name of the namespace to create.
            ns_src: source the namespace is created from.
        """
        super(TendrlNS, self).__init__()
        if not hasattr(__builtin__, "NS"):
            __builtin__.NS = maps.NamedDict()
            NS._int = maps.NamedDict()
            NS._int.wreconnect = cs_utils.wreconnect
            NS._int.reconnect = cs_utils.reconnect
            NS._int.watchers = dict()

        # Note: Log messages in this file have try-except blocks to run in
        # the condition when the node_agent has not been started and name
        # spaces are being created.
        logger.log(
            "info",
            NS.get("publisher_id", None),
            {'message': "Creating namespace.%s from source %s"
                        % (ns_name, ns_src)})
        self.ns_name = ns_name
        self.ns_src = ns_src

        self._create_ns()
        self.current_ns = self._get_ns()
        logger.log(
            "info",
            NS.get("publisher_id", None),
            {'message': "namespace.%s created!" % self.ns_name})

        self._register_subclasses_to_ns()
        self.setup_definitions()
        self._validate_ns_definitions()
        self.setup_common_objects()
Exemple #10
0
def georep_status(cluster_detail):
    """Build the geo-replication metric names for a cluster.

    Args:
        cluster_detail: mapping with an ``"integration_id"`` entry and a
            ``"geo_rep"`` mapping holding the ``total``, ``up``, ``down``,
            ``partial``, ``stopped``, ``paused`` and ``created`` values.

    Returns:
        A list with one metric string per value that could be resolved;
        unresolved ones are logged and skipped.
    """
    templates = (
        "clusters.$integration_id.georep.total.$total",
        "clusters.$integration_id.georep.up.$up",
        "clusters.$integration_id.georep.down.$down",
        "clusters.$integration_id.georep.partial.$partial",
        "clusters.$integration_id.georep.stopped.$stopped",
        "clusters.$integration_id.georep.paused.$paused",
        "clusters.$integration_id.georep.created.$created",
    )
    metrics = []
    for metric in templates:
        try:
            local_metric = metric.replace(
                "$integration_id", str(cluster_detail["integration_id"]))
            # The last path segment is the "$<key>" placeholder naming the
            # geo_rep entry to substitute.
            placeholder = local_metric.rsplit(".", 1)[1]
            key = str(placeholder.replace("$", ""))
            value = str(cluster_detail["geo_rep"][key])
            metrics.append(local_metric.replace(placeholder, value))
        except (AttributeError, KeyError) as ex:
            logger.log(
                "debug", NS.get("publisher_id", None), {
                    'message':
                    "Failed to create cluster metric {0} "
                    "for cluster {1}".format(
                        metric, str(cluster_detail["integration_id"])) +
                    str(ex)
                })
    return metrics
Exemple #11
0
def node_wise_brick_status(cluster_detail):
    """Build one status metric name per brick of a cluster.

    Args:
        cluster_detail: mapping with an ``"integration_id"`` entry and a
            ``"Brick"`` list; each brick is a mapping with ``"host_name"``,
            ``"brick_name"`` and ``"status"``.

    Returns:
        A list of metric strings in the order of
        ``cluster_detail["Brick"]``. Bricks for which a value cannot be
        resolved are logged and skipped.
    """
    metric = "clusters.$integration_id.nodes.$node_name." \
        "bricks.$brick_name.status.$status"
    metrics = []
    for brick in cluster_detail["Brick"]:
        try:
            local_metric = metric.replace(
                "$integration_id", str(cluster_detail["integration_id"]))
            local_metric = local_metric.replace(
                "$node_name", brick["host_name"])
            # Slashes of the brick path are stored as "|" in the metric.
            brick_name = brick["brick_name"].replace("/", "|")
            local_metric = local_metric.replace("$brick_name", brick_name)
            # The last path segment is the "$<key>" placeholder naming the
            # brick attribute to substitute.
            placeholder = local_metric.rsplit(".", 1)[1]
            value = str(brick[str(placeholder.replace("$", ""))])
            metrics.append(local_metric.replace(placeholder, value))
        except (AttributeError, KeyError) as ex:
            logger.log(
                "debug", NS.get("publisher_id", None), {
                    'message':
                    "Failed to create brick metric {0} "
                    "for Brick :{1}".format(metric, brick) + str(ex)
                })
    return metrics
Exemple #12
0
    def run(self):
        """Poll the stored jobs and process every job that is not locked.

        Runs until ``self._complete`` is set. Each cycle reloads the node
        and tendrl contexts. Nodes tagged ``tendrl/monitor`` use a 3 second
        interval; other nodes use 5 seconds and skip the cycle while
        ``integration_id`` or ``fqdn`` is still ``None``. Every job that is
        not locked and has a job id is handed to ``process_job`` in a
        thread that is joined before the next job is started.
        """
        logger.log(
            "debug",
            NS.publisher_id,
            {"message": "%s running" % self.__class__.__name__}
        )
        while not self._complete.is_set():
            interval = 5
            NS.node_context = NS.node_context.load()
            NS.tendrl_context = NS.tendrl_context.load()
            if "tendrl/monitor" in NS.node_context.tags:
                interval = 3
            elif (NS.tendrl_context.integration_id is None or
                    NS.node_context.fqdn is None):
                # Contexts are not populated yet; wait and re-check.
                time.sleep(interval)
                continue

            time.sleep(interval)
            try:
                jobs = NS.tendrl.objects.Job().load_all()
            except etcd.EtcdKeyNotFound:
                continue

            for job in jobs:
                # Check job not already locked by some agent
                if job.locked_by or job.job_id in [None, '']:
                    continue

                worker = threading.Thread(
                    target=process_job, args=(job.job_id,))
                worker.daemon = True
                worker.start()
                worker.join()
    def run(self):
        """Stop the rebalance of the volume named in the parameters.

        Calls ``NS.gdeploy_plugin.rebalance_volume`` with the ``"stop"``
        action and logs the outcome.

        Returns:
            True when the plugin call returned a truthy value, False
            otherwise.
        """
        stopped = NS.gdeploy_plugin.rebalance_volume(
            self.parameters.get('Volume.volname'),
            "stop",
            force=self.parameters.get('Volume.force'))

        if stopped:
            level = "info"
            template = "Stopped the rebalance for volume %s"
        else:
            level = "error"
            template = "Failed to stop rebalance for volume %s"

        logger.log(
            level,
            NS.publisher_id,
            {"message": template % self.parameters['Volume.volname']},
            job_id=self.parameters["job_id"],
            flow_id=self.parameters["flow_id"],
            integration_id=NS.tendrl_context.integration_id)
        return True if stopped else False
def get_conf():
    """Fill ``NS.config.data`` with the Grafana/Graphite settings.

    Sets the fixed host, port and datasource defaults and replaces the
    ``"credentials"`` entry by a ``(user, password)`` tuple built from its
    current ``"user"`` and ``"password"`` values.

    Raises:
        exceptions.InvalidConfigurationException: a new instance with the
            message "Error in loading configuration" is logged and raised
            when that exception type occurs while setting the values.
    """
    defaults = (
        # Graphite and Grafana will be running on localhost
        ("grafana_host", "127.0.0.1"),
        ("grafana_port", 3000),
        # Default values for graphite datasource
        ("datasource_type", "graphite"),
        ("basicAuth", False),
        # Grafana related configs
        ("datasource", []),
    )
    try:
        for key, value in defaults:
            NS.config.data[key] = value

        user = NS.config.data["credentials"]["user"]
        password = NS.config.data["credentials"]["password"]
        NS.config.data["credentials"] = (user, password)
    except exceptions.InvalidConfigurationException:
        err = exceptions.InvalidConfigurationException(
            "Error in loading configuration")
        logger.log(
            "info", NS.get("publisher_id", None), {'message': str(err)})
        raise err
    def _setup_gluster_native_message_reciever(self):
        """Start glustereventsd and register this receiver as a webhook.

        Returns:
            False when the service start was not successful and the
            service status is falsy; True otherwise, including when adding
            the webhook fails (that failure is only logged).
        """
        message, success = svc.Service("glustereventsd").start()
        running = svc_stat.ServiceStatus("glustereventsd").status()
        if not running and not success:
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "glustereventsd could"
                            " not be started: %s" % message})
            return False

        url = "http://{0}:{1}{2}".format(self.host, str(self.port), self.path)
        out, err, rc = cmd_utils.Command(
            'gluster-eventsapi webhook-add %s' % url).run()
        if rc == 0:
            return True

        # An already registered webhook is not a real failure, so it is
        # logged at a lower severity.
        if "Webhook already exists" in err:
            severity = "debug"
        else:
            severity = "error"
        logger.log(
            severity,
            NS.publisher_id,
            {"message": "could not add webhook"
                        " for glustereventsd. {0}: {1}".format(
                            severity, err)})
        return True
    def run(self):
        """Set up the gluster message receiver and serve ``app``.

        Returns early (after logging an error) when the receiver setup
        fails. Otherwise mounts the logged WSGI app on the root path,
        configures CherryPy with ``self.host`` / ``self.port`` and blocks
        on the CherryPy engine.
        """
        if not self._setup_gluster_native_message_reciever():
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "gluster native message reciever setup failed"})
            return

        # Wrap the app with Paste's TransLogger for WSGI access logging and
        # mount the resulting callable on the root directory.
        cherrypy.tree.graft(TransLogger(app), '/')

        # Configuration of the web server
        server_settings = {
            'engine.autoreload_on': False,
            'log.screen': True,
            'server.socket_port': self.port,
            'server.socket_host': self.host,
            'log.access_file': '',
            'log.error_file': ''
        }
        cherrypy.config.update(server_settings)

        # Start the CherryPy WSGI web server
        cherrypy.engine.start()
        cherrypy.engine.block()
Exemple #17
0
def get_lvs():
    """Run the lvm report command and index its rows.

    Each output line is split on ``$`` into ``NAME=value`` items and turned
    into a dict. Rows whose ``LVM2_LV_ATTR`` starts with ``t`` are keyed by
    ``"<vg name>/<lv name>"``; all other rows are keyed by the real path of
    ``LVM2_LV_PATH``.

    Returns:
        None when the command exits with a non-zero return code, otherwise
        a dict of the parsed rows. A ``ValueError`` or ``KeyError`` during
        parsing is logged and whatever was collected so far is returned.
    """
    _lvm_cmd = ("lvm vgs --unquoted --noheading --nameprefixes "
                "--separator $ --nosuffix --units m -o lv_uuid,"
                "lv_name,data_percent,pool_lv,lv_attr,lv_size,"
                "lv_path,lv_metadata_size,metadata_percent,vg_name")
    out, err, rc = cmd_utils.Command(_lvm_cmd, True).run()
    if rc != 0:
        logger.log("debug", NS.publisher_id, {"message": str(err)})
        return None

    lvs = {}
    if str(out) == '':
        return lvs

    def _parse(line):
        # "A=1$B=2" -> {"A": "1", "B": "2"}
        return dict(item.split('=') for item in line.strip().split('$'))

    try:
        for row in map(_parse, out.split('\n')):
            if row['LVM2_LV_ATTR'][0] == 't':
                key = "%s/%s" % (row['LVM2_VG_NAME'], row['LVM2_LV_NAME'])
            else:
                key = os.path.realpath(row['LVM2_LV_PATH'])
            lvs[key] = row
    except (ValueError, KeyError) as ex:
        # KeyError: an expected attribute name is missing from the lvm
        # output. ValueError: an item is not a single NAME=value pair.
        logger.log("debug", NS.publisher_id, {"message": str(ex)})
    return lvs
Exemple #18
0
def alert_job_status(curr_value, msg, integration_id=None, cluster_name=None):
    """Log a job status alert for the current node.

    Args:
        curr_value: job status; ``"failed"`` (case-insensitive) yields
            severity ``WARNING``, anything else ``INFO``.
        msg: text stored as the ``message`` tag.
        integration_id: overrides ``NS.tendrl_context.integration_id``
            when truthy.
        cluster_name: overrides ``NS.tendrl_context.cluster_name`` when
            truthy.

    The alert is serialised to JSON and logged with priority ``notice``
    under the ``alerting`` publisher. Nothing is logged when
    ``NS.node_context.node_id`` is falsy.
    """
    alert = {}
    alert['source'] = NS.publisher_id
    alert['classification'] = 'cluster'
    alert['pid'] = os.getpid()
    alert['time_stamp'] = tendrl_now().isoformat()
    alert['alert_type'] = 'STATUS'
    alert['severity'] = (
        "WARNING" if curr_value.lower() == "failed" else "INFO"
    )
    alert['resource'] = 'job_status'
    alert['current_value'] = curr_value
    # Explicit arguments win over the values from the tendrl context.
    tags = dict(
        message=msg,
        integration_id=integration_id or NS.tendrl_context.integration_id,
        cluster_name=cluster_name or NS.tendrl_context.cluster_name,
        sds_name=NS.tendrl_context.sds_name,
        fqdn=NS.node_context.fqdn
    )
    alert['tags'] = tags
    alert['node_id'] = NS.node_context.node_id

    if NS.node_context.node_id:
        logger.log("notice", "alerting", {'message': json.dumps(alert)})
Exemple #19
0
def volume_count(cluster_detail):
    """Build the volume count metric names for a cluster.

    Args:
        cluster_detail: mapping with an ``"integration_id"`` entry and the
            ``volume_total_count``, ``volume_down_count``,
            ``volume_up_count``, ``volume_partial_count`` and
            ``volume_degraded_count`` values.

    Returns:
        A list with one metric string per value that could be resolved;
        unresolved ones are logged and skipped.
    """
    templates = (
        "clusters.$integration_id.volume_count.total.$volume_total_count",
        "clusters.$integration_id.volume_count.down.$volume_down_count",
        "clusters.$integration_id.volume_count.up.$volume_up_count",
        "clusters.$integration_id.volume_count.partial.$volume_partial_count",
        "clusters.$integration_id.volume_count.degraded.$volume_degraded_count",
    )
    metrics = []
    for metric in templates:
        try:
            local_metric = metric.replace(
                "$integration_id", str(cluster_detail["integration_id"]))
            # The last path segment is the "$<key>" placeholder naming the
            # cluster_detail entry to substitute.
            placeholder = local_metric.rsplit(".", 1)[1]
            value = str(cluster_detail[str(placeholder.replace("$", ""))])
            metrics.append(local_metric.replace(placeholder, value))
        except (AttributeError, KeyError) as ex:
            logger.log(
                "debug", NS.get("publisher_id", None), {
                    'message':
                    "Failed to create cluster metric "
                    "{0} for cluster {1}".format(
                        metric, str(cluster_detail["integration_id"])) +
                    str(ex)
                })
    return metrics
Exemple #20
0
    def run(self):
        """Wait until the cluster is reported as managed.

        Loads the ``Cluster`` object for
        ``self.parameters["TendrlContext.integration_id"]`` once per second.

        Returns:
            True as soon as the cluster exists and its ``is_managed``
            attribute equals ``"yes"``; False (after logging an error)
            once 600 attempts have failed.
        """
        retry_count = 0
        while True:
            _cluster = None
            try:
                _cluster = NS.tendrl.objects.Cluster(
                    integration_id=self.parameters[
                        "TendrlContext.integration_id"
                    ]
                ).load()
            except etcd.EtcdKeyNotFound:
                # Nothing stored yet; fall through to the retry/time-out
                # handling below.
                pass

            # _cluster is still None when the key was not found, so it has
            # to be checked before any method is called on it.
            if (_cluster is not None and _cluster.exists() and
                    _cluster.is_managed == "yes"):
                return True

            retry_count += 1
            time.sleep(1)
            if retry_count == 600:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": "Cluster data sync still incomplete. "
                                "Timing out"},
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    integration_id=NS.tendrl_context.integration_id
                )
                return False
Exemple #21
0
def volume_wise_brick_count(cluster_detail):
    """Build the per-volume brick count metric names for a cluster.

    Args:
        cluster_detail: mapping with an ``"integration_id"`` entry, a
            ``"Volume"`` list of mappings with a ``"name"``, and a
            ``"volume_level_brick_count"`` mapping from volume name to its
            ``total`` / ``down`` / ``up`` values.

    Returns:
        A list of metric strings, grouped by metric kind (total, down, up)
        and, within each kind, ordered like ``cluster_detail["Volume"]``.
        Volumes for which a value cannot be resolved are logged and
        skipped.
    """
    templates = (
        "clusters.$integration_id.volumes.$volume_name."
        "brick_count.total.$total",
        "clusters.$integration_id.volumes.$volume_name."
        "brick_count.down.$down",
        "clusters.$integration_id.volumes.$volume_name."
        "brick_count.up.$up",
    )
    metrics = []
    for template in templates:
        metric = template.replace(
            "$integration_id", str(cluster_detail["integration_id"]))
        for volume in cluster_detail["Volume"]:
            try:
                local_metric = metric.replace("$volume_name", volume["name"])
                # The last path segment is the "$<key>" placeholder naming
                # the brick count entry to substitute.
                placeholder = local_metric.rsplit(".", 1)[1]
                counts = cluster_detail["volume_level_brick_count"][
                    str(volume["name"])]
                value = str(counts[str(placeholder.replace("$", ""))])
                metrics.append(local_metric.replace(placeholder, value))
            except (AttributeError, KeyError) as ex:
                logger.log(
                    "debug", NS.get("publisher_id", None), {
                        'message':
                        "Failed to create volume metric {0} "
                        "for Volume :{1}".format(metric, volume["name"]) +
                        str(ex)
                    })
    return metrics
Exemple #22
0
    def _getBrickList(self, brick_count, sub_vol_len, volume_id):
        """Return the brick names of a volume grouped by sub-volume.

        Args:
            brick_count: total number of bricks of the volume.
            sub_vol_len: number of bricks per sub-volume.
            volume_id: id used to locate the volume's ``Bricks`` directory
                in the central store.

        Returns:
            A list of ``brick_count // sub_vol_len`` lists, each holding
            ``sub_vol_len`` brick names ordered by the bricks'
            ``sequence_number``. An empty list is returned (and an error
            logged) when the ``Bricks`` directory does not exist.
        """
        try:
            result = NS._int.client.read(
                "clusters/%s/Volumes/%s/Bricks" %
                (NS.tendrl_context.integration_id, volume_id), )
            bricks = result.leaves
        except etcd.EtcdKeyNotFound:
            logger.log(
                "error",
                NS.publisher_id,
                {
                    "message":
                    "Volume %s does not have Bricks directory" %
                    self.parameters['Volume.volname']
                },
                job_id=self.parameters["job_id"],
                flow_id=self.parameters["flow_id"],
                integration_id=NS.tendrl_context.integration_id,
            )
            return []

        # Slot i holds the name of the brick whose sequence_number is i + 1.
        b_list = [""] * brick_count
        for el in bricks:
            result = NS._int.client.read(el.key + "/" + "sequence_number")
            b_list[int(result.value) - 1] = el.key.split("/")[-1]

        # Floor division keeps the group count an integer under true
        # division as well; for ints it equals the former "/" on Python 2.
        return [
            b_list[i * sub_vol_len:(i + 1) * sub_vol_len]
            for i in range(brick_count // sub_vol_len)
        ]
Exemple #23
0
 def check_service_status(self, services, node):
     """Check that every given service is running.

     Args:
         services: iterable of service names to check.
         node: node identifier, used only in the log messages.

     Returns:
         True when all services report ``running``; False otherwise. An
         error is logged for each service that is not running.
     """
     all_running = True
     for service_name in services:
         service = NS.tendrl.objects.Service(service=service_name)
         if service.running:
             continue

         if len(service.error) > 0:
             # The status check itself failed.
             msg = ("Failed to check status of %s "
                    "on %s. Error: %s" % (
                        service_name, node, service.error))
         else:
             msg = ("Service %s is not running on %s, "
                    "Please check the log file to figure out the "
                    "exact problem" % (service_name, node))
         logger.log(
             "error",
             NS.get("publisher_id", None),
             {"message": msg},
             job_id=self.parameters['job_id'],
             flow_id=self.parameters['flow_id'])
         all_running = False
     return all_running
Exemple #24
0
 def load_definition(self):
     """Return the definition of this object's class from its namespace.

     Progress and failures are logged through ``logger``; when logging
     raises ``KeyError`` the text is written to stdout instead.

     Returns:
         The result of ``self._ns.get_obj_definition`` for the class name.

     Raises:
         Exception: when ``get_obj_definition`` raises ``KeyError``.
     """
     cls_name = self.__class__.__name__
     target = "namespace.%s.objects.%s" % (self._ns.ns_name, cls_name)

     loading_text = "Load definitions (.yml) for " + target
     try:
         logger.log("debug", NS.publisher_id, {"message": loading_text})
     except KeyError:
         sys.stdout.write(loading_text + " \n")

     try:
         return self._ns.get_obj_definition(cls_name)
     except KeyError as ex:
         msg = "Could not find definitions (.yml) for " + target
         # Report the exception itself first ...
         try:
             payload = {"message": "error", "exception": ex}
             Event(
                 ExceptionMessage(
                     priority="debug",
                     publisher=NS.publisher_id,
                     payload=payload))
         except KeyError:
             sys.stdout.write(str(ex) + "\n")
         # ... then the readable message.
         try:
             logger.log("debug", NS.publisher_id, {"message": msg})
         except KeyError:
             sys.stdout.write(msg + "\n")
         raise Exception(msg)
    def _setup_gluster_native_message_reciever(self):
        """Start glustereventsd and register this receiver as a webhook.

        Returns:
            False when the service start was not successful and the
            service status is falsy; True otherwise, including when adding
            the webhook fails (that failure is only logged).
        """
        message, success = svc.Service("glustereventsd").start()
        running = svc_stat.ServiceStatus("glustereventsd").status()
        if not running and not success:
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "glustereventsd could"
                            " not be started: %s" % message})
            return False

        url = "http://{0}:{1}{2}".format(self.host, str(self.port), self.path)
        out, err, rc = cmd_utils.Command(
            'gluster-eventsapi webhook-add %s' % url).run()
        if rc == 0:
            return True

        # An already registered webhook is not a real failure, so it is
        # logged at a lower severity.
        if "Webhook already exists" in err:
            severity = "info"
        else:
            severity = "error"
        logger.log(
            severity,
            NS.publisher_id,
            {"message": "could not add webhook"
                        " for glustereventsd. {0}: {1}".format(
                            severity, err)})
        return True
Exemple #26
0
def update_cluster_alert_count():
    """Recount the cluster's alerts and save the cluster/volume counters.

    Loads all ``ClusterAlert`` objects of the current integration id and
    counts those with severity ``WARNING`` or ``CRITICAL``. Counted alerts
    whose resource is listed in the ``VolumeAlertCounters`` relationship
    definition for their alert type also increment the ``alert_count`` of
    the volume named in their ``volume_name`` tag. The resulting counts are
    saved as ``ClusterAlertCounters`` and ``VolumeAlertCounters`` objects.

    ``etcd.EtcdException`` and ``AttributeError`` are logged at debug level
    and not propagated.
    """
    cluster_alert_count = 0
    counted_severities = ("WARNING", "CRITICAL")
    try:
        alert_counts = get_volume_alert_counts()
        alerts = NS.tendrl.objects.ClusterAlert(
            tags={'integration_id': NS.tendrl_context.integration_id}
        ).load_all()
        for alert in alerts:
            alert.tags = json.loads(alert.tags)
            if alert.severity not in counted_severities:
                continue
            cluster_alert_count += 1
            volume_resources = NS.gluster.objects.VolumeAlertCounters(
            )._defs['relationship'][alert.alert_type.lower()]
            if alert.resource in volume_resources:
                vol_name = alert.tags.get('volume_name', None)
                if vol_name and vol_name in alert_counts:
                    alert_counts[vol_name]['alert_count'] += 1
        # Update cluster alert count
        NS.tendrl.objects.ClusterAlertCounters(
            integration_id=NS.tendrl_context.integration_id,
            alert_count=cluster_alert_count
        ).save()
        # Update volume alert count.
        # values() exists on both Python 2 and 3; iteritems() would raise
        # AttributeError on Python 3, which the handler below would
        # swallow, silently skipping every volume update.
        for vol_dict in alert_counts.values():
            NS.gluster.objects.VolumeAlertCounters(
                integration_id=NS.tendrl_context.integration_id,
                alert_count=vol_dict['alert_count'],
                volume_id=vol_dict['vol_id']
            ).save()
    except (etcd.EtcdException, AttributeError) as ex:
        logger.log(
            "debug",
            NS.publisher_id,
            {"message": "Unable to update alert count.err: %s" % ex}
        )
def _add_metrics(objects, obj_name, metric, resource):

    metrics = []
    for obj in objects[obj_name]["attrs"]:
        if obj == "name" or obj == "fqdn":
            continue
        local_metric = copy.deepcopy(metric)
        try:
            if isinstance(resource[obj],dict):
                for key, value in resource[obj].items():
                    if key == "details":
                        continue
                    new_metric = local_metric + "." + str(obj) + "." + str(key)
                    metric_value = str(value)
                    final_metric = {new_metric : metric_value}
                    metrics.append(copy.deepcopy(final_metric))
            else:
                metric_value = str(resource[obj])
                if  str(obj) == "status" and "volumes" in metric:
                    obj = "vol_status"
                local_metric = local_metric + "." + str(obj)
                final_metric = {local_metric : metric_value}
                metrics.append(copy.deepcopy(final_metric))
        except {AttributeError,KeyError} as ex:
            logger.log("error", NS.get("publisher_id", None),
                      {'message': str(ex)})
            pass

    return metrics
Exemple #28
0
    def run(self):
        """Mark the cluster named in the job parameters as not managed.

        Loads the ``Cluster`` object for
        ``parameters['TendrlContext.integration_id']``, sets its
        ``is_managed`` attribute to ``"no"`` and saves it.

        Returns:
            True on success, False when etcd reports the key as missing
            (the failure is logged at error level).
        """
        integration_id = self.parameters['TendrlContext.integration_id']
        logger.log(
            "info",
            NS.get("publisher_id", None),
            {
                "message": "Setting cluster %s is_managed to \"no\":" %
                           integration_id
            },
            job_id=self.parameters['job_id'],
            flow_id=self.parameters['flow_id']
        )
        try:
            _cluster = NS.tendrl.objects.Cluster(
                integration_id=integration_id
            ).load()
            _cluster.is_managed = "no"
            _cluster.save()
        except etcd.EtcdKeyNotFound:
            logger.log(
                "error",
                NS.get("publisher_id", None),
                {
                    # A space separates the id from the following word;
                    # the adjacent literals used to run together.
                    "message": "Error setting cluster %s "
                    "is_managed to \"no\":" % (
                        integration_id
                    )
                },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            return False

        return True
Exemple #29
0
    def __init__(self, ns_name="tendrl", ns_src="tendrl.commons"):
        """Create the namespace ``ns_name`` from the package ``ns_src``.

        On first use this installs the global ``NS`` object into
        ``__builtin__`` together with its ``_int`` helper entries, then
        builds the namespace, registers its subclasses, and loads and
        validates the definitions.
        """
        super(TendrlNS, self).__init__()

        # Bootstrap the process-wide NS container exactly once.
        if not hasattr(__builtin__, "NS"):
            __builtin__.NS = maps.NamedDict()
            NS._int = maps.NamedDict()
            NS._int.wreconnect = cs_utils.wreconnect
            NS._int.reconnect = cs_utils.reconnect
            NS._int.watchers = dict()

        # Note: Log messages in this file have try-except blocks to run in
        # the condition when the node_agent has not been started and name
        # spaces are being created.
        creation_msg = "Creating namespace.%s from source %s" % (
            ns_name, ns_src)
        logger.log("info", NS.get("publisher_id", None),
                   {'message': creation_msg})

        self.ns_name = ns_name
        self.ns_src = ns_src
        self._create_ns()
        self.current_ns = self._get_ns()

        created_msg = "namespace.%s created!" % self.ns_name
        logger.log("info", NS.get("publisher_id", None),
                   {'message': created_msg})

        self._register_subclasses_to_ns()
        self.setup_definitions()
        self._validate_ns_definitions()
        self.setup_common_objects()
 def set_volume_count(self, cluster_data, resource_name):
     """Annotate each cluster dict with per-state counts of a resource.

     For every cluster in ``cluster_data`` the list stored under
     ``resource_name`` is inspected and the keys
     ``<resource>_total_count``, ``<resource>_up_count``,
     ``<resource>_down_count``, ``<resource>_partial_count`` and
     ``<resource>_degraded_count`` (resource name lower-cased) are written
     back.  States 0 and 1 count as up, 4 as partial, 3 as degraded and
     anything else as down.  Entries lacking a ``state`` key are logged
     and left uncounted.

     Returns:
         The same ``cluster_data`` object, modified in place.
     """
     for cluster in cluster_data:
         prefix = str(resource_name).lower()
         members = cluster[str(resource_name)]
         cluster[prefix + "_total_count"] = len(members)
         tally = {"up": 0, "down": 0, "partial": 0, "degraded": 0}
         for member in members:
             try:
                 state = member["state"]
                 if state in (0, 1):
                     bucket = "up"
                 elif state == 4:
                     bucket = "partial"
                 elif state == 3:
                     bucket = "degraded"
                 else:
                     bucket = "down"
                 tally[bucket] += 1
             except KeyError as ex:
                 logger.log(
                     "debug", NS.get("publisher_id", None), {
                         'message':
                         "Failed to set resource count "
                         "for {0}".format(resource_name) + str(ex)
                     })
         for bucket in ("up", "down", "partial", "degraded"):
             cluster[prefix + "_" + bucket + "_count"] = tally[bucket]
     return cluster_data
 def set_volume_level_brick_count(self, cluster_data):
     """Attach per-volume brick totals to every cluster dict.

     For each cluster a mapping ``{volume_name: {"total", "up", "down"}}``
     is seeded from ``cluster["Volume"]`` and then filled by walking
     ``cluster["Brick"]``.  A brick counts as up when its ``status`` is 0
     or 1 and as down otherwise.  The mapping is stored under
     ``cluster["volume_level_brick_count"]``.

     Returns:
         The same ``cluster_data`` object, modified in place.
     """
     for cluster in cluster_data:
         per_volume = {}
         for volume in cluster["Volume"]:
             try:
                 per_volume[volume["name"]] = dict(total=0, up=0, down=0)
             except (AttributeError, KeyError):
                 # Volumes without a readable name get no entry.
                 pass
         # Increment the counters brick by brick.
         for brick in cluster["Brick"]:
             try:
                 counters = per_volume[str(brick["vol_name"])]
                 counters["total"] += 1
                 bucket = "up" if brick["status"] in (0, 1) else "down"
                 counters[bucket] += 1
             except (AttributeError, KeyError) as ex:
                 logger.log(
                     "debug", NS.get("publisher_id", None), {
                         'message':
                         "Failed to set volume level "
                         "brick count" + str(ex)
                     })
         cluster["volume_level_brick_count"] = per_volume
     return cluster_data
Exemple #32
0
def find_node_id(integration_id, fqdn):
    """Return the id of the node in a cluster whose fqdn equals ``fqdn``.

    Every leaf under ``clusters/<integration_id>/nodes`` is checked by
    loading its ``ClusterNodeContext`` and comparing the stored fqdn.

    Args:
        integration_id: cluster id used to build the etcd path.
        fqdn: fully qualified domain name to look for.

    Returns:
        The matching node id (last path component of the etcd key).

    Raises:
        NodeNotFound: no node of the cluster has the given fqdn.
        EtcdKeyNotFound: a required etcd key is missing.
        Both are logged at error level and then re-raised.
    """
    try:
        nodes = etcd_utils.read("clusters/%s/nodes" % integration_id)
        for node in nodes.leaves:
            node_id = node.key.split('/')[-1]
            node_context = NS.tendrl.objects.ClusterNodeContext()
            # formatting value here because render populates
            # integration_id from the namespace
            node_context.value = node_context.value.format(
                integration_id, node_id)
            if fqdn == node_context.load().fqdn:
                return node_id
        raise NodeNotFound
    except (EtcdKeyNotFound, NodeNotFound) as ex:
        # NOTE(review): the two messages look attached to the opposite
        # failure (NodeNotFound reads like "not found in cluster");
        # mapping kept as-is, confirm intent before swapping.
        if not isinstance(ex, EtcdKeyNotFound):
            logger.log("error", NS.publisher_id,
                       {"message": "Failed to fetch fqdn for node %s" % fqdn})
        else:
            logger.log(
                "error", NS.publisher_id, {
                    "message":
                    "Node with fqdn %s not found "
                    "in cluster %s" % (fqdn, integration_id)
                })
        # Bare raise keeps the original traceback (``raise ex`` resets it
        # on Python 2).
        raise
Exemple #33
0
 def run(self):
     """Run the ansible command module with ``self.attributes``.

     The runner is first built with ``ANSIBLE_MODULE_PATH`` and, if that
     module is not found, with the ``core/`` prefixed path.  Logging falls
     back to stdout/stderr when the logging call raises ``KeyError``.

     Returns:
         A ``(stdout, stderr, rc)`` tuple taken from the runner result, or
         ``("", <exception message>, -1)`` when the runner raises
         ``AnsibleExecutableGenerationFailed``.
     """
     try:
         runner = ansible_module_runner.AnsibleRunner(
             ANSIBLE_MODULE_PATH,
             **self.attributes
         )
     except ansible_module_runner.AnsibleModuleNotFound:
         # Backward compat ansible<=2.2
         legacy_path = "core/" + ANSIBLE_MODULE_PATH
         runner = ansible_module_runner.AnsibleRunner(
             legacy_path,
             **self.attributes
         )

     try:
         result, err = runner.run()
         try:
             logger.log(
                 "debug",
                 NS.publisher_id,
                 {"message": "Command Execution: %s" % result}
             )
         except KeyError:
             sys.stdout.write("Command Execution: %s \n" % result)
     except ansible_module_runner.AnsibleExecutableGenerationFailed as e:
         try:
             failure = ExceptionMessage(
                 priority="debug",
                 publisher=NS.publisher_id,
                 payload={
                     "message": "could not run the command %s. " %
                     self.attributes["_raw_params"],
                     "exception": e
                 }
             )
             Event(failure)
         except KeyError:
             sys.stderr.write(
                 "could not run the command %s. Error: %s\n" %
                 (self.attributes["_raw_params"], str(e))
             )
         return "", str(e.message), -1

     return (
         result.get("stdout", ""),
         result.get("stderr", "").encode("ascii"),
         result.get("rc", -1),
     )
 def get_cluster_details(self, objects, cluster_key):
     """Read the configured cluster attributes from etcd.

     Args:
         objects: definitions mapping; ``objects["Cluster"]`` maps object
             names to dicts carrying an ``"attrs"`` mapping.
         cluster_key: etcd key prefix of the cluster; its last path
             component is reported as the integration id in log messages.

     Returns:
         A list with one ``{object_name: {attr: value}}`` dict per object
         (the ``metric`` and ``value`` entries are skipped).  Values are
         passed through ``self.cluster_status_mapper``.  Attributes that
         cannot be read are logged at debug level and omitted.
     """
     cluster_detail = []
     for obj in objects["Cluster"]:
         if obj in ["metric", "value"]:
             continue
         obj_name = str(obj)
         resource_detail = {obj_name: {}}
         obj_details = objects["Cluster"][obj_name]
         obj_key = os.path.join(cluster_key, obj_name)
         for key in obj_details["attrs"]:
             try:
                 raw = etcd_utils.read(os.path.join(obj_key, key))
                 mapped = self.cluster_status_mapper(str(raw.value))
                 resource_detail[obj_name][key] = copy.deepcopy(mapped)
             except (KeyError, etcd.EtcdKeyNotFound) as ex:
                 integration_id = cluster_key.split("/")[-1]
                 logger.log(
                     "debug", NS.get("publisher_id", None), {
                         'message':
                         "Cannot Find {0} in Cluster "
                         "{1}".format(key, integration_id) + str(ex)
                     })
         # resource_detail always holds the obj_name key, so this check
         # passes for every object.
         if resource_detail != {}:
             cluster_detail.append(resource_detail)
     return cluster_detail
Exemple #35
0
    def delete_volume(self, volume_name, host=None, force=None,
                      format_bricks=None):
        """Delete a gluster volume via the ``delete_volume`` plugin module.

        Args:
            volume_name: name of the volume to delete.
            host: optional host forwarded to the plugin when truthy.
            force: optional force flag forwarded to the plugin when truthy.
            format_bricks: accepted but currently has no effect.

        Returns:
            True when the plugin reports return code 0, otherwise False
            (the failure is logged at debug level).
        """
        optional = {}
        if host:
            optional["host"] = host
        if force:
            optional["force"] = force

        out, err, rc = delete_volume.delete_volume(volume_name, **optional)

        if rc != 0:
            logger.log(
                "debug",
                NS.publisher_id,
                {"message": "Volume deletion failed for volume "
                 "%s. Details: %s" % (volume_name, out)},
                integration_id=NS.tendrl_context.integration_id
            )
            return False

        logger.log(
            "info",
            NS.publisher_id,
            {"message": "gluster volume %s deleted successfully" %
             volume_name},
            integration_id=NS.tendrl_context.integration_id
        )
        if format_bricks:
            pass
            # TODO(darshan) Call gdeploy action to clear brick
        return True
Exemple #36
0
    def run(self):
        """Check that the volume from the job parameters is absent in etcd.

        Reads ``clusters/<integration_id>/Volumes/<Volume.vol_id>``.

        Returns:
            True when etcd raises ``EtcdKeyNotFound`` for that key (the
            volume does not exist), False when the read succeeds.
        """
        volname = self.parameters['Volume.volname']
        logger.log(
            "info",
            NS.publisher_id,
            {"message": "Checking if volume %s doesnt exist" % volname},
            job_id=self.parameters["job_id"],
            flow_id=self.parameters["flow_id"],
            integration_id=NS.tendrl_context.integration_id
        )
        try:
            volume_key = 'clusters/%s/Volumes/%s' % (
                NS.tendrl_context.integration_id,
                self.parameters['Volume.vol_id']
            )
            NS._int.client.read(volume_key)
        except etcd.EtcdKeyNotFound:
            logger.log(
                "warning",
                NS.publisher_id,
                {
                    "message": "Volume %s doesnt exist" %
                    self.parameters['Volume.volname']
                },
                job_id=self.parameters["job_id"],
                flow_id=self.parameters["flow_id"],
                integration_id=NS.tendrl_context.integration_id
            )
            return True
        else:
            return False
Exemple #37
0
    def rebalance_volume(self, volume_name, action, host=None,
                         force=None, fix_layout=None):
        """Run a rebalance action on a volume via the plugin module.

        Args:
            volume_name: name of the volume to rebalance.
            action: rebalance action; ``"start"`` is replaced by
                ``"fix-layout"`` when ``fix_layout`` is truthy.
            host: optional host forwarded to the plugin when truthy.
            force: optional force flag forwarded to the plugin when truthy.
            fix_layout: request a fix-layout run instead of a plain start.

        Returns:
            True when the plugin reports return code 0, otherwise False
            (the failure is logged at debug level).
        """
        args = {}
        if host:
            args.update({"host": host})
        if force:
            args.update({"force": force})
        if fix_layout and action == "start":
            action = "fix-layout"

        out, err, rc = rebalance_volume.rebalance_volume(
            volume_name,
            action,
            **args
        )
        if rc == 0:
            logger.log(
                "info",
                NS.publisher_id,
                # Trailing space keeps "performed" and "successfully"
                # from running together.
                {"message": "Rebalance %s on volume %s performed "
                 "successfully" % (action, volume_name)},
                integration_id=NS.tendrl_context.integration_id
            )
        else:
            logger.log(
                "debug",
                NS.publisher_id,
                {"message": "Rebalance %s failed for volume "
                 "%s. Details: %s" % (action, volume_name, out)},
                integration_id=NS.tendrl_context.integration_id
            )
            return False
        return True
Exemple #38
0
 def gluster_provision_bricks(self, brick_dictionary, disk_type=None,
                              disk_count=None, stripe_count=None):
     """Provision bricks through ``gluster_brick_provision``.

     All arguments are forwarded positionally to ``provision_disks``.

     Returns:
         True when the call reports return code 0 and an empty error
         string, otherwise False.  Both outcomes are logged at info level.
     """
     out, err, rc = gluster_brick_provision.provision_disks(
         brick_dictionary, disk_type, disk_count, stripe_count)

     if not (rc == 0 and err == ""):
         failure_text = "Bricks Provisioning Failed. Error %s" % (str(out))
         logger.log(
             "info",
             NS.publisher_id,
             {"message": failure_text},
             integration_id=NS.tendrl_context.integration_id
         )
         return False

     logger.log(
         "info",
         NS.publisher_id,
         {"message": "Bricks Provisioned successfully"},
         integration_id=NS.tendrl_context.integration_id
     )
     return True
Exemple #39
0
def find_grafana_pid():
    """Return the output of ``pidof grafana-server`` without padding.

    Raises:
        CalledProcessError: ``pidof`` exited with a non-zero status; the
            failure is logged at error level before being re-raised.
    """
    try:
        pid_output = check_output(["pidof", "grafana-server"])
    except CalledProcessError as ex:
        logger.log(
            "error",
            NS.publisher_id,
            {"message": "unable to find grafana pid"}
        )
        raise ex
    return pid_output.strip()
Exemple #40
0
    def stop_volume(self, volume_name, host=None, force=None):
        """Stop a gluster volume via the ``stop_volume`` plugin module.

        Args:
            volume_name: name of the volume to stop.
            host: optional host forwarded to the plugin when truthy.
            force: optional force flag forwarded to the plugin when truthy.

        Returns:
            True when the plugin reports return code 0, otherwise False
            (the failure is logged at debug level).
        """
        optional = {}
        if host:
            optional["host"] = host
        if force:
            optional["force"] = force

        out, err, rc = stop_volume.stop_volume(volume_name, **optional)

        if rc != 0:
            logger.log(
                "debug",
                NS.publisher_id,
                {"message": "Volume stop failed for volume "
                 "%s. Details: %s" % (volume_name, out)},
                integration_id=NS.tendrl_context.integration_id
            )
            return False

        logger.log(
            "info",
            NS.publisher_id,
            {"message": "Volume %s stopped successfully" %
             volume_name},
            integration_id=NS.tendrl_context.integration_id
        )
        return True
def get_volumes_details(cluster_key):
    """Collect attribute data for every non-deleted volume of a cluster.

    Args:
        cluster_key: etcd key prefix of the cluster.

    Returns:
        A list of dicts, one per volume whose ``deleted`` flag is not
        "true".  Each dict holds the attributes named in
        ``ATTRS["volumes"]`` plus a ``"subvolume"`` entry from
        ``get_subvolume_details``.  If a ``KeyError`` or
        ``etcd.EtcdKeyNotFound`` occurs, it is logged at debug level and
        the volumes collected so far are returned.
    """
    volume_details = []
    # Bound up front so the except handler can always format it, even
    # when the failure happens before the first loop iteration.
    volume_id = None
    try:
        volume_list = utils.get_resource_keys(cluster_key, "Volumes")
        for volume_id in volume_list:
            volume_key = cluster_key + "/Volumes/" + str(volume_id)
            deleted = etcd_utils.read(volume_key + "/" + "deleted").value
            if str(deleted).lower() != "true":
                volume_data = {}
                for attr in ATTRS["volumes"]:
                    volume_data[attr] = etcd_utils.read(
                        volume_key + "/" + attr).value
                volume_data["subvolume"] = get_subvolume_details(volume_key)
                volume_details.append(volume_data)
    except (KeyError, etcd.EtcdKeyNotFound) as ex:
        logger.log(
            "debug", NS.get("publisher_id", None), {
                'message':
                "Error while fetching "
                "volume id {}".format(volume_id) + str(ex)
            })
    return volume_details
    def run(self):
        """Wait until the volume named in the job parameters shows up.

        Polls ``clusters/<integration_id>/Volumes`` once per second and
        loads each listed volume, looking for one whose name equals
        ``parameters['Volume.volname']``.

        Returns:
            True as soon as a matching volume is found.

        Raises:
            AtomExecutionFailedError: no match after 600 attempts.
        """
        for _attempt in range(600):
            volumes = None
            try:
                volumes = NS._int.client.read(
                    "clusters/%s/Volumes" % NS.tendrl_context.integration_id
                )
            except etcd.EtcdKeyNotFound:
                # ignore as no volumes available till now
                pass

            if volumes:
                for entry in volumes.leaves:
                    vol_id = entry.key.split("Volumes/")[-1]
                    volume = NS.gluster.objects.Volume(vol_id=vol_id).load()
                    if volume.name == self.parameters['Volume.volname']:
                        return True

            time.sleep(1)

        # Every attempt failed: report and give up.
        logger.log(
            "error",
            NS.publisher_id,
            {
                "message": "Volume %s not reflected in tendrl"
                " yet. Timing out" % self.parameters['Volume.volname']
            },
            job_id=self.parameters['job_id'],
            flow_id=self.parameters['flow_id'],
            integration_id=NS.tendrl_context.integration_id
        )
        raise AtomExecutionFailedError(
            "Volume %s not reflected in tendrl yet. Timing out" %
            self.parameters['Volume.volname'])
Exemple #43
0
 def format_alert(self, alert_json):
     """Convert a grafana volume-utilization alert into a tendrl alert.

     Args:
         alert_json: grafana alert payload; the keys read here are
             ``NewStateDate``, ``State`` and ``Name``.

     Returns:
         The dict produced by ``self.parse_alert_metrics`` with the tendrl
         fields (time stamp, resource, severity, message, ...) filled in,
         or ``None`` when conversion fails.  Failures are published as a
         debug ``ExceptionMessage`` instead of being raised.
     """
     alert = self.parse_alert_metrics(alert_json)
     try:
         alert["alert_id"] = None
         alert["node_id"] = None
         alert["time_stamp"] = alert_json['NewStateDate']
         alert["resource"] = self.representive_name
         alert['alert_type'] = constants.ALERT_TYPE
         alert['significance'] = constants.SIGNIFICANCE_HIGH
         alert['pid'] = utils.find_grafana_pid()
         alert['source'] = constants.ALERT_SOURCE
         alert['tags']['cluster_name'] = utils.find_cluster_name(
             alert['tags']['integration_id'])
         if alert_json['State'] == constants.GRAFANA_ALERT:
             # Severity is derived from the grafana panel name.
             if "critical" in alert_json['Name'].lower():
                 alert['severity'] = \
                     constants.TENDRL_SEVERITY_MAP['critical']
             else:
                 alert['severity'] = \
                     constants.TENDRL_SEVERITY_MAP['warning']
             # NOTE(review): the threshold shown is always warning_max,
             # also for critical alerts; confirm this is intended.
             alert['tags']['message'] = (
                 "Volume utilization of %s in "
                 "cluster %s is %s %% which is above %s"
                 " threshold (%s %%)" %
                 (alert['tags']['volume_name'],
                  alert['tags']['integration_id'], alert['current_value'],
                  alert['severity'], alert['tags']['warning_max']))
         elif alert_json['State'] == constants.GRAFANA_CLEAR_ALERT:
             # Identifying clear alert from which panel critical/warning
             if "critical" in alert_json['Name'].lower():
                 alert['tags']['clear_alert'] = \
                     constants.TENDRL_SEVERITY_MAP['critical']
             elif "warning" in alert_json['Name'].lower():
                 alert['tags']['clear_alert'] = \
                     constants.TENDRL_SEVERITY_MAP['warning']
             alert['severity'] = constants.TENDRL_SEVERITY_MAP['info']
             alert['tags']['message'] = ("Volume utilization of %s in "
                                         "cluster %s is back normal" %
                                         (alert['tags']['volume_name'],
                                          alert['tags']['integration_id']))
         else:
             logger.log(
                 "error", NS.publisher_id, {
                     "message":
                     "Alert %s have unsupported alert "
                     "severity" % alert_json
                 })
             raise InvalidAlertSeverity
         return alert
     except (KeyError, CalledProcessError, EtcdKeyNotFound, NodeNotFound,
             InvalidAlertSeverity) as ex:
         Event(
             ExceptionMessage(
                 "debug", NS.publisher_id, {
                     "message":
                     "Error in converting grafana "
                     "alert into tendrl alert %s" % alert_json,
                     "exception":
                     ex
                 }))
         # Conversion failed: callers receive no alert.
         return None
def update_cluster_alert_count():
    """Recount active cluster alerts and persist the resulting counters.

    Loads every ``ClusterAlert`` tagged with the current integration id and
    counts those whose severity is WARNING or CRITICAL.  When such an
    alert's resource is listed in the ``VolumeAlertCounters`` relationship
    definitions for its alert type, the alert is also added to the matching
    volume entry returned by ``get_volume_alert_counts()``.  The cluster
    total and every volume total are then saved.

    ``etcd.EtcdException`` and ``AttributeError`` are logged at debug
    level and not re-raised.
    """
    counted_severities = ["WARNING", "CRITICAL"]
    active_alerts = 0
    try:
        per_volume = get_volume_alert_counts()
        cluster_alerts = NS.tendrl.objects.ClusterAlert(
            tags={'integration_id': NS.tendrl_context.integration_id}
        ).load_all()
        for entry in cluster_alerts:
            # Tags are loaded as a JSON string; decode them in place.
            entry.tags = json.loads(entry.tags)
            if entry.severity not in counted_severities:
                continue
            active_alerts += 1
            if entry.resource not in \
                    NS.gluster.objects.VolumeAlertCounters(
                    )._defs['relationship'][entry.alert_type.lower()]:
                continue
            vol_name = entry.tags.get('volume_name', None)
            if vol_name and vol_name in per_volume.keys():
                per_volume[vol_name]['alert_count'] += 1

        # Persist the cluster-wide total first ...
        cluster_counter = NS.tendrl.objects.ClusterAlertCounters(
            integration_id=NS.tendrl_context.integration_id,
            alert_count=active_alerts
        )
        cluster_counter.save()

        # ... then one counter object per volume.
        for _, counters in per_volume.iteritems():
            volume_counter = NS.gluster.objects.VolumeAlertCounters(
                integration_id=NS.tendrl_context.integration_id,
                alert_count=counters['alert_count'],
                volume_id=counters['vol_id']
            )
            volume_counter.save()
    except (etcd.EtcdException, AttributeError) as err:
        logger.log(
            "debug",
            NS.publisher_id,
            {"message": "Unable to update alert count.err: %s" % err}
        )
def test_log():
    """Exercise ``log_utils.log`` with and without patched constructors.

    Installs a fresh global ``NS`` with ``publisher_id`` 1, logs one info
    message while ``Event.__init__`` and ``Message.__init__`` are replaced
    by mocks returning ``None``, then logs one error message unpatched.
    """
    __builtin__.NS = maps.NamedDict()
    NS.publisher_id = 1
    with mock.patch('tendrl.commons.event.Event.__init__',
                    mock.Mock(return_value=None)), \
            mock.patch('tendrl.commons.message.Message.__init__',
                       mock.Mock(return_value=None)):
        log_utils.log("info", "node_context", {"message": "test"})
    log_utils.log("error", None, {"message": "test"})
    def stop(self):
        """Remove the gluster webhook registration and exit cherrypy.

        A failed cleanup is logged at error level; the cherrypy engine is
        asked to exit in either case.
        """
        cleaned_up = self._cleanup_gluster_native_message_reciever()
        if not cleaned_up:
            failure = {
                "message": "gluster native message reciever cleanup failed"
            }
            logger.log("error", NS.publisher_id, failure)

        cherrypy.engine.exit()
Exemple #47
0
 def reload_config(signum, frame):
     """Signal handler: log the SIGHUP and re-run common object setup.

     ``signum`` and ``frame`` are the standard signal-handler arguments
     and are not used.
     """
     notice = {"message": "Signal handler: SIGHUP, reload service config"}
     logger.log("debug", NS.publisher_id, notice)
     NS.gluster.ns.setup_common_objects()
Exemple #48
0
    def shutdown(signum, frame):
        """Signal handler: deregister this node from etcd tags and stop.

        Removes the node id from the ``gluster/server`` and
        ``tendrl/integration/gluster`` tag indexes and, when the node
        carries the cluster's provisioner tag, deletes that provisioner
        index.  A missing etcd key aborts the remaining cleanup steps and
        is logged at debug level.  Afterwards ``complete`` is set and
        ``m`` is stopped (both come from the enclosing scope).

        ``signum`` and ``frame`` are the standard signal-handler arguments
        and are not used.
        """
        logger.log(
            "debug",
            NS.publisher_id,
            {"message": "Signal handler: stopping"}
        )
        # Remove the node's name from gluster server tag
        try:
            gl_srvr_list = etcd_utils.read(
                "/indexes/tags/gluster/server"
            ).value
            gl_srvr_list = json.loads(gl_srvr_list)
            if NS.node_context.node_id in gl_srvr_list:
                gl_srvr_list.remove(NS.node_context.node_id)
            etcd_utils.write(
                "/indexes/tags/gluster/server",
                json.dumps(gl_srvr_list)
            )
            node_tags = NS.node_context.tags
            if 'provisioner/%s' % NS.tendrl_context.integration_id \
                    in node_tags:
                etcd_utils.delete(
                    "/indexes/tags/provisioner/%s" %
                    NS.tendrl_context.integration_id,
                    recursive=True
                )
            int_srvr_list = etcd_utils.read(
                "/indexes/tags/tendrl/integration/gluster"
            ).value
            int_srvr_list = json.loads(int_srvr_list)
            if NS.node_context.node_id in int_srvr_list:
                int_srvr_list.remove(NS.node_context.node_id)
            etcd_utils.write(
                "/indexes/tags/tendrl/integration/gluster",
                json.dumps(int_srvr_list)
            )
        except etcd.EtcdKeyNotFound:
            logger.log(
                "debug",
                NS.publisher_id,
                {
                    # Space after "tag." keeps the sentence and the ids
                    # from running together.
                    "message": "Couldnt remove node from "
                    "gluster servers list tag. "
                    "integration_id: %s, node_id: %s" %
                    (
                        NS.tendrl_context.integration_id,
                        NS.node_context.node_id
                    )
                }
            )

        complete.set()
        m.stop()
Exemple #49
0
 def start(self):
     """Start the manager's worker threads.

     The message-handler and sds-sync threads are optional and only
     started when set; the job-consumer thread is always started last.
     """
     starting_msg = "%s starting" % self.__class__.__name__
     logger.log("debug", NS.publisher_id, {"message": starting_msg})

     message_handler = self._message_handler_thread
     if message_handler is not None:
         message_handler.start()

     sds_sync = self._sds_sync_thread
     if sds_sync is not None:
         sds_sync.start()

     self._job_consumer_thread.start()
Exemple #50
0
 def load_definition(self):
     """Return this atom's definition from its namespace.

     Logging falls back to stdout/stderr whenever the logging call itself
     raises ``KeyError``.

     Returns:
         Whatever ``self._ns.get_atom_definition`` returns for this
         object/atom pair.

     Raises:
         Exception: the definition lookup raised ``KeyError``; the
             message names the namespace source, object and atom.
     """
     try:
         logger.log(
             "debug",
             NS.publisher_id,
             {"message": "Load definitions (.yml) for "
                         "namespace.%s."
                         "objects.%s.atoms.%s" %
                         (self._ns.ns_name, self.obj.__name__,
                          self.__class__.__name__)}
         )
     except KeyError:
         sys.stdout.write(
             "Load definitions (.yml) for "
             "namespace.%s.objects.%s."
             "atoms.%s \n" %
             (self._ns.ns_name, self.obj.__name__,
              self.__class__.__name__)
         )
     try:
         return self._ns.get_atom_definition(
             self.obj.__name__,
             self.__class__.__name__
         )
     except KeyError as ex:
         # Space after "for" keeps it apart from the namespace path.
         msg = "Could not find definitions (.yml) for " \
               "namespace.%s.objects.%s.atoms.%s" % \
               (
                   self._ns.ns_src,
                   self.obj.__name__,
                   self.__class__.__name__
               )
         try:
             Event(
                 ExceptionMessage(
                     priority="debug",
                     publisher=NS.publisher_id,
                     payload={"message": "Error", "exception": ex}
                 )
             )
         except KeyError:
             sys.stderr.write("Error: %s \n" % ex)
         try:
             logger.log(
                 "debug",
                 NS.publisher_id,
                 {"message": msg}
             )
         except KeyError:
             sys.stderr.write(msg + "\n")
         raise Exception(msg)
Exemple #51
0
def emit_event(resource, curr_value, msg, instance,
               severity, alert_notify=False, tags=None,
               integration_id=None, cluster_name=None,
               sds_name=None, node_id=None):
    """Build a STATUS alert and publish it as a "notice" log message.

    Args:
        resource: resource identifier stored in the alert and used as the
            payload's ``alert_condition_status``.
        curr_value: current value recorded in the alert.
        msg: human-readable message stored in the alert tags.
        instance: stored as the ``plugin_instance`` tag.
        severity: alert severity; ``"INFO"`` marks the condition as unset.
        alert_notify: when truthy, added to the payload as
            ``alert_notify``.
        tags: optional extra tags.  ``entity_type`` selects which of
            ``node_id``, ``fqdn`` and ``volume_name`` are copied into the
            alert.  Defaults to no tags.
        integration_id: overrides ``NS.tendrl_context.integration_id`` in
            the alert tags; also passed to the logger as given.
        cluster_name: overrides ``NS.tendrl_context.cluster_name``.
        sds_name: accepted for compatibility; not used here.
        node_id: node id stored in the alert (brick alerts may replace it
            from ``tags``).
    """
    # A fresh dict per call avoids sharing one mutable default object.
    tags = {} if tags is None else tags

    alert = {}
    alert['source'] = NS.publisher_id
    alert['node_id'] = node_id
    alert['pid'] = os.getpid()
    alert['time_stamp'] = tendrl_now().isoformat()
    alert['alert_type'] = 'STATUS'
    alert['severity'] = severity
    alert['resource'] = resource
    alert['current_value'] = curr_value
    alert['tags'] = dict(
        plugin_instance=instance,
        message=msg,
        integration_id=integration_id or NS.tendrl_context.integration_id,
        cluster_name=cluster_name or NS.tendrl_context.cluster_name
    )
    if "entity_type" in tags:
        if tags["entity_type"] == BRICK_ENTITY:
            alert['node_id'] = tags.get(
                "node_id", NS.node_context.node_id
            )
            alert['tags']['fqdn'] = tags.get(
                "fqdn", NS.node_context.fqdn
            )
            alert['tags']['volume_name'] = tags.get(
                'volume_name', None
            )
        elif tags["entity_type"] == VOLUME_ENTITY:
            alert['tags']['volume_name'] = tags.get(
                'volume_name', None
            )
    payload = {'message': json.dumps(alert)}
    payload['alert_condition_state'] = severity
    payload['alert_condition_status'] = resource

    if alert_notify:
        payload['alert_notify'] = alert_notify

    # INFO alerts clear a previously raised condition.
    payload['alert_condition_unset'] = (severity == "INFO")
    logger.log(
        "notice",
        "alerting",
        payload,
        integration_id=integration_id
    )
Exemple #52
0
 def __run_module(self, attr):
     """Run the ansible service module with ``attr`` and judge the result.

     Args:
         attr: module arguments; ``attr["state"]`` is the requested
             service state.

     Returns:
         A ``(message, success)`` tuple.  For the requested states
         ``started``, ``restarted`` and ``reloaded`` success means the
         module reported state ``started``; for any other requested state
         the reported state must equal it.  When the runner raises
         ``AnsibleExecutableGenerationFailed`` the tuple is
         ``(<exception message>, False)``.
     """
     runner_kwargs = dict(
         publisher_id=self.publisher_id,
         node_id=self.node_id,
         **attr
     )
     try:
         runner = ansible_module_runner.AnsibleRunner(
             ANSIBLE_MODULE_PATH, **runner_kwargs)
     except ansible_module_runner.AnsibleModuleNotFound:
         # Backward compat ansible<=2.2
         runner = ansible_module_runner.AnsibleRunner(
             "core/" + ANSIBLE_MODULE_PATH, **runner_kwargs)

     try:
         result, err = runner.run()
         logger.log(
             "debug",
             self.publisher_id,
             {"message": "Service Management: %s" % result}
         )
     except ansible_module_runner.AnsibleExecutableGenerationFailed as e:
         failure_text = (
             "Error switching the service: "
             "%s to %s state. Error: %s" %
             (self.attributes["name"], attr["state"], str(e))
         )
         logger.log(
             "error",
             self.publisher_id,
             {"message": failure_text},
             node_id=self.node_id
         )
         return e.message, False

     message = result.get("msg", "").encode("ascii")
     state = result.get("state", "").encode("ascii")
     if attr["state"] in ["started", "restarted", "reloaded"]:
         success = True if state == "started" else False
     else:
         success = True if attr["state"] == state else False
     return message, success
Exemple #53
0
 def peer_detach(self, event):
     """Handle a peer-detach event by requesting a dashboard deletion.

     Sleeps for ``self.sync_interval`` first, then asks
     ``monitoring_utils.update_dashboard`` to delete the dashboard of the
     host named in ``event['message']['host']`` and logs the job id.
     """
     time.sleep(self.sync_interval)
     detached_host = event['message']['host']
     job_id = monitoring_utils.update_dashboard(
         detached_host,
         RESOURCE_TYPE_PEER,
         NS.tendrl_context.integration_id,
         "delete"
     )
     job_notice = "Update dashboard job %s created" % job_id
     logger.log("debug", NS.publisher_id, {"message": job_notice})
Exemple #54
0
 def load_plugins(self):
     """Import every module found in the ``plugins`` directory.

     The directory is resolved relative to this file and the modules are
     imported under the ``gdeploy_wrapper.plugins`` package.

     Raises:
         SyntaxError, ValueError, ImportError: re-raised after being
             logged at debug level.
     """
     try:
         here = os.path.dirname(os.path.abspath(__file__))
         plugin_dir = here + '/plugins'
         package = 'tendrl.gluster_integration.gdeploy_wrapper.plugins'
         discovered = self.list_modules_in_package_path(plugin_dir, package)
         for _, module_path in discovered:
             importlib.import_module(module_path)
     except (SyntaxError, ValueError, ImportError) as ex:
         failure_text = ("Failed to load the gluster provisioner "
                         "plugins. Error %s" % ex)
         logger.log(
             "debug",
             NS.publisher_id,
             {"message": failure_text},
             integration_id=NS.tendrl_context.integration_id
         )
         raise ex
Exemple #55
0
    def _validate_ns_definitions(self):
        """Validate the flow and object definitions of this namespace.

        Raises:
            Exception: the parsed definitions contain no
                ``namespace.<ns_name>`` entry (also logged at error
                level).
        """
        raw_ns = "namespace.%s" % self.ns_name
        parsed_defs = None
        try:
            parsed_defs = \
                self.current_ns.definitions.get_parsed_defs()[raw_ns]
        except KeyError:
            msg = "%s definitions (.yml) not found" % raw_ns
            logger.log(
                "error",
                NS.get("publisher_id", None),
                {"message": msg}
            )
            raise Exception(msg)

        # Flow/Object/Atom classes with class variable "internal=True"
        # will not be validated and have to define their own self._defs
        # (i.e. definitions dict as per latest Tendrl schema)
        self._validate_ns_flow_definitions(raw_ns, parsed_defs)
        self._validate_ns_obj_definitions(raw_ns, parsed_defs)
Exemple #56
0
def release_node_lock(parameters):
    """Release the node locks owned by the job described in ``parameters``.

    For every node id in ``parameters['Node[]']`` the node context is
    loaded; when its ``locked_by`` equals ``parameters['job_id']`` the
    lock is cleared, the context is saved and the release is logged.
    Nodes locked by anything else are left untouched.

    :param parameters: job parameters; reads ``'Node[]'``, ``'job_id'``
        and, when a lock is released, ``'flow_id'``.
    """
    for node_id in parameters['Node[]']:
        # NOTE(review): load() runs outside the try block, so an
        # EtcdKeyNotFound raised while loading is not skipped -- confirm
        # that this is intended.
        node_context = NS.tendrl.objects.NodeContext(node_id=node_id).load()
        try:
            owner = node_context.locked_by
            if owner != parameters['job_id']:
                # Not locked by this job: nothing to release.
                continue
            node_context.locked_by = None
            node_context.save()
            logger.log(
                "info",
                NS.publisher_id,
                {"message": "Released lock (%s) on (%s)" % (owner, node_id)},
                job_id=parameters['job_id'],
                flow_id=parameters['flow_id']
            )
        except EtcdKeyNotFound:
            # Missing key in the central store: move on to the next node.
            continue
 def _cleanup_gluster_native_message_reciever(self):
     """Delete this receiver's webhook via ``gluster-eventsapi``.

     Runs ``gluster-eventsapi webhook-del <url>`` for the URL built from
     ``self.host``, ``self.port`` and ``self.path``. A non-zero return
     code is only logged, never raised.

     :returns: always ``True``.
     """
     webhook_url = "http://{0}:{1}{2}".format(
         self.host, str(self.port), self.path
     )
     delete_cmd = cmd_utils.Command(
         'gluster-eventsapi webhook-del %s' % webhook_url
     )
     _out, stderr, exit_code = delete_cmd.run()
     if exit_code == 0:
         return True

     # A webhook that is already gone is logged at debug level; any other
     # failure is logged as an error.
     if "Webhook does not exists" in stderr:
         level = "debug"
     else:
         level = "error"
     failure = "could not delete webhook from glustereventsd. {0}: {1}"
     logger.log(
         level,
         NS.publisher_id,
         {"message": failure.format(level, stderr)}
     )
     return True
Exemple #58
0
    def save(self, update=True, ttl=None):
        """Write this object to the central store.

        Watched attributes (``watch_attrs`` in ``self._defs``) are written
        individually with ``quorum=True``; values of type ``json`` or
        ``list`` are JSON-encoded first. When the object's hash differs
        from the one in the central store (or the class name contains
        "Message"), the ``data``, ``updated_at`` and ``hash`` keys under
        ``self.value`` are rewritten and, if ``ttl`` is set, the key is
        refreshed with it. ``self.watch_attrs()`` is always called last.

        :param update: not used by this method; kept for interface
            compatibility.
        :param ttl: optional time-to-live passed to the hash comparison
            and to ``etcd_utils.refresh``.
        """
        hash_key_changed = True
        if "Message" not in self.__class__.__name__:
            # If local object.hash is equal to central_store object.hash,
            # the data/updated_at/hash keys need no rewrite.
            if self.hash_compare_with_central_store(ttl=ttl):
                hash_key_changed = False

        rendered_obj = self.render()
        watchables = self._defs.get("watch_attrs", [])
        if self.__class__.__name__ in ['Config', 'Definition'] or \
                len(watchables) > 0:
            for item in rendered_obj:
                if item['name'] not in watchables:
                    continue
                _type = self._defs.get("attrs", {}).get(
                    item['name'],
                    {}
                ).get("type")
                if _type and _type.lower() in ['json', 'list'] and \
                        item['value']:
                    try:
                        item['value'] = json.dumps(item['value'])
                    except (TypeError, ValueError):
                        # json.dumps raises TypeError for unserializable
                        # values and ValueError for circular references.
                        # Instances have no __name__, so the class name is
                        # reported instead.
                        _msg = "Error save() attr %s for object %s" % \
                               (item['name'], self.__class__.__name__)
                        logger.log(
                            "debug",
                            NS.publisher_id,
                            {"message": _msg}
                        )
                # On an encoding failure the unencoded value is written.
                etcd_utils.write(item['key'], item['value'], quorum=True)

        if hash_key_changed:
            data_key = self.value + '/data'
            etcd_utils.write(data_key, self.json)
            updated_at_key = self.value + '/updated_at'
            hash_key = self.value + '/hash'
            etcd_utils.write(updated_at_key, str(time_utils.now()))
            if hasattr(self, 'hash'):
                etcd_utils.write(hash_key, self.hash)

            if ttl:
                etcd_utils.refresh(self.value, ttl)

        self.watch_attrs()
Exemple #59
0
    def _create_node_id(self):
        """Generate a new node id, store it locally and return it.

        A random UUID4 string is generated, written to
        ``/var/lib/tendrl/node_id`` (the parent directory is created when
        missing) and assigned to the module-level ``NODE_ID``.

        :returns: the new node id as a string.
        """
        global NODE_ID

        node_id = str(uuid.uuid4())
        try:
            # NOTE(review): the logged text ends with a dangling "with ";
            # left unchanged because the intended wording is not visible.
            logger.log(
                "debug",
                NS.publisher_id,
                {"message": "Registered Node (%s) with " % node_id}
            )
        except KeyError:
            # NOTE(review): presumably raised while the namespace is not
            # fully set up yet -- confirm. Fall back to stdout.
            sys.stdout.write("message: Registered Node (%s) \n" % node_id)

        local_node_id = "/var/lib/tendrl/node_id"
        node_id_dir = os.path.dirname(local_node_id)
        if not os.path.exists(node_id_dir):
            os.makedirs(node_id_dir)
        # Text mode: node_id is a str, and writing a str to a binary-mode
        # file raises TypeError on Python 3.
        with open(local_node_id, 'w') as f:
            f.write(node_id)
        NODE_ID = node_id
        return node_id
Exemple #60
0
 def run(self):
     """Run the command through the Ansible module runner.

     An ``AnsibleRunner`` is built for ``ANSIBLE_MODULE_PATH`` with
     ``self.attributes``; when that module is not found, the same path
     prefixed with ``core/`` is tried instead.

     :returns: ``(stdout, stderr, rc)`` taken from the runner's result,
         with ``""``/``""``/``-1`` as defaults for missing keys, or
         ``("", <exception message>, -1)`` when the runner raises
         ``AnsibleExecutableGenerationFailed``.
     """
     runner_cls = ansible_module_runner.AnsibleRunner
     try:
         runner = runner_cls(ANSIBLE_MODULE_PATH, **self.attributes)
     except ansible_module_runner.AnsibleModuleNotFound:
         # Backward compat ansible<=2.2
         legacy_path = "core/" + ANSIBLE_MODULE_PATH
         runner = runner_cls(legacy_path, **self.attributes)

     try:
         result, _err = runner.run()
         executed = "Command Execution: %s" % result
         try:
             logger.log("debug", NS.publisher_id, {"message": executed})
         except KeyError:
             # Logging failed with KeyError: report on stdout instead.
             sys.stdout.write("Command Execution: %s \n" % result)
     except ansible_module_runner.AnsibleExecutableGenerationFailed as e:
         try:
             failure_payload = {
                 "message": "could not run the command %s. " %
                            self.attributes["_raw_params"],
                 "exception": e
             }
             Event(
                 ExceptionMessage(
                     priority="debug",
                     publisher=NS.publisher_id,
                     payload=failure_payload
                 )
             )
         except KeyError:
             sys.stderr.write(
                 "could not run the command %s. Error: %s\n" %
                 (self.attributes["_raw_params"], str(e))
             )
         # NOTE(review): relies on the exception exposing ``.message``;
         # built-in exceptions do not have it on Python 3 -- confirm the
         # exception class defines it.
         return "", str(e.message), -1

     # NOTE(review): stderr is ASCII-encoded while stdout is returned as
     # is; non-ASCII stderr text would make encode() raise -- confirm.
     out_text = result.get("stdout", "")
     err_text = result.get("stderr", "").encode("ascii")
     exit_code = result.get("rc", -1)
     return out_text, err_text, exit_code