Example #1
    def invalidate_hash(self):
        self.render()
        _hash_key = "/{0}/hash".format(self.value)
        try:
            etcd_utils.delete(_hash_key)
        except etcd.EtcdKeyNotFound:
            pass
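A pattern shared by almost every example on this page is the tolerant delete: call etcd_utils.delete() and swallow etcd.EtcdKeyNotFound so the operation is idempotent. A minimal standalone sketch of that idea, written against the plain python-etcd client rather than Tendrl's etcd_utils wrapper (the helper name safe_delete and the key path are illustrative assumptions):

import etcd

client = etcd.Client(host="127.0.0.1", port=2379)

def safe_delete(key, recursive=False):
    # Remove the key (and optionally its subtree); a missing key is
    # treated as already deleted, so repeated calls are harmless.
    try:
        client.delete(key, recursive=recursive)
        return True
    except etcd.EtcdKeyNotFound:
        return False

# e.g. drop a cached hash key, whether or not it is present
safe_delete("/node-1/hash")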
Example #3
    def on_change(self, attr, prev_value, current_value):
        if attr == "status" and "tendrl/monitor" in NS.node_context.tags:
            _tc = NS.tendrl.objects.TendrlContext(node_id=self.node_id).load()
            # Check node is managed
            _cnc = NS.tendrl.objects.ClusterNodeContext(
                node_id=self.node_id,
                integration_id=_tc.integration_id).load()
            if current_value is None and str(_cnc.is_managed).lower() == "yes":
                self.status = "DOWN"
                self.save()
                msg = "Node {0} is DOWN".format(self.fqdn)
                event_utils.emit_event("node_status",
                                       self.status,
                                       msg,
                                       "node_{0}".format(self.fqdn),
                                       "WARNING",
                                       node_id=self.node_id,
                                       integration_id=_tc.integration_id)
                # Loading cluster_node_context will also load node_context
                # and update it with the latest values
                _cnc_new = \
                    NS.tendrl.objects.ClusterNodeContext(
                        node_id=self.node_id,
                        integration_id=_tc.integration_id,
                        first_sync_done=_cnc.first_sync_done,
                        is_managed=_cnc.is_managed
                    )
                _cnc_new.save()
                del _cnc_new
                # Update cluster details
                self.update_cluster_details(_tc.integration_id)
                _tag = "provisioner/%s" % _tc.integration_id
                if _tag in self.tags:
                    _index_key = "/indexes/tags/%s" % _tag
                    self.tags.remove(_tag)
                    self.save()
                    etcd_utils.delete(_index_key)
                if _tc.sds_name in ["gluster", "RHGS"]:
                    bricks = etcd_utils.read(
                        "clusters/{0}/Bricks/all/{1}".format(
                            _tc.integration_id, self.fqdn))

                    for brick in bricks.leaves:
                        try:
                            etcd_utils.write("{0}/status".format(brick.key),
                                             "Stopped")
                        except (etcd.EtcdAlreadyExist, etcd.EtcdKeyNotFound):
                            pass
            elif current_value == "UP" and str(
                    _cnc.is_managed).lower() == "yes":
                msg = "{0} is UP".format(self.fqdn)
                event_utils.emit_event("node_status",
                                       "UP",
                                       msg,
                                       "node_{0}".format(self.fqdn),
                                       "INFO",
                                       node_id=self.node_id,
                                       integration_id=_tc.integration_id)
            del _cnc
Example #4
    def run(self):
        integration_id = self.parameters['TendrlContext.integration_id']

        etcd_keys_to_delete = []
        etcd_keys_to_delete.append("/clusters/%s/nodes" % integration_id)
        etcd_keys_to_delete.append("/clusters/%s/Bricks" % integration_id)
        etcd_keys_to_delete.append("/clusters/%s/Volumes" % integration_id)
        etcd_keys_to_delete.append("/clusters/%s/GlobalDetails" %
                                   integration_id)
        etcd_keys_to_delete.append("/clusters/%s/TendrlContext" %
                                   integration_id)
        etcd_keys_to_delete.append("/clusters/%s/Utilization" % integration_id)
        etcd_keys_to_delete.append("/clusters/%s/raw_map" % integration_id)
        etcd_keys_to_delete.append("/alerting/clusters/%s" % integration_id)
        nodes = etcd_utils.read("/clusters/%s/nodes" % integration_id)
        node_ids = []
        for node in nodes.leaves:
            node_id = node.key.split("/")[-1]
            node_ids.append(node_id)
            etcd_keys_to_delete.append("/alerting/nodes/%s" % node_id)

        # Find the alerting/alerts entries to be deleted
        try:
            cluster_alert_ids = etcd_utils.read("/alerting/clusters")
            for entry in cluster_alert_ids.leaves:
                ca_id = entry.key.split("/")[-1]
                etcd_keys_to_delete.append("/alerting/alerts/%s" % ca_id)
        except etcd.EtcdKeyNotFound:
            # No cluster alerts, continue
            pass

        try:
            node_alert_ids = etcd_utils.read("/alerting/nodes")
            for entry in node_alert_ids.leaves:
                na_id = entry.key.split("/")[-1]
                etcd_keys_to_delete.append("/alerting/alerts/%s" % na_id)
        except etcd.EtcdKeyNotFound:
            # No node alerts, continue
            pass

        # Remove the cluster details
        for key in list(set(etcd_keys_to_delete)):
            try:
                etcd_utils.delete(key, recursive=True)
            except etcd.EtcdKeyNotFound:
                logger.log(
                    "debug",
                    NS.publisher_id,
                    {"message": "%s key not found for deletion" % key},
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                )
                continue

        return True
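Example #4 above (and its longer variants in Examples #13 and #16) follows a collect-then-delete pattern: cluster-related keys are gathered into a list, de-duplicated with set(), and removed recursively, with missing keys skipped. A condensed sketch of just that loop, assuming the key list is already built and substituting a plain print for Tendrl's logger:

import etcd

def purge_keys(client, etcd_keys_to_delete):
    # Delete each unique key recursively; a key that has already
    # disappeared is reported and skipped rather than treated as an error.
    for key in set(etcd_keys_to_delete):
        try:
            client.delete(key, recursive=True)
        except etcd.EtcdKeyNotFound:
            print("%s key not found for deletion" % key)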
Example #5
    def shutdown(signum, frame):
        logger.log(
            "debug",
            NS.publisher_id,
            {"message": "Signal handler: stopping"}
        )
        # Remove the node's name from gluster server tag
        try:
            gl_srvr_list = etcd_utils.read(
                "/indexes/tags/gluster/server"
            ).value
            gl_srvr_list = json.loads(gl_srvr_list)
            if NS.node_context.node_id in gl_srvr_list:
                gl_srvr_list.remove(NS.node_context.node_id)
            etcd_utils.write(
                "/indexes/tags/gluster/server",
                json.dumps(gl_srvr_list)
            )
            node_tags = NS.node_context.tags
            if 'provisioner/%s' % NS.tendrl_context.integration_id \
                in node_tags:
                etcd_utils.delete(
                    "/indexes/tags/provisioner/%s" %
                    NS.tendrl_context.integration_id,
                    recursive=True
                )
            int_srvr_list = etcd_utils.read(
                "/indexes/tags/tendrl/integration/gluster"
            ).value
            int_srvr_list = json.loads(int_srvr_list)
            if NS.node_context.node_id in int_srvr_list:
                int_srvr_list.remove(NS.node_context.node_id)
            etcd_utils.write(
                "/indexes/tags/tendrl/integration/gluster",
                json.dumps(int_srvr_list)
            )
        except etcd.EtcdKeyNotFound:
            logger.log(
                "debug",
                NS.publisher_id,
                {
                    "message": "Couldnt remove node from "
                    "gluster servers list tag."
                    "integration_id: %s, node_id: %s" %
                    (
                        NS.tendrl_context.integration_id,
                        NS.node_context.node_id
                    )
                }
            )
            pass

        complete.set()
        m.stop()
Example #6
    def shutdown(signum, frame):
        logger.log(
            "debug",
            NS.publisher_id,
            {"message": "Signal handler: stopping"}
        )
        # Remove the node's name from gluster server tag
        try:
            gl_srvr_list = etcd_utils.read(
                "/indexes/tags/gluster/server"
            ).value
            gl_srvr_list = json.loads(gl_srvr_list)
            if NS.node_context.node_id in gl_srvr_list:
                gl_srvr_list.remove(NS.node_context.node_id)
            etcd_utils.write(
                "/indexes/tags/gluster/server",
                json.dumps(gl_srvr_list)
            )
            node_tags = json.loads(NS.node_context.tags)
            if 'provisioner/%s' % NS.tendrl_context.integration_id \
                in node_tags:
                etcd_utils.delete(
                    "/indexes/tags/provisioner/%s" %
                    NS.tendrl_context.integration_id,
                    recursive=True
                )
            int_srvr_list = etcd_utils.read(
                "/indexes/tags/tendrl/integration/gluster"
            ).value
            int_srvr_list = json.loads(int_srvr_list)
            if NS.node_context.node_id in int_srvr_list:
                int_srvr_list.remove(NS.node_context.node_id)
            etcd_utils.write(
                "/indexes/tags/tendrl/integration/gluster",
                json.dumps(int_srvr_list)
            )
        except etcd.EtcdKeyNotFound:
            logger.log(
                "debug",
                NS.publisher_id,
                {
                    "message": "Couldnt remove node from "
                    "gluster servers list tag."
                    "integration_id: %s, node_id: %s" %
                    (
                        NS.tendrl_context.integration_id,
                        NS.node_context.node_id
                    )
                }
            )
            pass

        complete.set()
        m.stop()
Example #7
    def on_change(self, attr, prev_value, current_value):
        if attr == "status":
            if current_value is None:
                self.status = "DOWN"
                self.save()
                msg = "Node {0} is DOWN".format(self.fqdn)
                event_utils.emit_event("node_status",
                                       self.status,
                                       msg,
                                       "node_{0}".format(self.fqdn),
                                       "WARNING",
                                       node_id=self.node_id)

                _tc = NS.tendrl.objects.TendrlContext(
                    node_id=self.node_id).load()
                _tag = "provisioner/%s" % _tc.integration_id
                if _tag in self.tags:
                    _index_key = "/indexes/tags/%s" % _tag
                    self.tags.remove(_tag)
                    self.save()
                    etcd_utils.delete(_index_key)
                    _msg = "node_sync, STALE provisioner node "\
                        "found! re-configuring monitoring "\
                        "(job-id: %s) on this node"
                    payload = {
                        "tags": ["tendrl/node_%s" % self.node_id],
                        "run": "tendrl.flows.ConfigureMonitoring",
                        "status": "new",
                        "parameters": {
                            'TendrlContext.integration_id': _tc.integration_id
                        },
                        "type": "node"
                    }
                    _job_id = str(uuid.uuid4())
                    NS.tendrl.objects.Job(job_id=_job_id,
                                          status="new",
                                          payload=payload).save()
                    logger.log("debug", NS.publisher_id,
                               {"message": _msg % _job_id})

                if _tc.sds_name == "gluster":
                    bricks = etcd_utils.read(
                        "clusters/{0}/Bricks/all/{1}".format(
                            _tc.integration_id, self.fqdn))

                    for brick in bricks.leaves:
                        try:
                            etcd_utils.write("{0}/status".format(brick.key),
                                             "Stopped")
                        except (etcd.EtcdAlreadyExist, etcd.EtcdKeyNotFound):
                            pass
Example #8
def run():
    try:
        nodes = NS._int.client.read("/nodes")
    except etcd.EtcdKeyNotFound:
        return

    for node in nodes.leaves:
        node_id = node.key.split('/')[-1]
        try:
            NS._int.wclient.write(
                "/nodes/{0}/NodeContext/status".format(node_id),
                "DOWN",
                prevExist=False)
            _node_context = NS.tendrl.objects.NodeContext(
                node_id=node_id).load()
            _tc = NS.tendrl.objects.TendrlContext(node_id=node_id).load()
            _cluster = NS.tendrl.objects.Cluster(
                integration_id=_tc.integration_id).load()

            # Remove stale provisioner tag
            if _cluster.is_managed == "yes":
                _tag = "provisioner/%s" % _cluster.integration_id
                if _tag in _node_context.tags:
                    _index_key = "/indexes/tags/%s" % _tag
                    _node_context.tags.remove(_tag)
                    _node_context.save()
                    etcd_utils.delete(_index_key)
                    _msg = "node_sync, STALE provisioner node found! re-configuring monitoring (job-id: %s) on this node"
                    payload = {
                        "tags": ["tendrl/node_%s" % node_id],
                        "run": "tendrl.flows.ConfigureMonitoring",
                        "status": "new",
                        "parameters": {
                            'TendrlContext.integration_id': _tc.integration_id
                        },
                        "type": "node"
                    }
                    _job_id = str(uuid.uuid4())
                    Job(job_id=_job_id, status="new", payload=payload).save()
                    Event(
                        Message(priority="debug",
                                publisher=NS.publisher_id,
                                payload={"message": _msg % _job_id}))

        except etcd.EtcdAlreadyExist:
            pass
    return
Example #9
def test_delete():
    setattr(__builtin__, "NS", maps.NamedDict())
    setattr(NS, "_int", maps.NamedDict())
    NS._int.wclient = importlib.import_module("tendrl.commons"
                                              ".tests.fixtures."
                                              "client").Client()
    NS._int.wreconnect = type("Dummy", (object, ), {})
    with patch.object(Client, "delete") as mock_delete:
        etcd_utils.delete("key")
        assert mock_delete.called
    with patch.object(Client, "delete",
                      raise_etcdconnectionfailed) as mock_delete:
        with pytest.raises(etcd.EtcdConnectionFailed):
            etcd_utils.delete("key")
    with patch.object(Client, "delete", raise_etcdkeynotfound) as mock_delete:
        with pytest.raises(etcd.EtcdKeyNotFound):
            etcd_utils.delete("key")
def test_delete():
    setattr(__builtin__, "NS", maps.NamedDict())
    setattr(NS, "_int", maps.NamedDict())
    NS._int.wclient = importlib.import_module("tendrl.commons"
                                              ".tests.fixtures."
                                              "client").Client()
    NS._int.wreconnect = type("Dummy", (object,), {})
    with patch.object(Client, "delete") as mock_delete:
        etcd_utils.delete("key")
        assert mock_delete.called
    with patch.object(Client, "delete",
                      raise_etcdconnectionfailed) as mock_delete:
        with pytest.raises(etcd.EtcdConnectionFailed):
            etcd_utils.delete("key")
    with patch.object(Client, "delete",
                      raise_etcdkeynotfound) as mock_delete:
        with pytest.raises(etcd.EtcdKeyNotFound):
            etcd_utils.delete("key")
Example #11
    def on_change(self, attr, prev_value, current_value):
        if attr == "status" and "tendrl/monitor" in NS.node_context.tags:
            _tc = NS.tendrl.objects.TendrlContext(node_id=self.node_id).load()
            # Check node is managed
            _cnc = NS.tendrl.objects.ClusterNodeContext(
                node_id=self.node_id,
                integration_id=_tc.integration_id).load()
            if current_value is None and str(_cnc.is_managed).lower() == "yes":
                self.status = "DOWN"
                self.save()
                msg = "Node {0} is DOWN".format(self.fqdn)
                event_utils.emit_event("node_status",
                                       self.status,
                                       msg,
                                       "node_{0}".format(self.fqdn),
                                       "WARNING",
                                       node_id=self.node_id,
                                       integration_id=_tc.integration_id)
                # Loading cluster_node_context will also load node_context
                # and update it with the latest values
                _cnc_new = \
                    NS.tendrl.objects.ClusterNodeContext(
                        node_id=self.node_id,
                        integration_id=_tc.integration_id,
                        first_sync_done=_cnc.first_sync_done,
                        is_managed=_cnc.is_managed
                    )
                _cnc_new.save()
                del _cnc_new
                # Update cluster details
                self.update_cluster_details(_tc.integration_id)
                _tag = "provisioner/%s" % _tc.integration_id
                if _tag in self.tags:
                    _index_key = "/indexes/tags/%s" % _tag
                    self.tags.remove(_tag)
                    self.save()
                    etcd_utils.delete(_index_key)
                    _msg = "node_sync, STALE provisioner node "\
                        "found! re-configuring monitoring "\
                        "(job-id: %s) on this node"
                    payload = {
                        "tags": ["tendrl/node_%s" % self.node_id],
                        "run": "tendrl.flows.ConfigureMonitoring",
                        "status": "new",
                        "parameters": {
                            'TendrlContext.integration_id': _tc.integration_id
                        },
                        "type": "node"
                    }
                    _job_id = str(uuid.uuid4())
                    NS.tendrl.objects.Job(job_id=_job_id,
                                          status="new",
                                          payload=payload).save()
                    logger.log("debug", NS.publisher_id,
                               {"message": _msg % _job_id})

                if _tc.sds_name in ["gluster", "RHGS"]:
                    bricks = etcd_utils.read(
                        "clusters/{0}/Bricks/all/{1}".format(
                            _tc.integration_id, self.fqdn))

                    for brick in bricks.leaves:
                        try:
                            etcd_utils.write("{0}/status".format(brick.key),
                                             "Stopped")
                        except (etcd.EtcdAlreadyExist, etcd.EtcdKeyNotFound):
                            pass
            elif current_value == "UP" and str(
                    _cnc.is_managed).lower() == "yes":
                msg = "{0} is UP".format(self.fqdn)
                event_utils.emit_event("node_status",
                                       "UP",
                                       msg,
                                       "node_{0}".format(self.fqdn),
                                       "INFO",
                                       node_id=self.node_id,
                                       integration_id=_tc.integration_id)
            del _cnc
Example #12
    def on_change(self, attr, prev_value, current_value):
        if attr == "status" and "tendrl/monitor" in NS.node_context.tags:
            _tc = NS.tendrl.objects.TendrlContext(
                node_id=self.node_id
            ).load()
            # Check node is managed
            _cnc = NS.tendrl.objects.ClusterNodeContext(
                node_id=self.node_id,
                integration_id=_tc.integration_id
            ).load()
            if current_value is None and str(_cnc.is_managed).lower() == "yes":
                self.status = "DOWN"
                self.save()
                msg = "Node {0} is DOWN".format(self.fqdn)
                event_utils.emit_event(
                    "node_status",
                    self.status,
                    msg,
                    "node_{0}".format(self.fqdn),
                    "WARNING",
                    node_id=self.node_id,
                    integration_id=_tc.integration_id
                )
                # Loading cluster_node_context will also load node_context
                # and update it with the latest values
                _cnc_new = \
                    NS.tendrl.objects.ClusterNodeContext(
                        node_id=self.node_id,
                        integration_id=_tc.integration_id,
                        first_sync_done=_cnc.first_sync_done,
                        is_managed=_cnc.is_managed
                    )
                _cnc_new.save()
                del _cnc_new
                # Update cluster details
                self.update_cluster_details(_tc.integration_id)
                _tag = "provisioner/%s" % _tc.integration_id
                if _tag in self.tags:
                    _index_key = "/indexes/tags/%s" % _tag
                    self.tags.remove(_tag)
                    self.save()
                    etcd_utils.delete(_index_key)
                if _tc.sds_name in ["gluster", "RHGS"]:
                    bricks = etcd_utils.read(
                        "clusters/{0}/Bricks/all/{1}".format(
                            _tc.integration_id,
                            self.fqdn
                        )
                    )

                    for brick in bricks.leaves:
                        try:
                            etcd_utils.write(
                                "{0}/status".format(brick.key),
                                "Stopped"
                            )
                        except (etcd.EtcdAlreadyExist, etcd.EtcdKeyNotFound):
                            pass
            elif current_value == "UP" and str(
                    _cnc.is_managed).lower() == "yes":
                msg = "{0} is UP".format(self.fqdn)
                event_utils.emit_event(
                    "node_status",
                    "UP",
                    msg,
                    "node_{0}".format(self.fqdn),
                    "INFO",
                    node_id=self.node_id,
                    integration_id=_tc.integration_id
                )
            del _cnc
Example #13
    def run(self):
        logger.log(
            "info",
            NS.publisher_id,
            {
                "message": "Deleting cluster details."
            },
            job_id=self.parameters['job_id'],
            flow_id=self.parameters['flow_id'],
        )
        integration_id = self.parameters['TendrlContext.integration_id']

        etcd_keys_to_delete = []
        etcd_keys_to_delete.append(
            "/clusters/%s/nodes" % integration_id
        )
        etcd_keys_to_delete.append(
            "/clusters/%s/Bricks" % integration_id
        )
        etcd_keys_to_delete.append(
            "/clusters/%s/Volumes" % integration_id
        )
        etcd_keys_to_delete.append(
            "/clusters/%s/GlobalDetails" % integration_id
        )
        etcd_keys_to_delete.append(
            "/clusters/%s/TendrlContext" % integration_id
        )
        etcd_keys_to_delete.append(
            "/clusters/%s/Utilization" % integration_id
        )
        etcd_keys_to_delete.append(
            "/clusters/%s/raw_map" % integration_id
        )
        etcd_keys_to_delete.append(
            "/alerting/clusters/%s" % integration_id
        )
        nodes = etcd_utils.read(
            "/clusters/%s/nodes" % integration_id
        )
        node_ids = []
        for node in nodes.leaves:
            node_id = node.key.split("/")[-1]
            node_ids.append(node_id)
            key = "/alerting/nodes/%s" % node_id
            etcd_keys_to_delete.append(
                key
            )
            try:
                # delete node alerts from /alerting/alerts
                node_alerts = etcd_utils.read(key)
                for node_alert in node_alerts.leaves:
                    etcd_keys_to_delete.append(
                        "/alerting/alerts/%s" % node_alert.key.split(
                            "/")[-1]
                    )
            except etcd.EtcdKeyNotFound:
                # No node alerts, continue
                pass

        # Find the alerting/alerts entries to be deleted
        try:
            cluster_alert_ids = etcd_utils.read(
                "/alerting/clusters/%s" % integration_id
            )
            for entry in cluster_alert_ids.leaves:
                ca_id = entry.key.split("/")[-1]
                etcd_keys_to_delete.append(
                    "/alerting/alerts/%s" % ca_id
                )
        except etcd.EtcdKeyNotFound:
            # No cluster alerts, continue
            pass

        # Remove the cluster details
        for key in list(set(etcd_keys_to_delete)):
            try:
                etcd_utils.delete(key, recursive=True)
            except etcd.EtcdKeyNotFound:
                logger.log(
                    "debug",
                    NS.publisher_id,
                    {
                        "message": "%s key not found for deletion" %
                        key
                    },
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                )
                continue
        # remove short name
        cluster = NS.tendrl.objects.Cluster(
            integration_id=integration_id
        ).load()
        cluster.short_name = ""
        cluster.save()

        return True
Example #14
    def volume_remove_brick_force(self, event):
        time.sleep(self.sync_interval)
        # Event returns bricks list as space separated single string
        bricks = event['message']['bricks'].split(" ")
        try:
            for brick in bricks:
                # find fqdn using ip
                ip = socket.gethostbyname(brick.split(":/")[0])
                node_id = etcd_utils.read("indexes/ip/%s" % ip).value
                fqdn = NS.tendrl.objects.ClusterNodeContext(
                    node_id=node_id
                ).load().fqdn
                brick = fqdn + ":" + brick.split(":")[-1]
                fetched_brick = NS.tendrl.objects.GlusterBrick(
                    NS.tendrl_context.integration_id,
                    fqdn=brick.split(":/")[0],
                    brick_dir=brick.split(":/")[1].replace('/', '_')
                ).load()

                # delete brick
                etcd_utils.delete(
                    "clusters/{0}/Bricks/all/{1}/{2}".format(
                        NS.tendrl_context.integration_id,
                        brick.split(":/")[0],
                        brick.split(":/")[1].replace('/', '_')
                    ),
                    recursive=True,
                )

                # delete alert dashboard
                job_id = monitoring_utils.update_dashboard(
                    "%s|%s" % (event['message']['volume'], brick),
                    RESOURCE_TYPE_BRICK,
                    NS.tendrl_context.integration_id,
                    "delete"
                )
                logger.log(
                    "debug",
                    NS.publisher_id,
                    {
                        "message": "Update dashboard job %s "
                        "created" % job_id
                    }
                )

                # delete brick details from graphite
                job_id = monitoring_utils.delete_resource_from_graphite(
                    "%s|%s" % (event['message']['volume'], brick),
                    RESOURCE_TYPE_BRICK,
                    NS.tendrl_context.integration_id,
                    "delete"
                )
                logger.log(
                    "debug",
                    NS.publisher_id,
                    {
                        "message": "Delete resource from graphite job %s "
                        "created" % job_id
                    }
                )

            volume_brick_path = "clusters/{0}/Volumes/{1}/"\
                                "Bricks".format(
                                    NS.tendrl_context.integration_id,
                                    fetched_brick.vol_id,
                                )

            # remove all the brick information under the volume as the
            # subvolume might have changed; let the next sync handle
            # the update of brick info
            etcd_utils.delete(
                volume_brick_path,
                recursive=True
            )

            _trigger_sync_key = 'clusters/%s/_sync_now' % \
                NS.tendrl_context.integration_id
            etcd_utils.write(_trigger_sync_key, 'true')
            etcd_utils.refresh(_trigger_sync_key, self.sync_interval)
        except etcd.EtcdKeyNotFound:
            logger.log(
                "debug",
                NS.publisher_id,
                {
                    "message": "Unable to delete bricks %s" % bricks
                }
            )
Example #15
    def volume_delete(self, event):
        time.sleep(self.sync_interval)
        fetched_volumes = NS.tendrl.objects.GlusterVolume(
            NS.tendrl_context.integration_id
        ).load_all()
        for fetched_volume in fetched_volumes:
            if fetched_volume.name == event['message']['name']:
                fetched_volume.deleted = True
                fetched_volume.deleted_at = time_utils.now()
                fetched_volume.save()
                try:
                    sub_volumes = etcd_utils.read(
                        "/clusters/{0}/Volumes/{1}/Bricks".format(
                            NS.tendrl_context.integration_id,
                            fetched_volume.vol_id
                        )
                    )

                    for sub_volume in sub_volumes.leaves:
                        bricks = etcd_utils.read(
                            sub_volume.key
                        )
                        for brick in bricks.leaves:
                            fqdn = brick.key.split('/')[-1].split(':')[0]
                            path = brick.key.split('/')[-1].split(':')[-1][1:]
                            # Delete brick dashboard from grafana
                            brick_obj = NS.tendrl.objects.GlusterBrick(
                                NS.tendrl_context.integration_id,
                                fqdn,
                                path
                            ).load()
                            # Delete brick
                            brick_path = "clusters/{0}/Bricks/"\
                                         "all/{1}/{2}".format(
                                             NS.tendrl_context.integration_id,
                                             fqdn,
                                             path
                                         )
                            etcd_utils.delete(
                                brick_path,
                                recursive=True
                            )
                            brick_full_path = fqdn + ":" + brick_obj.\
                                brick_path.split(":")[-1]
                            job_id = monitoring_utils.update_dashboard(
                                "%s|%s" % (
                                    event['message']['name'],
                                    brick_full_path
                                ),
                                RESOURCE_TYPE_BRICK,
                                NS.tendrl_context.integration_id,
                                "delete"
                            )
                            logger.log(
                                "debug",
                                NS.publisher_id,
                                {
                                    "message": "Update dashboard job %s"
                                    " for brick %s "
                                    "in cluster %s created" % (
                                        job_id,
                                        brick.key.split('/')[-1],
                                        NS.tendrl_context.integration_id
                                    )
                                }
                            )
                            # Delete brick from graphite
                            job_id = monitoring_utils.\
                                delete_resource_from_graphite(
                                    "%s|%s" % (
                                        event['message']['name'],
                                        brick_full_path
                                    ),
                                    RESOURCE_TYPE_BRICK,
                                    NS.tendrl_context.integration_id,
                                    "delete"
                                )
                            logger.log(
                                "debug",
                                NS.publisher_id,
                                {
                                    "message": "Delete resource "
                                    "from graphite job %s "
                                    "for brick %s in cluster %s created" % (
                                        job_id,
                                        brick.key.split('/')[-1],
                                        NS.tendrl_context.integration_id
                                    )
                                }
                            )
                except etcd.EtcdKeyNotFound:
                    pass
        # Delete volume dashboard from grafana
        job_id = monitoring_utils.update_dashboard(
            event['message']['name'],
            RESOURCE_TYPE_VOLUME,
            NS.tendrl_context.integration_id,
            "delete"
        )
        logger.log(
            "debug",
            NS.publisher_id,
            {
                "message": "Update dashboard job %s "
                "created" % job_id
            }
        )
        # Delete volume details from graphite
        job_id = monitoring_utils.delete_resource_from_graphite(
            event['message']['name'],
            RESOURCE_TYPE_VOLUME,
            NS.tendrl_context.integration_id,
            "delete"
        )
        logger.log(
            "debug",
            NS.publisher_id,
            {
                "message": "Delete resource from graphite job %s "
                "created" % job_id
            }
        )
Example #16
    def run(self):
        logger.log(
            "info",
            NS.publisher_id,
            {"message": "Deleting cluster details."},
            job_id=self.parameters['job_id'],
            flow_id=self.parameters['flow_id'],
        )
        integration_id = self.parameters['TendrlContext.integration_id']

        etcd_keys_to_delete = []
        etcd_keys_to_delete.append("/clusters/%s/nodes" % integration_id)
        etcd_keys_to_delete.append("/clusters/%s/Bricks" % integration_id)
        etcd_keys_to_delete.append("/clusters/%s/Volumes" % integration_id)
        etcd_keys_to_delete.append("/clusters/%s/GlobalDetails" %
                                   integration_id)
        etcd_keys_to_delete.append("/clusters/%s/TendrlContext" %
                                   integration_id)
        etcd_keys_to_delete.append("/clusters/%s/Utilization" % integration_id)
        etcd_keys_to_delete.append("/clusters/%s/raw_map" % integration_id)
        etcd_keys_to_delete.append("/alerting/clusters/%s" % integration_id)
        nodes = etcd_utils.read("/clusters/%s/nodes" % integration_id)
        node_ids = []
        for node in nodes.leaves:
            node_id = node.key.split("/")[-1]
            node_ids.append(node_id)
            key = "/alerting/nodes/%s" % node_id
            etcd_keys_to_delete.append(key)
            try:
                # delete node alerts from /alerting/alerts
                node_alerts = etcd_utils.read(key)
                for node_alert in node_alerts.leaves:
                    etcd_keys_to_delete.append("/alerting/alerts/%s" %
                                               node_alert.key.split("/")[-1])
            except etcd.EtcdKeyNotFound:
                # No node alerts, continue
                pass

        # Find the alerting/alerts entries to be deleted
        try:
            cluster_alert_ids = etcd_utils.read("/alerting/clusters/%s" %
                                                integration_id)
            for entry in cluster_alert_ids.leaves:
                ca_id = entry.key.split("/")[-1]
                etcd_keys_to_delete.append("/alerting/alerts/%s" % ca_id)
        except etcd.EtcdKeyNotFound:
            # No cluster alerts, continue
            pass

        try:
            index_key = "/indexes/tags/tendrl/integration/%s" % integration_id
            _node_ids = etcd_utils.read(index_key).value
            _node_ids = json.loads(_node_ids)
            for _node_id in _node_ids[:]:
                node_obj = NS.tendrl.objects.NodeContext(
                    node_id=_node_id).load()
                # Remove cluster indexes for down node
                if node_obj.status.lower() == "down":
                    _node_ids.remove(_node_id)
                    # Removing down node details
                    logger.log(
                        "warning",
                        NS.publisher_id,
                        {
                            "message":
                            "Deleting down node %s details" % node_obj.fqdn
                        },
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                    )
                    etcd_keys_to_delete.append("/nodes/%s" % _node_id)
            etcd_utils.write(index_key, json.dumps(_node_ids))
        except (etcd.EtcdKeyNotFound, ValueError, TypeError, AttributeError,
                IndexError):
            # If the index details are not present we don't need to stop
            # the un-manage flow, because when the node-agent works
            # properly these details are populated again by the node sync
            pass
        # Remove the cluster details
        for key in list(set(etcd_keys_to_delete)):
            try:
                etcd_utils.delete(key, recursive=True)
            except etcd.EtcdKeyNotFound:
                logger.log(
                    "debug",
                    NS.publisher_id,
                    {"message": "%s key not found for deletion" % key},
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                )
                continue
        # remove short name
        cluster = NS.tendrl.objects.Cluster(
            integration_id=integration_id).load()
        cluster.short_name = ""
        cluster.save()
        return True
Example #17
    def volume_delete(self, event):
        time.sleep(self.sync_interval)
        fetched_volumes = NS.tendrl.objects.GlusterVolume(
            NS.tendrl_context.integration_id).load_all()
        for fetched_volume in fetched_volumes:
            if fetched_volume.name == event['message']['name']:
                fetched_volume.deleted = True
                fetched_volume.deleted_at = time_utils.now()
                fetched_volume.save()
                try:
                    sub_volumes = etcd_utils.read(
                        "/clusters/{0}/Volumes/{1}/Bricks".format(
                            NS.tendrl_context.integration_id,
                            fetched_volume.vol_id))

                    for sub_volume in sub_volumes.leaves:
                        bricks = etcd_utils.read(sub_volume.key)
                        for brick in bricks.leaves:
                            fqdn = brick.key.split('/')[-1].split(':')[0]
                            path = brick.key.split('/')[-1].split(':')[-1][1:]
                            # Delete brick dashboard from grafana
                            brick_obj = NS.tendrl.objects.GlusterBrick(
                                NS.tendrl_context.integration_id, fqdn,
                                path).load()
                            # Delete brick
                            brick_path = "clusters/{0}/Bricks/"\
                                         "all/{1}/{2}".format(
                                             NS.tendrl_context.integration_id,
                                             fqdn,
                                             path
                                         )
                            etcd_utils.delete(brick_path, recursive=True)
                            brick_full_path = fqdn + ":" + brick_obj.\
                                brick_path.split(":")[-1]
                            job_id = monitoring_utils.update_dashboard(
                                "%s|%s" %
                                (event['message']['name'], brick_full_path),
                                RESOURCE_TYPE_BRICK,
                                NS.tendrl_context.integration_id, "delete")
                            logger.log(
                                "debug", NS.publisher_id, {
                                    "message":
                                    "Update dashboard job %s"
                                    " for brick %s "
                                    "in cluster %s created" %
                                    (job_id, brick.key.split('/')[-1],
                                     NS.tendrl_context.integration_id)
                                })
                            # Delete brick from graphite
                            job_id = monitoring_utils.\
                                delete_resource_from_graphite(
                                    "%s|%s" % (
                                        event['message']['name'],
                                        brick_full_path
                                    ),
                                    RESOURCE_TYPE_BRICK,
                                    NS.tendrl_context.integration_id,
                                    "delete"
                                )
                            logger.log(
                                "debug", NS.publisher_id, {
                                    "message":
                                    "Delete resource "
                                    "from graphite job %s "
                                    "for brick %s in cluster %s created" %
                                    (job_id, brick.key.split('/')[-1],
                                     NS.tendrl_context.integration_id)
                                })
                except etcd.EtcdKeyNotFound:
                    pass
        # Delete volume dashboard from grafana
        job_id = monitoring_utils.update_dashboard(
            event['message']['name'], RESOURCE_TYPE_VOLUME,
            NS.tendrl_context.integration_id, "delete")
        logger.log("debug", NS.publisher_id,
                   {"message": "Update dashboard job %s "
                    "created" % job_id})
        # Delete volume details from graphite
        job_id = monitoring_utils.delete_resource_from_graphite(
            event['message']['name'], RESOURCE_TYPE_VOLUME,
            NS.tendrl_context.integration_id, "delete")
        logger.log("debug", NS.publisher_id, {
            "message":
            "Delete resource from graphite job %s "
            "created" % job_id
        })
Example #18
    def volume_remove_brick_force(self, event):
        time.sleep(self.sync_interval)
        # Event returns bricks list as space separated single string
        bricks = event['message']['bricks'].split(" ")
        try:
            for brick in bricks:
                # find fqdn using ip
                ip = socket.gethostbyname(brick.split(":/")[0])
                node_id = etcd_utils.read("indexes/ip/%s" % ip).value
                fqdn = NS.tendrl.objects.ClusterNodeContext(
                    node_id=node_id).load().fqdn
                brick = fqdn + ":" + brick.split(":")[-1]
                fetched_brick = NS.tendrl.objects.GlusterBrick(
                    NS.tendrl_context.integration_id,
                    fqdn=brick.split(":/")[0],
                    brick_dir=brick.split(":/")[1].replace('/', '_')).load()

                # delete brick
                etcd_utils.delete(
                    "clusters/{0}/Bricks/all/{1}/{2}".format(
                        NS.tendrl_context.integration_id,
                        brick.split(":/")[0],
                        brick.split(":/")[1].replace('/', '_')),
                    recursive=True,
                )

                # delete alert dashboard
                job_id = monitoring_utils.update_dashboard(
                    "%s|%s" % (event['message']['volume'], brick),
                    RESOURCE_TYPE_BRICK, NS.tendrl_context.integration_id,
                    "delete")
                logger.log(
                    "debug", NS.publisher_id,
                    {"message": "Update dashboard job %s "
                     "created" % job_id})

                # delete brick details from graphite
                job_id = monitoring_utils.delete_resource_from_graphite(
                    "%s|%s" % (event['message']['volume'], brick),
                    RESOURCE_TYPE_BRICK, NS.tendrl_context.integration_id,
                    "delete")
                logger.log(
                    "debug", NS.publisher_id, {
                        "message":
                        "Delete resource from graphite job %s "
                        "created" % job_id
                    })

            volume_brick_path = "clusters/{0}/Volumes/{1}/"\
                                "Bricks".format(
                                    NS.tendrl_context.integration_id,
                                    fetched_brick.vol_id,
                                )

            # remove all the brick information under the volume as the
            # subvolume might have changed; let the next sync handle
            # the update of brick info
            etcd_utils.delete(volume_brick_path, recursive=True)

            _trigger_sync_key = 'clusters/%s/_sync_now' % \
                NS.tendrl_context.integration_id
            etcd_utils.write(_trigger_sync_key, 'true')
            etcd_utils.refresh(_trigger_sync_key, self.sync_interval)
        except etcd.EtcdKeyNotFound:
            logger.log("debug", NS.publisher_id,
                       {"message": "Unable to delete bricks %s" % bricks})
Example #19
def remove(key):
    etcd_utils.delete(key, recursive=True)
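Unlike most of the examples above, this thin wrapper does not catch etcd.EtcdKeyNotFound itself, so a caller that may pass an already-removed subtree has to handle the exception. A short usage sketch (the key path is illustrative only):

import etcd

try:
    remove("/clusters/example-cluster/Bricks")
except etcd.EtcdKeyNotFound:
    # The subtree was already gone; nothing to clean up
    pass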
Example #20
def sync(sync_ttl, node_status_ttl):
    try:
        NS.node_context = NS.node_context.load()
        logger.log("debug", NS.publisher_id,
                   {"message": "Running SDS detection"})
        try:
            sds_discovery_manager = sds_manager.SDSDiscoveryManager()
        except ValueError as ex:
            Event(
                ExceptionMessage(priority="debug",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message":
                                     "Failed to init SDSDiscoveryManager.",
                                     "exception": ex
                                 }))
            return

        # Execute the SDS discovery plugins and tag the nodes with data
        for plugin in sds_discovery_manager.get_available_plugins():
            sds_details = plugin.discover_storage_system()
            if sds_details is None:
                break

            if "peers" in sds_details and NS.tendrl_context.integration_id:
                _cnc = NS.tendrl.objects.ClusterNodeContext().load()
                this_peer_uuid = ""
                if _cnc.is_managed != "yes" or not NS.node_context.fqdn:
                    for peer_uuid, data in sds_details.get("peers",
                                                           {}).iteritems():
                        peer = NS.tendrl.objects.GlusterPeer(
                            peer_uuid=peer_uuid,
                            hostname=data['hostname'],
                            connected=data['connected'])
                        peer.save()
                        if data['hostname'] == "localhost":
                            this_peer_uuid = peer_uuid

                    # Figure out the hostname used to probe this peer
                    integration_id_index_key = \
                        "indexes/tags/tendrl/integration/%s" %\
                        NS.tendrl_context.integration_id
                    _node_ids = etcd_utils.read(integration_id_index_key).value
                    _node_ids = json.loads(_node_ids)
                    for _node_id in _node_ids:
                        if _node_id != NS.node_context.node_id:
                            peer = NS.tendrl.objects.GlusterPeer(
                                peer_uuid=this_peer_uuid,
                                node_id=_node_id).load()
                            if peer.hostname:
                                NS.node_context.pkey = peer.hostname
                                NS.node_context.fqdn = peer.hostname
                                NS.node_context.ipv4_addr = \
                                    socket.gethostbyname(
                                        peer.hostname
                                    )
                                NS.node_context.save(ttl=node_status_ttl)
                                break

            if ('detected_cluster_id' in sds_details
                    and sds_details['detected_cluster_id'] != ""):
                try:
                    integration_index_key = \
                        "indexes/detected_cluster_id_to_integration_id/" \
                        "%s" % sds_details['detected_cluster_id']
                    dc = NS.tendrl.objects.DetectedCluster().load()
                    if dc is None or dc.detected_cluster_id is None:
                        time.sleep(sync_ttl)
                        integration_id = str(uuid.uuid4())
                        try:
                            etcd_utils.write(integration_index_key,
                                             integration_id,
                                             prevExist=False)
                        except etcd.EtcdAlreadyExist:
                            pass

                    _ptag = None
                    if NS.tendrl_context.integration_id:
                        _ptag = "provisioner/%s" % \
                            NS.tendrl_context.integration_id

                        if _ptag in NS.node_context.tags:
                            if dc.detected_cluster_id and \
                                dc.detected_cluster_id != sds_details.get(
                                    'detected_cluster_id'):
                                # Gluster peer list has changed
                                integration_id = \
                                    NS.tendrl_context.integration_id
                                etcd_utils.write(integration_index_key,
                                                 integration_id)
                                _cluster = NS.tendrl.objects.Cluster(
                                    integration_id=integration_id).load()
                                # If a peer was detached for a down node
                                # before import, it should not block the
                                # import by changing the cluster status
                                if _cluster.is_managed == "yes":
                                    _cluster.status = "new_peers_detected"
                                    _cluster.save()
                                    # Raise an alert regarding the same
                                    msg = "New peers identified in cluster: " \
                                        "%s. Make sure tendrl-ansible is " \
                                        "executed for the new nodes so that " \
                                        "expand cluster option can be " \
                                        "triggered" % _cluster.short_name
                                    event_utils.emit_event(
                                        "cluster_status",
                                        "new_peers_detected",
                                        msg,
                                        "cluster_{0}".format(integration_id),
                                        "WARNING",
                                        integration_id=integration_id)
                            _cluster = NS.tendrl.objects.Cluster(
                                integration_id=NS.tendrl_context.integration_id
                            ).load()
                            if _cluster.status == "new_peers_detected":
                                peers = []
                                cmd = subprocess.Popen("gluster pool list",
                                                       shell=True,
                                                       stdout=subprocess.PIPE,
                                                       stderr=subprocess.PIPE)
                                out, err = cmd.communicate()
                                if err or out is None or \
                                    "Connection failed" in out:
                                    pass  # set the no of peers as zero
                                if out:
                                    lines = out.split('\n')[1:]
                                    for line in lines:
                                        if line.strip() != '':
                                            peers.append(line.split()[0])
                                nodes_ids = json.loads(
                                    etcd_utils.read(
                                        "indexes/tags/tendrl/integration/%s" %
                                        NS.tendrl_context.integration_id).value
                                )
                                if len(nodes_ids) == len(peers):
                                    # All the nodes have node-agents running
                                    # and are known to tendrl
                                    msg = "New nodes in cluster: %s have " \
                                        "node agents running now. Cluster " \
                                        "is ready to expand." % \
                                        _cluster.short_name
                                    event_utils.emit_event(
                                        "cluster_status",
                                        "expand_pending",
                                        msg,
                                        "cluster_{0}".format(
                                            NS.tendrl_context.integration_id),
                                        "INFO",
                                        integration_id=NS.tendrl_context.
                                        integration_id)
                                    # Set the cluster status accordingly
                                    _cluster.status = 'expand_pending'
                                    _cluster.save()
                    loop_count = 0
                    while True:
                        # Wait till provisioner node assigns
                        # integration_id for this detected_cluster_id
                        if loop_count >= 72:
                            return
                        try:
                            time.sleep(5)
                            integration_id = etcd_utils.read(
                                integration_index_key).value
                            if integration_id:
                                break
                        except etcd.EtcdKeyNotFound:
                            loop_count += 1
                            continue

                    NS.tendrl_context.integration_id = integration_id
                    NS.tendrl_context.cluster_id = sds_details.get(
                        'detected_cluster_id')
                    NS.tendrl_context.cluster_name = sds_details.get(
                        'detected_cluster_name')
                    NS.tendrl_context.sds_name = sds_details.get('pkg_name')
                    NS.tendrl_context.sds_version = sds_details.get(
                        'pkg_version')
                    NS.tendrl_context.save()

                    NS.node_context = NS.node_context.load()
                    integration_tag = "tendrl/integration/%s" % \
                                      integration_id
                    detected_cluster_tag = "detected_cluster/%s" % \
                                           sds_details[
                                               'detected_cluster_id']
                    # Detected cluster id changes when a new node is added
                    # to the peer list or when a peer detach happens;
                    # node_context should not maintain multiple DC ids
                    old_dc_id = "detected_cluster/%s" % dc.detected_cluster_id
                    if old_dc_id in NS.node_context.tags and \
                            old_dc_id != detected_cluster_tag:
                        NS.node_context.tags.remove(old_dc_id)
                        # remove old detected cluster_id from indexes
                        indexes_keys = []
                        indexes_keys.append(
                            "indexes/detected_cluster_id_to_integration_id"
                            "/%s" % dc.detected_cluster_id)
                        indexes_keys.append(
                            "indexes/tags/detected_cluster/%s" %
                            dc.detected_cluster_id)
                        for indexes_key in indexes_keys:
                            try:
                                etcd_utils.delete(indexes_key)
                            except etcd.EtcdKeyNotFound:
                                # It may be removed by other nodes
                                # in a same cluster
                                pass
                    NS.node_context.tags += [
                        detected_cluster_tag, integration_tag
                    ]
                    NS.node_context.tags = list(set(NS.node_context.tags))
                    NS.node_context.save(ttl=node_status_ttl)

                    NS.tendrl.objects.DetectedCluster(
                        detected_cluster_id=sds_details.get(
                            'detected_cluster_id'),
                        detected_cluster_name=sds_details.get(
                            'detected_cluster_name'),
                        sds_pkg_name=sds_details.get('pkg_name'),
                        sds_pkg_version=sds_details.get('pkg_version'),
                    ).save()
                    _cluster = NS.tendrl.objects.Cluster(
                        integration_id=NS.tendrl_context.integration_id).load(
                        )
                    if _cluster.current_job.get(
                        'status', ''
                    ) in ['', 'finished', 'failed'] \
                        and _cluster.status in [None, ""]:
                        _cluster.save()

                except (etcd.EtcdException, KeyError) as ex:
                    Event(
                        ExceptionMessage(priority="debug",
                                         publisher=NS.publisher_id,
                                         payload={
                                             "message": "Failed SDS detection",
                                             "exception": ex
                                         }))
                break
    except Exception as ex:
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={
                                 "message":
                                 "node_sync "
                                 "SDS detection failed: " + ex.message,
                                 "exception":
                                 ex
                             }))