def invalidate_hash(self):
    self.render()
    _hash_key = "/{0}/hash".format(self.value)
    try:
        etcd_utils.delete(_hash_key)
    except etcd.EtcdKeyNotFound:
        pass
def on_change(self, attr, prev_value, current_value):
    if attr == "status" and "tendrl/monitor" in NS.node_context.tags:
        _tc = NS.tendrl.objects.TendrlContext(node_id=self.node_id).load()
        # Check node is managed
        _cnc = NS.tendrl.objects.ClusterNodeContext(
            node_id=self.node_id,
            integration_id=_tc.integration_id).load()
        if current_value is None and str(_cnc.is_managed).lower() == "yes":
            self.status = "DOWN"
            self.save()
            msg = "Node {0} is DOWN".format(self.fqdn)
            event_utils.emit_event(
                "node_status",
                self.status,
                msg,
                "node_{0}".format(self.fqdn),
                "WARNING",
                node_id=self.node_id,
                integration_id=_tc.integration_id
            )
            # Load cluster_node_context will load node_context
            # and it will be updated with latest values
            _cnc_new = \
                NS.tendrl.objects.ClusterNodeContext(
                    node_id=self.node_id,
                    integration_id=_tc.integration_id,
                    first_sync_done=_cnc.first_sync_done,
                    is_managed=_cnc.is_managed
                )
            _cnc_new.save()
            del _cnc_new
            # Update cluster details
            self.update_cluster_details(_tc.integration_id)
            _tag = "provisioner/%s" % _tc.integration_id
            if _tag in self.tags:
                _index_key = "/indexes/tags/%s" % _tag
                self.tags.remove(_tag)
                self.save()
                etcd_utils.delete(_index_key)
            if _tc.sds_name in ["gluster", "RHGS"]:
                bricks = etcd_utils.read(
                    "clusters/{0}/Bricks/all/{1}".format(
                        _tc.integration_id,
                        self.fqdn))
                for brick in bricks.leaves:
                    try:
                        etcd_utils.write("{0}/status".format(brick.key),
                                         "Stopped")
                    except (etcd.EtcdAlreadyExist, etcd.EtcdKeyNotFound):
                        pass
        elif current_value == "UP" and str(
                _cnc.is_managed).lower() == "yes":
            msg = "{0} is UP".format(self.fqdn)
            event_utils.emit_event(
                "node_status",
                "UP",
                msg,
                "node_{0}".format(self.fqdn),
                "INFO",
                node_id=self.node_id,
                integration_id=_tc.integration_id
            )
        del _cnc
def run(self):
    integration_id = self.parameters['TendrlContext.integration_id']
    etcd_keys_to_delete = []
    etcd_keys_to_delete.append("/clusters/%s/nodes" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/Bricks" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/Volumes" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/GlobalDetails" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/TendrlContext" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/Utilization" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/raw_map" % integration_id)
    etcd_keys_to_delete.append("/alerting/clusters/%s" % integration_id)
    nodes = etcd_utils.read("/clusters/%s/nodes" % integration_id)
    node_ids = []
    for node in nodes.leaves:
        node_id = node.key.split("/")[-1]
        node_ids.append(node_id)
        etcd_keys_to_delete.append("/alerting/nodes/%s" % node_id)
    # Find the alerting/alerts entries to be deleted
    try:
        cluster_alert_ids = etcd_utils.read("/alerting/clusters")
        for entry in cluster_alert_ids.leaves:
            ca_id = entry.key.split("/")[-1]
            etcd_keys_to_delete.append("/alerting/alerts/%s" % ca_id)
    except etcd.EtcdKeyNotFound:
        # No cluster alerts, continue
        pass
    try:
        node_alert_ids = etcd_utils.read("/alerting/nodes")
        for entry in node_alert_ids.leaves:
            na_id = entry.key.split("/")[-1]
            etcd_keys_to_delete.append("/alerting/alerts/%s" % na_id)
    except etcd.EtcdKeyNotFound:
        # No node alerts, continue
        pass
    # Remove the cluster details
    for key in list(set(etcd_keys_to_delete)):
        try:
            etcd_utils.delete(key, recursive=True)
        except etcd.EtcdKeyNotFound:
            logger.log(
                "debug",
                NS.publisher_id,
                {"message": "%s key not found for deletion" % key},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
            )
            continue
    return True
def shutdown(signum, frame):
    logger.log(
        "debug",
        NS.publisher_id,
        {"message": "Signal handler: stopping"}
    )
    # Remove the node's name from gluster server tag
    try:
        gl_srvr_list = etcd_utils.read(
            "/indexes/tags/gluster/server"
        ).value
        gl_srvr_list = json.loads(gl_srvr_list)
        if NS.node_context.node_id in gl_srvr_list:
            gl_srvr_list.remove(NS.node_context.node_id)
        etcd_utils.write(
            "/indexes/tags/gluster/server",
            json.dumps(gl_srvr_list)
        )
        node_tags = NS.node_context.tags
        if 'provisioner/%s' % NS.tendrl_context.integration_id \
                in node_tags:
            etcd_utils.delete(
                "/indexes/tags/provisioner/%s" %
                NS.tendrl_context.integration_id,
                recursive=True
            )
        int_srvr_list = etcd_utils.read(
            "/indexes/tags/tendrl/integration/gluster"
        ).value
        int_srvr_list = json.loads(int_srvr_list)
        if NS.node_context.node_id in int_srvr_list:
            int_srvr_list.remove(NS.node_context.node_id)
        etcd_utils.write(
            "/indexes/tags/tendrl/integration/gluster",
            json.dumps(int_srvr_list)
        )
    except etcd.EtcdKeyNotFound:
        logger.log(
            "debug",
            NS.publisher_id,
            {
                "message": "Couldn't remove node from "
                           "gluster servers list tag. "
                           "integration_id: %s, node_id: %s" % (
                               NS.tendrl_context.integration_id,
                               NS.node_context.node_id
                           )
            }
        )
        pass
    complete.set()
    m.stop()
def shutdown(signum, frame):
    logger.log(
        "debug",
        NS.publisher_id,
        {"message": "Signal handler: stopping"}
    )
    # Remove the node's name from gluster server tag
    try:
        gl_srvr_list = etcd_utils.read(
            "/indexes/tags/gluster/server"
        ).value
        gl_srvr_list = json.loads(gl_srvr_list)
        if NS.node_context.node_id in gl_srvr_list:
            gl_srvr_list.remove(NS.node_context.node_id)
        etcd_utils.write(
            "/indexes/tags/gluster/server",
            json.dumps(gl_srvr_list)
        )
        node_tags = json.loads(NS.node_context.tags)
        if 'provisioner/%s' % NS.tendrl_context.integration_id \
                in node_tags:
            etcd_utils.delete(
                "/indexes/tags/provisioner/%s" %
                NS.tendrl_context.integration_id,
                recursive=True
            )
        int_srvr_list = etcd_utils.read(
            "/indexes/tags/tendrl/integration/gluster"
        ).value
        int_srvr_list = json.loads(int_srvr_list)
        if NS.node_context.node_id in int_srvr_list:
            int_srvr_list.remove(NS.node_context.node_id)
        etcd_utils.write(
            "/indexes/tags/tendrl/integration/gluster",
            json.dumps(int_srvr_list)
        )
    except etcd.EtcdKeyNotFound:
        logger.log(
            "debug",
            NS.publisher_id,
            {
                "message": "Couldn't remove node from "
                           "gluster servers list tag. "
                           "integration_id: %s, node_id: %s" % (
                               NS.tendrl_context.integration_id,
                               NS.node_context.node_id
                           )
            }
        )
        pass
    complete.set()
    m.stop()
def on_change(self, attr, prev_value, current_value):
    if attr == "status":
        if current_value is None:
            self.status = "DOWN"
            self.save()
            msg = "Node {0} is DOWN".format(self.fqdn)
            event_utils.emit_event(
                "node_status",
                self.status,
                msg,
                "node_{0}".format(self.fqdn),
                "WARNING",
                node_id=self.node_id
            )
            _tc = NS.tendrl.objects.TendrlContext(
                node_id=self.node_id).load()
            _tag = "provisioner/%s" % _tc.integration_id
            if _tag in self.tags:
                _index_key = "/indexes/tags/%s" % _tag
                self.tags.remove(_tag)
                self.save()
                etcd_utils.delete(_index_key)
                _msg = "node_sync, STALE provisioner node "\
                       "found! re-configuring monitoring "\
                       "(job-id: %s) on this node"
                payload = {
                    "tags": ["tendrl/node_%s" % self.node_id],
                    "run": "tendrl.flows.ConfigureMonitoring",
                    "status": "new",
                    "parameters": {
                        'TendrlContext.integration_id': _tc.integration_id
                    },
                    "type": "node"
                }
                _job_id = str(uuid.uuid4())
                NS.tendrl.objects.Job(job_id=_job_id,
                                      status="new",
                                      payload=payload).save()
                logger.log("debug", NS.publisher_id,
                           {"message": _msg % _job_id})
            if _tc.sds_name == "gluster":
                bricks = etcd_utils.read(
                    "clusters/{0}/Bricks/all/{1}".format(
                        _tc.integration_id,
                        self.fqdn))
                for brick in bricks.leaves:
                    try:
                        etcd_utils.write("{0}/status".format(brick.key),
                                         "Stopped")
                    except (etcd.EtcdAlreadyExist, etcd.EtcdKeyNotFound):
                        pass
def run():
    try:
        nodes = NS._int.client.read("/nodes")
    except etcd.EtcdKeyNotFound:
        return
    for node in nodes.leaves:
        node_id = node.key.split('/')[-1]
        try:
            # prevExist=False makes this a create-only write: the node is
            # marked DOWN only if its status key is absent
            NS._int.wclient.write(
                "/nodes/{0}/NodeContext/status".format(node_id),
                "DOWN",
                prevExist=False)
            _node_context = NS.tendrl.objects.NodeContext(
                node_id=node_id).load()
            _tc = NS.tendrl.objects.TendrlContext(node_id=node_id).load()
            _cluster = NS.tendrl.objects.Cluster(
                integration_id=_tc.integration_id).load()
            # Remove stale provisioner tag
            if _cluster.is_managed == "yes":
                _tag = "provisioner/%s" % _cluster.integration_id
                if _tag in _node_context.tags:
                    _index_key = "/indexes/tags/%s" % _tag
                    _node_context.tags.remove(_tag)
                    _node_context.save()
                    etcd_utils.delete(_index_key)
                    _msg = "node_sync, STALE provisioner node found! " \
                           "re-configuring monitoring (job-id: %s) " \
                           "on this node"
                    payload = {
                        "tags": ["tendrl/node_%s" % node_id],
                        "run": "tendrl.flows.ConfigureMonitoring",
                        "status": "new",
                        "parameters": {
                            'TendrlContext.integration_id':
                                _tc.integration_id
                        },
                        "type": "node"
                    }
                    _job_id = str(uuid.uuid4())
                    Job(job_id=_job_id,
                        status="new",
                        payload=payload).save()
                    Event(
                        Message(priority="debug",
                                publisher=NS.publisher_id,
                                payload={"message": _msg % _job_id}))
        except etcd.EtcdAlreadyExist:
            pass
    return
def test_delete():
    setattr(__builtin__, "NS", maps.NamedDict())
    setattr(NS, "_int", maps.NamedDict())
    NS._int.wclient = importlib.import_module("tendrl.commons"
                                              ".tests.fixtures."
                                              "client").Client()
    NS._int.wreconnect = type("Dummy", (object,), {})
    with patch.object(Client, "delete") as mock_delete:
        etcd_utils.delete("key")
        assert mock_delete.called
    with patch.object(Client, "delete",
                      raise_etcdconnectionfailed) as mock_delete:
        with pytest.raises(etcd.EtcdConnectionFailed):
            etcd_utils.delete("key")
    with patch.object(Client, "delete",
                      raise_etcdkeynotfound) as mock_delete:
        with pytest.raises(etcd.EtcdKeyNotFound):
            etcd_utils.delete("key")
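# The test above patches Client.delete with raise_etcdconnectionfailed and
# raise_etcdkeynotfound, which are not defined in this snippet. Below is a
# minimal sketch of what such side-effect helpers could look like; the
# helper names are taken from the test and only python-etcd's exception
# classes are assumed, so the real fixtures in tendrl.commons may differ.
import etcd


def raise_etcdconnectionfailed(*args, **kwargs):
    # Simulate the etcd client failing to reach the server
    raise etcd.EtcdConnectionFailed("mocked connection failure")


def raise_etcdkeynotfound(*args, **kwargs):
    # Simulate deleting a key that does not exist
    raise etcd.EtcdKeyNotFound("mocked missing key")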
def on_change(self, attr, prev_value, current_value):
    if attr == "status" and "tendrl/monitor" in NS.node_context.tags:
        _tc = NS.tendrl.objects.TendrlContext(node_id=self.node_id).load()
        # Check node is managed
        _cnc = NS.tendrl.objects.ClusterNodeContext(
            node_id=self.node_id,
            integration_id=_tc.integration_id).load()
        if current_value is None and str(_cnc.is_managed).lower() == "yes":
            self.status = "DOWN"
            self.save()
            msg = "Node {0} is DOWN".format(self.fqdn)
            event_utils.emit_event(
                "node_status",
                self.status,
                msg,
                "node_{0}".format(self.fqdn),
                "WARNING",
                node_id=self.node_id,
                integration_id=_tc.integration_id
            )
            # Load cluster_node_context will load node_context
            # and it will be updated with latest values
            _cnc_new = \
                NS.tendrl.objects.ClusterNodeContext(
                    node_id=self.node_id,
                    integration_id=_tc.integration_id,
                    first_sync_done=_cnc.first_sync_done,
                    is_managed=_cnc.is_managed
                )
            _cnc_new.save()
            del _cnc_new
            # Update cluster details
            self.update_cluster_details(_tc.integration_id)
            _tag = "provisioner/%s" % _tc.integration_id
            if _tag in self.tags:
                _index_key = "/indexes/tags/%s" % _tag
                self.tags.remove(_tag)
                self.save()
                etcd_utils.delete(_index_key)
                _msg = "node_sync, STALE provisioner node "\
                       "found! re-configuring monitoring "\
                       "(job-id: %s) on this node"
                payload = {
                    "tags": ["tendrl/node_%s" % self.node_id],
                    "run": "tendrl.flows.ConfigureMonitoring",
                    "status": "new",
                    "parameters": {
                        'TendrlContext.integration_id': _tc.integration_id
                    },
                    "type": "node"
                }
                _job_id = str(uuid.uuid4())
                NS.tendrl.objects.Job(job_id=_job_id,
                                      status="new",
                                      payload=payload).save()
                logger.log("debug", NS.publisher_id,
                           {"message": _msg % _job_id})
            if _tc.sds_name in ["gluster", "RHGS"]:
                bricks = etcd_utils.read(
                    "clusters/{0}/Bricks/all/{1}".format(
                        _tc.integration_id,
                        self.fqdn))
                for brick in bricks.leaves:
                    try:
                        etcd_utils.write("{0}/status".format(brick.key),
                                         "Stopped")
                    except (etcd.EtcdAlreadyExist, etcd.EtcdKeyNotFound):
                        pass
        elif current_value == "UP" and str(
                _cnc.is_managed).lower() == "yes":
            msg = "{0} is UP".format(self.fqdn)
            event_utils.emit_event(
                "node_status",
                "UP",
                msg,
                "node_{0}".format(self.fqdn),
                "INFO",
                node_id=self.node_id,
                integration_id=_tc.integration_id
            )
        del _cnc
def run(self):
    logger.log(
        "info",
        NS.publisher_id,
        {"message": "Deleting cluster details."},
        job_id=self.parameters['job_id'],
        flow_id=self.parameters['flow_id'],
    )
    integration_id = self.parameters['TendrlContext.integration_id']
    etcd_keys_to_delete = []
    etcd_keys_to_delete.append("/clusters/%s/nodes" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/Bricks" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/Volumes" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/GlobalDetails" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/TendrlContext" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/Utilization" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/raw_map" % integration_id)
    etcd_keys_to_delete.append("/alerting/clusters/%s" % integration_id)
    nodes = etcd_utils.read("/clusters/%s/nodes" % integration_id)
    node_ids = []
    for node in nodes.leaves:
        node_id = node.key.split("/")[-1]
        node_ids.append(node_id)
        key = "/alerting/nodes/%s" % node_id
        etcd_keys_to_delete.append(key)
        try:
            # delete node alerts from /alerting/alerts
            node_alerts = etcd_utils.read(key)
            for node_alert in node_alerts.leaves:
                etcd_keys_to_delete.append(
                    "/alerting/alerts/%s" % node_alert.key.split("/")[-1])
        except etcd.EtcdKeyNotFound:
            # No node alerts, continue
            pass
    # Find the alerting/alerts entries to be deleted
    try:
        cluster_alert_ids = etcd_utils.read(
            "/alerting/clusters/%s" % integration_id)
        for entry in cluster_alert_ids.leaves:
            ca_id = entry.key.split("/")[-1]
            etcd_keys_to_delete.append("/alerting/alerts/%s" % ca_id)
    except etcd.EtcdKeyNotFound:
        # No cluster alerts, continue
        pass
    # Remove the cluster details
    for key in list(set(etcd_keys_to_delete)):
        try:
            etcd_utils.delete(key, recursive=True)
        except etcd.EtcdKeyNotFound:
            logger.log(
                "debug",
                NS.publisher_id,
                {"message": "%s key not found for deletion" % key},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
            )
            continue
    # remove short name
    cluster = NS.tendrl.objects.Cluster(
        integration_id=integration_id).load()
    cluster.short_name = ""
    cluster.save()
    return True
def volume_remove_brick_force(self, event):
    time.sleep(self.sync_interval)
    # Event returns bricks list as space separated single string
    bricks = event['message']['bricks'].split(" ")
    try:
        for brick in bricks:
            # find fqdn using ip
            ip = socket.gethostbyname(brick.split(":/")[0])
            node_id = etcd_utils.read("indexes/ip/%s" % ip).value
            fqdn = NS.tendrl.objects.ClusterNodeContext(
                node_id=node_id).load().fqdn
            brick = fqdn + ":" + brick.split(":")[-1]
            fetched_brick = NS.tendrl.objects.GlusterBrick(
                NS.tendrl_context.integration_id,
                fqdn=brick.split(":/")[0],
                brick_dir=brick.split(":/")[1].replace('/', '_')).load()
            # delete brick
            etcd_utils.delete(
                "clusters/{0}/Bricks/all/{1}/{2}".format(
                    NS.tendrl_context.integration_id,
                    brick.split(":/")[0],
                    brick.split(":/")[1].replace('/', '_')),
                recursive=True,
            )
            # delete alert dashboard
            job_id = monitoring_utils.update_dashboard(
                "%s|%s" % (event['message']['volume'], brick),
                RESOURCE_TYPE_BRICK,
                NS.tendrl_context.integration_id,
                "delete")
            logger.log(
                "debug",
                NS.publisher_id,
                {
                    "message": "Update dashboard job %s "
                               "created" % job_id
                })
            # delete brick details from graphite
            job_id = monitoring_utils.delete_resource_from_graphite(
                "%s|%s" % (event['message']['volume'], brick),
                RESOURCE_TYPE_BRICK,
                NS.tendrl_context.integration_id,
                "delete")
            logger.log(
                "debug",
                NS.publisher_id,
                {
                    "message": "Delete resource from graphite job %s "
                               "created" % job_id
                })
            volume_brick_path = "clusters/{0}/Volumes/{1}/" \
                                "Bricks".format(
                                    NS.tendrl_context.integration_id,
                                    fetched_brick.vol_id,
                                )
            # remove all the brick information under the volume as the
            # subvolume might have changed; let the next sync handle
            # updating the brick info
            etcd_utils.delete(volume_brick_path, recursive=True)
            _trigger_sync_key = 'clusters/%s/_sync_now' % \
                NS.tendrl_context.integration_id
            etcd_utils.write(_trigger_sync_key, 'true')
            etcd_utils.refresh(_trigger_sync_key, self.sync_interval)
    except etcd.EtcdKeyNotFound:
        logger.log(
            "debug",
            NS.publisher_id,
            {"message": "Unable to delete bricks %s" % bricks})
def volume_delete(self, event):
    time.sleep(self.sync_interval)
    fetched_volumes = NS.tendrl.objects.GlusterVolume(
        NS.tendrl_context.integration_id).load_all()
    for fetched_volume in fetched_volumes:
        if fetched_volume.name == event['message']['name']:
            fetched_volume.deleted = True
            fetched_volume.deleted_at = time_utils.now()
            fetched_volume.save()
            try:
                sub_volumes = etcd_utils.read(
                    "/clusters/{0}/Volumes/{1}/Bricks".format(
                        NS.tendrl_context.integration_id,
                        fetched_volume.vol_id))
                for sub_volume in sub_volumes.leaves:
                    bricks = etcd_utils.read(sub_volume.key)
                    for brick in bricks.leaves:
                        fqdn = brick.key.split('/')[-1].split(':')[0]
                        path = brick.key.split('/')[-1].split(':')[-1][1:]
                        # Delete brick dashboard from grafana
                        brick_obj = NS.tendrl.objects.GlusterBrick(
                            NS.tendrl_context.integration_id,
                            fqdn,
                            path).load()
                        # Delete brick
                        brick_path = "clusters/{0}/Bricks/" \
                                     "all/{1}/{2}".format(
                                         NS.tendrl_context.integration_id,
                                         fqdn,
                                         path
                                     )
                        etcd_utils.delete(brick_path, recursive=True)
                        brick_full_path = fqdn + ":" + \
                            brick_obj.brick_path.split(":")[-1]
                        job_id = monitoring_utils.update_dashboard(
                            "%s|%s" % (event['message']['name'],
                                       brick_full_path),
                            RESOURCE_TYPE_BRICK,
                            NS.tendrl_context.integration_id,
                            "delete")
                        logger.log(
                            "debug",
                            NS.publisher_id,
                            {
                                "message": "Update dashboard job %s"
                                           " for brick %s "
                                           "in cluster %s created" % (
                                               job_id,
                                               brick.key.split('/')[-1],
                                               NS.tendrl_context.
                                               integration_id)
                            })
                        # Delete brick from graphite
                        job_id = monitoring_utils.\
                            delete_resource_from_graphite(
                                "%s|%s" % (event['message']['name'],
                                           brick_full_path),
                                RESOURCE_TYPE_BRICK,
                                NS.tendrl_context.integration_id,
                                "delete"
                            )
                        logger.log(
                            "debug",
                            NS.publisher_id,
                            {
                                "message": "Delete resource "
                                           "from graphite job %s "
                                           "for brick %s in cluster "
                                           "%s created" % (
                                               job_id,
                                               brick.key.split('/')[-1],
                                               NS.tendrl_context.
                                               integration_id)
                            })
            except etcd.EtcdKeyNotFound:
                pass
    # Delete volume dashboard from grafana
    job_id = monitoring_utils.update_dashboard(
        event['message']['name'],
        RESOURCE_TYPE_VOLUME,
        NS.tendrl_context.integration_id,
        "delete")
    logger.log(
        "debug",
        NS.publisher_id,
        {"message": "Update dashboard job %s created" % job_id})
    # Delete volume details from graphite
    job_id = monitoring_utils.delete_resource_from_graphite(
        event['message']['name'],
        RESOURCE_TYPE_VOLUME,
        NS.tendrl_context.integration_id,
        "delete")
    logger.log(
        "debug",
        NS.publisher_id,
        {
            "message": "Delete resource from graphite job %s "
                       "created" % job_id
        })
def run(self):
    logger.log(
        "info",
        NS.publisher_id,
        {"message": "Deleting cluster details."},
        job_id=self.parameters['job_id'],
        flow_id=self.parameters['flow_id'],
    )
    integration_id = self.parameters['TendrlContext.integration_id']
    etcd_keys_to_delete = []
    etcd_keys_to_delete.append("/clusters/%s/nodes" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/Bricks" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/Volumes" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/GlobalDetails" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/TendrlContext" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/Utilization" % integration_id)
    etcd_keys_to_delete.append("/clusters/%s/raw_map" % integration_id)
    etcd_keys_to_delete.append("/alerting/clusters/%s" % integration_id)
    nodes = etcd_utils.read("/clusters/%s/nodes" % integration_id)
    node_ids = []
    for node in nodes.leaves:
        node_id = node.key.split("/")[-1]
        node_ids.append(node_id)
        key = "/alerting/nodes/%s" % node_id
        etcd_keys_to_delete.append(key)
        try:
            # delete node alerts from /alerting/alerts
            node_alerts = etcd_utils.read(key)
            for node_alert in node_alerts.leaves:
                etcd_keys_to_delete.append(
                    "/alerting/alerts/%s" % node_alert.key.split("/")[-1])
        except etcd.EtcdKeyNotFound:
            # No node alerts, continue
            pass
    # Find the alerting/alerts entries to be deleted
    try:
        cluster_alert_ids = etcd_utils.read(
            "/alerting/clusters/%s" % integration_id)
        for entry in cluster_alert_ids.leaves:
            ca_id = entry.key.split("/")[-1]
            etcd_keys_to_delete.append("/alerting/alerts/%s" % ca_id)
    except etcd.EtcdKeyNotFound:
        # No cluster alerts, continue
        pass
    try:
        index_key = "/indexes/tags/tendrl/integration/%s" % integration_id
        _node_ids = etcd_utils.read(index_key).value
        _node_ids = json.loads(_node_ids)
        for _node_id in _node_ids[:]:
            node_obj = NS.tendrl.objects.NodeContext(
                node_id=_node_id).load()
            # Remove cluster indexes for down node
            if node_obj.status.lower() == "down":
                _node_ids.remove(_node_id)
                # Removing down node details
                logger.log(
                    "warning",
                    NS.publisher_id,
                    {
                        "message": "Deleting down node %s details" %
                        node_obj.fqdn
                    },
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                )
                etcd_keys_to_delete.append("/nodes/%s" % _node_id)
        etcd_utils.write(index_key, json.dumps(_node_ids))
    except (etcd.EtcdKeyNotFound, ValueError, TypeError,
            AttributeError, IndexError):
        # If the index details are not present we don't need to stop the
        # un-manage flow, because when the node-agent works properly these
        # details are populated again by the node sync
        pass
    # Remove the cluster details
    for key in list(set(etcd_keys_to_delete)):
        try:
            etcd_utils.delete(key, recursive=True)
        except etcd.EtcdKeyNotFound:
            logger.log(
                "debug",
                NS.publisher_id,
                {"message": "%s key not found for deletion" % key},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
            )
            continue
    # remove short name
    cluster = NS.tendrl.objects.Cluster(
        integration_id=integration_id).load()
    cluster.short_name = ""
    cluster.save()
    return True
def remove(key):
    etcd_utils.delete(key, recursive=True)
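# The snippets above repeatedly wrap etcd_utils.delete() in a try/except for
# etcd.EtcdKeyNotFound. Below is a minimal sketch of a shared helper that
# centralizes that guard; the name delete_if_exists is hypothetical and the
# import path assumes etcd_utils refers to tendrl.commons.utils.etcd_utils
# as used in the snippets above.
import etcd

from tendrl.commons.utils import etcd_utils


def delete_if_exists(key, recursive=False):
    # Delete the key (optionally the whole subtree) and treat a missing
    # key as a no-op instead of letting etcd.EtcdKeyNotFound propagate
    try:
        etcd_utils.delete(key, recursive=recursive)
        return True
    except etcd.EtcdKeyNotFound:
        return False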
def sync(sync_ttl, node_status_ttl):
    try:
        NS.node_context = NS.node_context.load()
        logger.log("debug", NS.publisher_id,
                   {"message": "Running SDS detection"})
        try:
            sds_discovery_manager = sds_manager.SDSDiscoveryManager()
        except ValueError as ex:
            Event(
                ExceptionMessage(
                    priority="debug",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Failed to init SDSDiscoveryManager.",
                        "exception": ex
                    }
                )
            )
            return
        # Execute the SDS discovery plugins and tag the nodes with data
        for plugin in sds_discovery_manager.get_available_plugins():
            sds_details = plugin.discover_storage_system()
            if sds_details is None:
                break
            if "peers" in sds_details and NS.tendrl_context.integration_id:
                _cnc = NS.tendrl.objects.ClusterNodeContext().load()
                this_peer_uuid = ""
                if _cnc.is_managed != "yes" or not NS.node_context.fqdn:
                    for peer_uuid, data in sds_details.get(
                            "peers", {}).iteritems():
                        peer = NS.tendrl.objects.GlusterPeer(
                            peer_uuid=peer_uuid,
                            hostname=data['hostname'],
                            connected=data['connected'])
                        peer.save()
                        if data['hostname'] == "localhost":
                            this_peer_uuid = peer_uuid
                    # Figure out the hostname used to probe this peer
                    integration_id_index_key = \
                        "indexes/tags/tendrl/integration/%s" % \
                        NS.tendrl_context.integration_id
                    _node_ids = etcd_utils.read(
                        integration_id_index_key).value
                    _node_ids = json.loads(_node_ids)
                    for _node_id in _node_ids:
                        if _node_id != NS.node_context.node_id:
                            peer = NS.tendrl.objects.GlusterPeer(
                                peer_uuid=this_peer_uuid,
                                node_id=_node_id).load()
                            if peer.hostname:
                                NS.node_context.pkey = peer.hostname
                                NS.node_context.fqdn = peer.hostname
                                NS.node_context.ipv4_addr = \
                                    socket.gethostbyname(peer.hostname)
                                NS.node_context.save(ttl=node_status_ttl)
                                break
            if ('detected_cluster_id' in sds_details and
                    sds_details['detected_cluster_id'] != ""):
                try:
                    integration_index_key = \
                        "indexes/detected_cluster_id_to_integration_id/" \
                        "%s" % sds_details['detected_cluster_id']
                    dc = NS.tendrl.objects.DetectedCluster().load()
                    if dc is None or dc.detected_cluster_id is None:
                        time.sleep(sync_ttl)
                        integration_id = str(uuid.uuid4())
                        try:
                            etcd_utils.write(integration_index_key,
                                             integration_id,
                                             prevExist=False)
                        except etcd.EtcdAlreadyExist:
                            pass
                    _ptag = None
                    if NS.tendrl_context.integration_id:
                        _ptag = "provisioner/%s" % \
                            NS.tendrl_context.integration_id
                    if _ptag in NS.node_context.tags:
                        if dc.detected_cluster_id and \
                            dc.detected_cluster_id != sds_details.get(
                                'detected_cluster_id'):
                            # Gluster peer list has changed
                            integration_id = \
                                NS.tendrl_context.integration_id
                            etcd_utils.write(integration_index_key,
                                             integration_id)
                            _cluster = NS.tendrl.objects.Cluster(
                                integration_id=integration_id).load()
                            # If peer detached for down node before import
                            # then it should not block the import by
                            # changing cluster status
                            if _cluster.is_managed == "yes":
                                _cluster.status = "new_peers_detected"
                                _cluster.save()
                                # Raise an alert regarding the same
                                msg = "New peers identified in cluster: " \
                                    "%s. Make sure tendrl-ansible is " \
                                    "executed for the new nodes so that " \
                                    "expand cluster option can be " \
                                    "triggered" % _cluster.short_name
                                event_utils.emit_event(
                                    "cluster_status",
                                    "new_peers_detected",
                                    msg,
                                    "cluster_{0}".format(integration_id),
                                    "WARNING",
                                    integration_id=integration_id)
                        _cluster = NS.tendrl.objects.Cluster(
                            integration_id=NS.tendrl_context.integration_id
                        ).load()
                        if _cluster.status == "new_peers_detected":
                            peers = []
                            cmd = subprocess.Popen(
                                "gluster pool list",
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
                            out, err = cmd.communicate()
                            if err or out is None or \
                                    "Connection failed" in out:
                                # set the no of peers as zero
                                pass
                            if out:
                                lines = out.split('\n')[1:]
                                for line in lines:
                                    if line.strip() != '':
                                        peers.append(line.split()[0])
                            nodes_ids = json.loads(
                                etcd_utils.read(
                                    "indexes/tags/tendrl/integration/%s" %
                                    NS.tendrl_context.integration_id
                                ).value
                            )
                            if len(nodes_ids) == len(peers):
                                # All the nodes are having node-agents
                                # running and known to tendrl
                                msg = "New nodes in cluster: %s have " \
                                    "node agents running now. Cluster " \
                                    "is ready to expand." % \
                                    _cluster.short_name
                                event_utils.emit_event(
                                    "cluster_status",
                                    "expand_pending",
                                    msg,
                                    "cluster_{0}".format(
                                        NS.tendrl_context.integration_id),
                                    "INFO",
                                    integration_id=NS.tendrl_context.
                                    integration_id)
                                # Set the cluster status accordingly
                                _cluster.status = 'expand_pending'
                                _cluster.save()
                    loop_count = 0
                    while True:
                        # Wait till provisioner node assigns
                        # integration_id for this detected_cluster_id
                        if loop_count >= 72:
                            return
                        try:
                            time.sleep(5)
                            integration_id = etcd_utils.read(
                                integration_index_key).value
                            if integration_id:
                                break
                        except etcd.EtcdKeyNotFound:
                            loop_count += 1
                            continue
                    NS.tendrl_context.integration_id = integration_id
                    NS.tendrl_context.cluster_id = sds_details.get(
                        'detected_cluster_id')
                    NS.tendrl_context.cluster_name = sds_details.get(
                        'detected_cluster_name')
                    NS.tendrl_context.sds_name = sds_details.get('pkg_name')
                    NS.tendrl_context.sds_version = sds_details.get(
                        'pkg_version')
                    NS.tendrl_context.save()
                    NS.node_context = NS.node_context.load()
                    integration_tag = "tendrl/integration/%s" % \
                        integration_id
                    detected_cluster_tag = "detected_cluster/%s" % \
                        sds_details['detected_cluster_id']
                    # Detected cluster id will change when new node
                    # added into peer list and when peer detach happens,
                    # Node_context should not maintain multiple DC ids
                    old_dc_id = "detected_cluster/%s" % \
                        dc.detected_cluster_id
                    if old_dc_id in NS.node_context.tags and \
                            old_dc_id != detected_cluster_tag:
                        NS.node_context.tags.remove(old_dc_id)
                        # remove old detected cluster_id from indexes
                        indexes_keys = []
                        indexes_keys.append(
                            "indexes/detected_cluster_id_to_integration_id"
                            "/%s" % dc.detected_cluster_id)
                        indexes_keys.append(
                            "indexes/tags/detected_cluster/%s" %
                            dc.detected_cluster_id)
                        for indexes_key in indexes_keys:
                            try:
                                etcd_utils.delete(indexes_key)
                            except etcd.EtcdKeyNotFound:
                                # It may be removed by other nodes
                                # in a same cluster
                                pass
                    NS.node_context.tags += [
                        detected_cluster_tag,
                        integration_tag
                    ]
                    NS.node_context.tags = list(set(NS.node_context.tags))
                    NS.node_context.save(ttl=node_status_ttl)
                    NS.tendrl.objects.DetectedCluster(
                        detected_cluster_id=sds_details.get(
                            'detected_cluster_id'),
                        detected_cluster_name=sds_details.get(
                            'detected_cluster_name'),
                        sds_pkg_name=sds_details.get('pkg_name'),
                        sds_pkg_version=sds_details.get('pkg_version'),
                    ).save()
                    _cluster = NS.tendrl.objects.Cluster(
                        integration_id=NS.tendrl_context.integration_id
                    ).load()
                    if _cluster.current_job.get(
                        'status', ''
                    ) in ['', 'finished', 'failed'] \
                            and _cluster.status in [None, ""]:
                        _cluster.save()
                except (etcd.EtcdException, KeyError) as ex:
                    Event(
                        ExceptionMessage(
                            priority="debug",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "Failed SDS detection",
                                "exception": ex
                            }
                        )
                    )
                break
    except Exception as ex:
        Event(
            ExceptionMessage(
                priority="error",
                publisher=NS.publisher_id,
                payload={
                    "message": "node_sync "
                               "SDS detection failed: " + ex.message,
                    "exception": ex
                }
            )
        )