def test_serverAliveCurrentServerDead(self):
    """serverAlive must be False for a server whose only health record is old.

    Registers an inactive job server, records a single health check dated
    10 minutes and 1 second in the past with an unchanged job count, and
    expects serverAlive to report the server as dead without writing an
    additional matching health record.
    """
    monitoring = NodeMonitoring()
    server_document = {
        'jobs': 9,
        'os': platform.system().lower(),
        'roles': [],
        'prototypes': [],
        'active': False,
        'activationTime': datetime.datetime.utcnow()
    }
    insert_result = monitoring.ioc.getCollection('JobServer').insert_one(
        server_document)
    server_id = insert_result.inserted_id

    # Health record placed just over ten minutes in the past.
    age = datetime.timedelta(minutes=10, seconds=1)
    health_document = {
        'server': server_id,
        'jobs': 9,
        'checkTime': datetime.datetime.utcnow() - age
    }
    monitoring.ioc.getCollection('ServerHealth').insert_one(health_document)

    alive = monitoring.serverAlive(str(server_id))
    self.assertFalse(alive)

    # Exactly one health record with this job count must exist afterwards.
    matching_records = monitoring.ioc.getCollection('ServerHealth').find({
        'server': server_id,
        'jobs': 9
    }).count()
    self.assertEqual(matching_records, 1)

    # Cleanup: remove the fixture server and all health records.
    removal = monitoring.ioc.getCollection('JobServer').delete_one(
        {'_id': server_id})
    self.assertEqual(removal.deleted_count, 1)
    monitoring.ioc.getCollection('ServerHealth').drop()
def __init__(self, ioc=None):
    """Wire up the container, import tool and node monitor.

    Args:
        ioc (GreaseContainer): container to reuse; any value that is not a
            GreaseContainer instance is replaced by a newly built one
    """
    self.ioc = ioc if isinstance(ioc, GreaseContainer) else GreaseContainer()
    logger = self.ioc.getLogger()
    self.imp = ImportTool(logger)
    self.monitor = NodeMonitoring(self.ioc)
def test_scanComplete(self):
    """scanComplete must not reduce this node's job count or execution data.

    Captures the local node's JobServer record and the number of SourceData
    documents executed by it, runs scanComplete, and checks that neither
    figure went down. Afterwards SourceData is dropped and the node's
    original job count is written back.
    """
    monitoring = NodeMonitoring()

    server_before = monitoring.ioc.getCollection('JobServer').find_one(
        {'_id': ObjectId(monitoring.ioc.getConfig().NodeIdentity)})
    self.assertTrue(server_before)
    server_before = dict(server_before)

    executed_before = monitoring.ioc.getCollection('SourceData').find({
        'grease_data.execution.server': ObjectId(server_before.get('_id'))
    }).count()

    monitoring.scanComplete()

    executed_after = monitoring.ioc.getCollection('SourceData').find({
        'grease_data.execution.server': ObjectId(server_before.get('_id'))
    }).count()
    server_after = monitoring.ioc.getCollection('JobServer').find_one(
        {'_id': ObjectId(monitoring.ioc.getConfig().NodeIdentity)})
    self.assertTrue(server_after)
    server_after = dict(server_after)

    self.assertGreaterEqual(executed_after, executed_before)
    self.assertGreaterEqual(
        server_after.get('jobs'), server_before.get('jobs'))

    # Cleanup: discard source data and restore the node's job counter.
    monitoring.ioc.getCollection('SourceData').drop()
    restore_filter = {'_id': ObjectId(server_before.get('_id'))}
    restore_update = {'$set': {'jobs': server_before.get('jobs', 0)}}
    monitoring.ioc.getCollection('JobServer').update_one(
        restore_filter, restore_update)
def execute(self, context):
    """This method monitors the environment

    An [in]finite loop monitoring the cluster nodes for unhealthy ones

    When ``context['loop']`` is truthy the monitor runs that many passes
    with a fixed 5 second pause after each one; otherwise it runs forever
    with a randomized pause between passes.

    Args:
        context (dict): context for the command to use; the keys read here
            are ``foreground`` and ``loop``

    Returns:
        bool: Command Success (only reached in the finite ``loop`` mode)

    """
    foreground = context.get('foreground')
    if foreground:
        self.ioc.getLogger().foreground = True
    try:
        monitor = NodeMonitoring(self.ioc)
        loop = context.get('loop')
        if loop:
            # Parse the pass count once instead of on every iteration.
            for _ in range(int(loop)):
                if not monitor.monitor():
                    self.ioc.getLogger().error(
                        "Monitoring Process Failed", notify=False)
                else:
                    monitor.scanComplete()
                time.sleep(5)
        else:
            while True:
                if not monitor.monitor():
                    self.ioc.getLogger().error(
                        "Monitoring Process Failed", notify=False)
                else:
                    monitor.scanComplete()
                # sleep for a random interval to ensure not all nodes poll
                # at the same time; the two digits are drawn separately
                # (tens 1-4, units 0-8), giving a pause of 10 to 48 seconds
                time.sleep(
                    int(
                        str(random.choice(range(1, 5))) +
                        str(random.choice(range(0, 9)))))
    finally:
        # Restore the logger even when a pass raises or the endless loop is
        # interrupted, so the foreground flag never leaks past this call.
        if foreground:
            self.ioc.getLogger().foreground = False
    return True
def test_serverDeactivateFailed(self):
    """deactivateServer must return False for a server that no longer exists.

    A job server document is inserted and immediately deleted, so the id
    handed to deactivateServer refers to nothing in the JobServer collection.
    """
    monitoring = NodeMonitoring()
    server_document = {
        'jobs': 9,
        'os': platform.system().lower(),
        'roles': [],
        'prototypes': [],
        'active': False,
        'activationTime': datetime.datetime.utcnow()
    }
    server_id = monitoring.ioc.getCollection('JobServer').insert_one(
        server_document).inserted_id

    # Remove the server first so the deactivation has nothing to act on.
    removal = monitoring.ioc.getCollection('JobServer').delete_one(
        {'_id': server_id})
    self.assertEqual(removal.deleted_count, 1)

    deactivated = monitoring.deactivateServer(str(server_id))
    self.assertFalse(deactivated)

    monitoring.ioc.getCollection('ServerHealth').drop()
def test_get_servers(self):
    """getServers must return at least one entry once an active server exists.

    Inserts an active job server, checks the length of the getServers
    result, then removes the fixture server again.
    """
    monitoring = NodeMonitoring()
    server_document = {
        'jobs': 10,
        'os': platform.system().lower(),
        'roles': [],
        'prototypes': [],
        'active': True,
        'activationTime': datetime.datetime.utcnow()
    }
    server_id = monitoring.ioc.getCollection('JobServer').insert_one(
        server_document).inserted_id

    servers = monitoring.getServers()
    self.assertGreaterEqual(len(servers), 1)

    # Cleanup: the fixture server must still be present and removable.
    removal = monitoring.ioc.getCollection('JobServer').delete_one(
        {'_id': server_id})
    self.assertEqual(removal.deleted_count, 1)
class BridgeCommand(object):
    """Methods for Cluster Administration

    Attributes:
        ioc (GreaseContainer): Container used for logging, configuration and collection access
        imp (ImportTool): Import Tool Instance
        monitor (NodeMonitoring): Node Monitoring Model Instance

    """

    def __init__(self, ioc=None):
        """Builds the command around a container

        Args:
            ioc (GreaseContainer): container to reuse; anything that is not a
                GreaseContainer instance is replaced by a new GreaseContainer

        """
        if isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.imp = ImportTool(self.ioc.getLogger())
        self.monitor = NodeMonitoring(self.ioc)

    def action_register(self):
        """Ensures Registration of server

        Returns:
            bool: Registration status

        """
        self.ioc.getLogger().debug("Registration Requested")
        if self.ioc.ensureRegistration():
            print("Registration Complete!")
            self.ioc.getLogger().info("Registration Completed Successfully")
            return True
        print("Registration Failed!")
        self.ioc.getLogger().info("Registration Failed")
        return False

    def action_info(self, node=None, jobs=None, prototypeJobs=None):
        """Gets Node Information

        Prints the JobServer record for the node. With `jobs` the jobs
        executed by the node are printed as well; with both `jobs` and
        `prototypeJobs` the sourcing, detection and scheduling work handled
        by the node is printed too.

        Args:
            node (str): MongoDB Object ID to get information about
            jobs (bool): If true then will retrieve jobs executed by this node
            prototypeJobs (bool): If true then prototype jobs will be printed as well

        Note:
            provide a node argument via the CLI --node=4390qwr2fvdew458239
        Note:
            provide a jobs argument via the CLI --jobs
        Note:
            provide a prototype jobs argument via the CLI --pJobs

        Returns:
            bool: If Info was found

        """
        if not self.ioc.ensureRegistration():
            self.ioc.getLogger().error("Server not registered with MongoDB")
            print("Unregistered servers cannot talk to the cluster")
            return False
        valid, serverId = self.valid_server(node)
        if not valid:
            print("Invalid ObjectID")
            return False
        server = self.ioc.getCollection('JobServer').find_one(
            {'_id': ObjectId(str(serverId))})
        if not server:
            print("Unable to locate server")
            self.ioc.getLogger().error(
                "Unable to load [{0}] server for information".format(serverId))
            return False
        server = dict(server)
        print("""
<<<<<<<<<<<<<< SERVER: {0} >>>>>>>>>>>>>>
Activation State: {1} Date: {2}
Jobs: {3}
Operating System: {4}
Prototypes: {5}
Execution Roles: {6}
""".format(server.get('_id'), server.get('active'),
           server.get('activationTime'), server.get('jobs'),
           server.get('os'), server.get('prototypes'),
           server.get('roles')))
        if jobs and prototypeJobs:
            print("======================= SOURCING =======================")
            for job in self.ioc.getCollection('SourceData').find(
                    {'grease_data.sourcing.server': ObjectId(serverId)}):
                # The template must be formatted; passing the id as a second
                # print argument would emit a literal "{0}".
                print("""
-------------------------------
Job: {0}
-------------------------------
""".format(job['_id']))
            print("======================= DETECTION =======================")
            for job in self.ioc.getCollection('SourceData').find(
                    {'grease_data.detection.server': ObjectId(serverId)}):
                print("""
-------------------------------
Job: {0}
Start Time: {1}
End Time: {2}
Context: {3}
-------------------------------
""".format(job['_id'], job['grease_data']['detection']['start'],
           job['grease_data']['detection']['end'],
           job['grease_data']['detection']['detection']))
            print("======================= SCHEDULING =======================")
            for job in self.ioc.getCollection('SourceData').find(
                    {'grease_data.scheduling.server': ObjectId(serverId)}):
                print("""
-------------------------------
Job: {0}
Start Time: {1}
End Time: {2}
-------------------------------
""".format(job['_id'], job['grease_data']['scheduling']['start'],
           job['grease_data']['scheduling']['end']))
        if jobs:
            print("======================= EXECUTION =======================")
            for job in self.ioc.getCollection('SourceData').find(
                    {'grease_data.execution.server': ObjectId(serverId)}):
                print("""
-------------------------------
Job: {0}
Assignment Time: {1}
Completed Time: {2}
Execution Success: {3}
Command Success: {4}
Failures: {5}
Return Data: {6}
-------------------------------
""".format(job['_id'],
           job['grease_data']['execution']['assignmentTime'],
           job['grease_data']['execution']['completeTime'],
           job['grease_data']['execution']['executionSuccess'],
           job['grease_data']['execution']['commandSuccess'],
           job['grease_data']['execution']['failures'],
           job['grease_data']['execution']['returnData']))
        return True

    def action_assign(self, prototype=None, role=None, node=None):
        """Assign prototypes/roles to a node either local or remote

        Args:
            prototype (str): Prototype Job to assign
            role (str): Role to assign
            node (str): MongoDB ObjectId of node to assign to, if not provided will default to the local node

        Returns:
            bool: If successful true else false

        """
        assigned = False
        if prototype:
            # The prototype must be importable as a Command before it may be
            # assigned.
            job = self.imp.load(str(prototype))
            if not job or not isinstance(job, Command):
                print(
                    "Cannot find prototype [{0}] to assign check search path!".
                    format(prototype))
                self.ioc.getLogger().error(
                    "Cannot find prototype [{0}] to assign check search path!".
                    format(prototype))
                return False
            # Cleanup job
            job.__del__()
            del job
            valid, serverId = self.valid_server(node)
            if not valid:
                print("Invalid ObjectID")
                return False
            updated = self.ioc.getCollection('JobServer').update_one(
                {'_id': ObjectId(serverId)},
                {'$addToSet': {'prototypes': prototype}}).acknowledged
            if updated:
                print("Prototype Assigned")
                self.ioc.getLogger().info(
                    "Prototype [{0}] assigned to server [{1}]".format(
                        prototype, serverId))
                assigned = True
            else:
                print("Prototype Assignment Failed!")
                self.ioc.getLogger().info(
                    "Prototype [{0}] assignment failed to server [{1}]".format(
                        prototype, serverId))
                return False
        if role:
            valid, serverId = self.valid_server(node)
            if not valid:
                print("Invalid ObjectID")
                return False
            updated = self.ioc.getCollection('JobServer').update_one(
                {'_id': ObjectId(serverId)},
                {'$push': {'roles': role}}).acknowledged
            if updated:
                print("Role Assigned")
                # Log the role being assigned, not the prototype argument.
                self.ioc.getLogger().info(
                    "Role [{0}] assigned to server [{1}]".format(
                        role, serverId))
                assigned = True
            else:
                print("Role Assignment Failed!")
                self.ioc.getLogger().info(
                    "Role [{0}] assignment failed to server [{1}]".format(
                        role, serverId))
                return False
        if not assigned:
            print("Assignment failed, please check logs for details")
        return assigned

    def action_unassign(self, prototype=None, role=None, node=None):
        """Unassign prototypes/roles from a node either local or remote

        Args:
            prototype (str): Prototype Job to unassign
            role (str): Role to unassign
            node (str): MongoDB ObjectId of node to unassign from, if not provided will default to the local node

        Returns:
            bool: If successful true else false

        """
        unassigned = False
        if prototype:
            job = self.imp.load(str(prototype))
            if not job or not isinstance(job, Command):
                print(
                    "Cannot find prototype [{0}] to unassign check search path!"
                    .format(prototype))
                self.ioc.getLogger().error(
                    "Cannot find prototype [{0}] to unassign check search path!"
                    .format(prototype))
                return False
            # Cleanup job
            job.__del__()
            del job
            valid, serverId = self.valid_server(node)
            if not valid:
                print("Invalid ObjectID")
                return False
            updated = self.ioc.getCollection('JobServer').update_one(
                {'_id': ObjectId(serverId)},
                {'$pull': {'prototypes': prototype}}).acknowledged
            if updated:
                print("Prototype Assignment Removed")
                self.ioc.getLogger().info(
                    "Prototype [{0}] unassigned from server [{1}]".format(
                        prototype, serverId))
                unassigned = True
            else:
                print("Prototype Unassignment Failed!")
                self.ioc.getLogger().info(
                    "Prototype [{0}] unassignment failed from server [{1}]".
                    format(prototype, serverId))
                return False
        if role:
            valid, serverId = self.valid_server(node)
            if not valid:
                print("Invalid ObjectID")
                return False
            updated = self.ioc.getCollection('JobServer').update_one(
                {'_id': ObjectId(serverId)},
                {'$pull': {'roles': role}}).acknowledged
            if updated:
                print("Role Removed")
                # Log the role being removed, not the prototype argument.
                self.ioc.getLogger().info(
                    "Role [{0}] removed to server [{1}]".format(
                        role, serverId))
                unassigned = True
            else:
                print("Role Removal Failed!")
                self.ioc.getLogger().info(
                    "Role [{0}] removal failed to server [{1}]".format(
                        role, serverId))
                return False
        if not unassigned:
            print("Unassignment failed, please check logs for details")
        return unassigned

    def action_cull(self, node=None):
        """Culls a server from the active cluster

        The server is deactivated first, then its detect jobs, schedule jobs
        and jobs are rescheduled in that order; the first failing step stops
        the cull.

        Args:
            node (str): MongoDB ObjectId to cull; defaults to local node

        Returns:
            bool: True when the server was deactivated and every reschedule step succeeded

        """
        if not self.ioc.ensureRegistration():
            self.ioc.getLogger().error("Server not registered with MongoDB")
            print("Unregistered servers cannot talk to the cluster")
            return False
        valid, serverId = self.valid_server(node)
        if not valid:
            print("Invalid ObjectID")
            return False
        if not self.monitor.deactivateServer(serverId):
            self.ioc.getLogger().error(
                "Failed deactivating server [{0}]".format(serverId))
            print("Failed deactivating server [{0}]".format(serverId))
            return False
        self.ioc.getLogger().warning(
            "Server [{0}] preparing to reallocate detect jobs".format(
                serverId))
        if not self.monitor.rescheduleDetectJobs(serverId):
            self.ioc.getLogger().error(
                "Failed rescheduling detect jobs [{0}]".format(serverId))
            print("Failed rescheduling detect jobs [{0}]".format(serverId))
            return False
        self.ioc.getLogger().warning(
            "Server [{0}] preparing to reallocate schedule jobs".format(
                serverId))
        if not self.monitor.rescheduleScheduleJobs(serverId):
            # Name the step that actually failed so operators are not sent
            # looking at detect jobs.
            self.ioc.getLogger().error(
                "Failed rescheduling schedule jobs [{0}]".format(serverId))
            print("Failed rescheduling schedule jobs [{0}]".format(serverId))
            return False
        self.ioc.getLogger().warning(
            "Server [{0}] preparing to reallocate jobs".format(serverId))
        if not self.monitor.rescheduleJobs(serverId):
            self.ioc.getLogger().error(
                "Failed rescheduling jobs [{0}]".format(serverId))
            print("Failed rescheduling jobs [{0}]".format(serverId))
            return False
        print("Server Deactivated")
        return True

    def action_activate(self, node=None):
        """activates server in cluster

        Args:
            node (str): MongoDB ObjectId to activate; defaults to local node

        Returns:
            bool: If activation is successful

        """
        if not self.ioc.ensureRegistration():
            self.ioc.getLogger().error("Server not registered with MongoDB")
            print("Unregistered servers cannot talk to the cluster")
            return False
        valid, serverId = self.valid_server(node)
        if not valid:
            print("Invalid ObjectID")
            return False
        # Activation counts as failed when no document was modified.
        if self.ioc.getCollection('JobServer').update_one(
                {'_id': ObjectId(serverId)},
                {'$set': {
                    'active': True,
                    'activationTime': datetime.datetime.utcnow()
                }}).modified_count < 1:
            self.ioc.getLogger().warning(
                "Server [{0}] failed to be activated".format(serverId))
            return False
        self.ioc.getLogger().warning("Server [{0}] activated".format(serverId))
        return True

    def valid_server(self, node=None):
        """Validates node is in the MongoDB instance connected to

        Args:
            node (str): MongoDB Object ID to validate; defaults to local node

        Returns:
            tuple: first element is boolean if valid; second is the `_id` of
                the matched JobServer document when `node` is given, the
                configured NodeIdentity when it is not, or an empty string
                when validation fails

        """
        if node:
            try:
                server = self.ioc.getCollection('JobServer').find_one(
                    {'_id': ObjectId(str(node))})
            except InvalidId:
                self.ioc.getLogger().error(
                    "Invalid ObjectID passed to bridge info [{0}]".format(
                        node))
                return False, ""
            if server:
                return True, dict(server).get('_id')
            self.ioc.getLogger().error(
                "Failed to find server [{0}] in the database".format(node))
            return False, ""
        # No node supplied: fall back to the local node without a lookup.
        return True, self.ioc.getConfig().NodeIdentity
def test_rescheduleJobsFailed(self):
    """rescheduleJobs must return False and leave the job off the second server.

    Two active servers are registered together with a prototype
    configuration and one SourceData document whose execution is assigned to
    the first server. After deactivating the first server the reschedule is
    expected to fail and the document must not be assigned to the second
    server. All fixtures are removed afterwards.
    """
    monitoring = NodeMonitoring()
    prototype_config = PrototypeConfig(monitoring.ioc)

    def register_server(roles, prototypes):
        # Insert an active job server and hand back its generated id.
        return monitoring.ioc.getCollection('JobServer').insert_one({
            'jobs': 9,
            'os': platform.system().lower(),
            'roles': roles,
            'prototypes': prototypes,
            'active': True,
            'activationTime': datetime.datetime.utcnow()
        }).inserted_id

    first_server = register_server([], [])
    second_server = register_server(['test1'], ['detect', 'schedule'])

    configuration = {
        'active': True,
        'type': 'prototype_config',
        "name": "test",
        "job": "help",
        "exe_env": "test",
        "source": "test",
        "logic": {
            "Regex": [{
                "field": "url",
                "pattern": ".*",
                'variable': True,
                'variable_name': 'url'
            }],
            'Range': [{
                'field': 'status_code',
                'min': 199,
                'max': 201
            }]
        },
        'constants': {
            'test': 'ver'
        }
    }
    monitoring.ioc.getCollection('Configuration').insert_one(configuration)
    prototype_config.load(reloadConf=True)

    grease_data = {
        'sourcing': {
            'server': first_server
        },
        'detection': {
            'server': first_server,
            'start': datetime.datetime.utcnow(),
            'end': datetime.datetime.utcnow(),
            'detection': {}
        },
        'scheduling': {
            'server': first_server,
            'start': datetime.datetime.utcnow(),
            'end': datetime.datetime.utcnow(),
        },
        'execution': {
            'server': first_server,
            'assignmentTime': datetime.datetime.utcnow(),
            'completeTime': None,
            'returnData': {},
            'executionSuccess': False,
            'commandSuccess': False,
            'failures': 0
        }
    }
    source_id = monitoring.ioc.getCollection('SourceData').insert_one({
        'grease_data': grease_data,
        'source': 'test',
        'configuration': 'test',
        'createTime': datetime.datetime.utcnow(),
        'expiry': Deduplication.generate_max_expiry_time(1)
    }).inserted_id

    self.assertTrue(monitoring.deactivateServer(str(first_server)))
    self.assertFalse(monitoring.rescheduleJobs(str(first_server)))

    # The job must not have been handed to the second server.
    moved = monitoring.ioc.getCollection('SourceData').find({
        '_id': source_id,
        'grease_data.execution.server': second_server
    }).count()
    self.assertFalse(moved)

    # Cleanup: servers first (in insertion order), then data and config.
    for server_id in (first_server, second_server):
        removal = monitoring.ioc.getCollection('JobServer').delete_one(
            {'_id': server_id})
        self.assertEqual(removal.deleted_count, 1)
    monitoring.ioc.getCollection('SourceData').delete_one({'_id': source_id})
    monitoring.ioc.getCollection('Configuration').drop()
    monitoring.ioc.getCollection('ServerHealth').drop()
    prototype_config.load(reloadConf=True)
def test_rescheduleDetectJobsFailed(self):
    """rescheduleDetectJobs must return False when no active server has prototypes.

    Registers an active server, clears the prototype list of every active
    server, and stores one SourceData document whose detection is assigned
    to the new server. After deactivating that server the detect reschedule
    is expected to fail. The fixture server and document are removed
    afterwards.
    """
    n = NodeMonitoring()
    server1 = n.ioc.getCollection('JobServer').insert_one({
        'jobs': 9,
        'os': platform.system().lower(),
        'roles': [],
        'prototypes': [],
        'active': True,
        'activationTime': datetime.datetime.utcnow()
    }).inserted_id
    # Strip prototypes from every active server so nothing can take the job.
    n.ioc.getCollection('JobServer').update_many(
        {'active': True}, {'$set': {
            'prototypes': []
        }})
    # Keep the generated id (not the write acknowledgement flag) so the
    # cleanup delete below actually matches and removes this document.
    source = n.ioc.getCollection('SourceData').insert_one({
        'grease_data': {
            'sourcing': {
                'server': server1
            },
            'detection': {
                'server': server1,
                'start': None,
                'end': None,
                'detection': {}
            },
            'scheduling': {
                'server': None,
                'start': None,
                'end': None
            },
            'execution': {
                'server': None,
                'assignmentTime': None,
                'completeTime': None,
                'returnData': {},
                'executionSuccess': False,
                'commandSuccess': False,
                'failures': 0
            }
        },
        'source': 'test',
        'configuration': 'test',
        'data': {
            'test': 'ver'
        },
        'createTime': datetime.datetime.utcnow(),
        'expiry': Deduplication.generate_max_expiry_time(1)
    }).inserted_id
    self.assertTrue(n.deactivateServer(str(server1)))
    self.assertFalse(n.rescheduleDetectJobs(str(server1)))
    self.assertEqual(
        n.ioc.getCollection('JobServer').delete_one({
            '_id': server1
        }).deleted_count, 1)
    n.ioc.getCollection('SourceData').delete_one({'_id': source})
    n.ioc.getCollection('ServerHealth').drop()