Example #1
    def parse_source(self, configuration):
        """This will Query the SQL Server to find data

        Args:
            configuration (dict): Configuration of Source. See Class Documentation above for more info

        Returns:
            bool: If True, data will be scheduled for ingestion after deduplication; if False, the engine will bail out

        """
        ioc = GreaseContainer()
        if configuration.get('hour'):
            if datetime.datetime.utcnow().hour != int(configuration.get('hour')):
                # it is not the correct hour
                return True
        if configuration.get('minute'):
            if datetime.datetime.utcnow().minute != int(configuration.get('minute')):
                # it is not the correct minute
                return True
        if configuration.get('type') != 'postgresql':
            ioc.getLogger().error("Unsupported SQL Server Type; Currently Only supporting PostgreSQL", notify=False)
            return False
        else:
            # Attempt to get the DSN for the connection
            if os.environ.get(configuration.get('dsn')) and configuration.get('query'):
                # ensure the DSN is setup and the query is present
                try:
                    DSN = os.environ.get(configuration.get('dsn'))
                    with psycopg2.connect(DSN) as conn:
                        with conn.cursor(cursor_factory=RealDictCursor) as cursor:
                            cursor.execute(configuration.get('query'))
                            data = cursor.fetchall()
                            for row in data:
                                self._data.append(row)
                    # delete the container only after the connection block has exited,
                    # so the except clause below can still log through ioc
                    del ioc
                    return True
                except Exception as e:
                    # Broad except so connection errors are logged rather than crashing the engine
                    ioc.getLogger().error("Error processing configuration; Error [{0}]".format(e), notify=False)
                    del ioc
                    return False
            else:
                # could not get the DSN
                ioc.getLogger().error("Failed to locate the DSN variable", notify=False)
                del ioc
                return False
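
For context, here is a minimal standalone sketch of the configuration shape this parser expects and the DSN-from-environment pattern it relies on. The environment variable name POSTGRES_DSN and all field values are illustrative assumptions, not part of GREASE; running it requires a reachable PostgreSQL instance.

import os

import psycopg2
from psycopg2.extras import RealDictCursor

# Hypothetical source configuration: 'type' must be 'postgresql' and 'dsn'
# names an environment variable holding the libpq connection string
configuration = {
    'name': 'example_pg_source',
    'type': 'postgresql',
    'dsn': 'POSTGRES_DSN',         # assumed env var name
    'query': 'SELECT 1 AS alive',
    'hour': 12,                    # optional: only parse during this UTC hour
}

dsn = os.environ.get(configuration['dsn'])
if dsn:
    with psycopg2.connect(dsn) as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute(configuration['query'])
            rows = cursor.fetchall()  # RealDictCursor yields one dict per row
    print(rows)
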
Example #2
class Command(object):
    """Abstract class for commands in GREASE

    Attributes:
        __metaclass__ (ABCMeta): Metadata class object
        purpose (str): The purpose of the command
        help (str): Help string for the command line
        __author__ (str): Authorship string
        __version__ (str): Command Version
        os_needed (str): If a specific OS is needed then set this
        ioc (GreaseContainer): IOC container for access to system resources
        variable_storage (pymongo.collection): collection object for command

    """

    ###
    # Command Metadata information
    ###
    purpose = "Default"
    help = """
    No Help Information Provided
    """
    __author__ = "Jimmy The Programmer"
    __version__ = "1.0.0"
    os_needed = None
    __metaclass__ = ABCMeta

    def __init__(self, Logger=None):
        if Logging and isinstance(Logger, Logging):
            self.ioc = GreaseContainer(Logger)
        else:
            self.ioc = GreaseContainer()
        self.variable_storage = self.ioc.getMongo()\
            .Client()\
            .get_database(self.ioc.getConfig().get('Connectivity', 'MongoDB').get('db', 'grease'))\
            .get_collection(self.__class__.__name__)
        self.start_time = datetime.utcnow()
        self.exec_data = {'execVal': False, 'retVal': False, 'data': {}}

    def getExecVal(self):
        """Get the execution attempt success

        Returns:
            bool: If the command executed without exception

        """
        return self.exec_data.get('execVal', False)

    def getRetVal(self):
        """Get the execution boolean return state

        Returns:
            bool: the boolean return value of execute

        """
        return self.exec_data.get('retVal', False)

    def getData(self):
        """Get any data the execute method wanted to put into telemetry

        Returns:
            dict: The Key/Value pairs from the execute method execution

        """
        return self.exec_data.get('data', {})

    def setData(self, Key, Data):
        """Put Data into the data object to be inserted into telemetry

        Args:
            Key (str): Key for the data to be stored
            Data (object): JSON-able object to store

        Returns:
            None: Void Method to put data

        """
        self.exec_data['data'][Key] = Data

    def __del__(self):
        # close mongo connection
        self.ioc.getMongo().Close()

    def safe_execute(self, context=None):
        """Attempt execution and prevent MOST exceptions

        Args:
            context (dict): context for the command to use

        Returns:
            None: Void method; exceptions are caught and logged

        """
        if not context:
            context = {}
        try:
            try:
                self.exec_data['execVal'] = True
                self.exec_data['retVal'] = bool(self.execute(context))
            except BaseException:
                self.exec_data['execVal'] = False
                exc_type, exc_obj, exc_tb = sys.exc_info()
                # Find initial traceback frame
                current_tb = exc_tb
                while current_tb.tb_next:
                    current_tb = current_tb.tb_next

                self.ioc.getLogger().error(
                    "Failed to execute [{0}]; execute raised an exception!".format(
                        self.__class__.__name__),
                    additional={
                        'file': os.path.split(current_tb.tb_frame.f_code.co_filename)[1],
                        'type': exc_type,
                        'line': current_tb.tb_lineno
                    })
        except:
            self.ioc.getLogger().error(
                "Failed to execute [{0}]; major exception while logging the failure".format(
                    self.__class__.__name__))

    @abstractmethod
    def execute(self, context):
        """Base Execute Method

        This method should *always* be overridden in child classes. This is the code that will run when your command
        is called. If this method is not implemented then the class will fail loading.

        Args:
            context (dict): context for the command to use

        Returns:
            bool: Command Success

        """
        pass
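
To make the contract concrete, here is a hedged sketch of a minimal Command subclass; the class name and greeting logic are invented for illustration, and constructing it assumes a reachable MongoDB since Command.__init__ opens a collection.

class HelloWorld(Command):
    """Illustrative command: builds a greeting and stores it in telemetry"""
    purpose = "Prints a greeting"
    __author__ = "Example Author"
    __version__ = "0.0.1"

    def execute(self, context):
        greeting = "Hello, {0}!".format(context.get('name', 'GREASE'))
        self.setData('greeting', greeting)  # lands in exec_data['data']
        return True  # surfaced later via getRetVal()


# Run through safe_execute so exceptions are captured and logged
cmd = HelloWorld()
cmd.safe_execute({'name': 'world'})
assert cmd.getExecVal() and cmd.getRetVal()
print(cmd.getData())  # {'greeting': 'Hello, world!'}
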
Example #3
class Scheduling(object):
    """Central scheduling class for GREASE

    This class routes data to nodes within GREASE

    Attributes:
        ioc (GreaseContainer): IoC access for Scheduling

    """

    def __init__(self, ioc=None):
        if isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.ioc.ensureRegistration()

    def scheduleDetection(self, source, configName, data):
        """Schedule a Source Parse to detection

        This method will take a list of single dimension dictionaries and schedule them for detection

        Args:
            source (str): Name of the source
            configName (str): Name of the configuration the data was sourced from
            data (list[dict]): Data to be scheduled for detection

        Returns:
            bool: Scheduling success

        """
        if not isinstance(data, list) or len(data) == 0:
            self.ioc.getLogger().trace(
                "Data provided is empty or not a list; got type [{0}]".format(str(type(data))),
                trace=True
            )
            return False
        self.ioc.getLogger().trace("Preparing to schedule [{0}] source objects".format(len(data)), trace=True)
        sourceCollect = self.ioc.getCollection('SourceData')
        jServerCollect = self.ioc.getCollection('JobServer')
        # begin scheduling loop of each block
        for elem in data:
            if not isinstance(elem, dict):
                self.ioc.getLogger().warning(
                    "Element from data not of type dict! Got [{0}] DROPPED".format(str(type(elem))),
                    notify=False
                )
                continue
            server, jobCount = self.determineDetectionServer()
            if server:
                sourceCollect.insert_one({
                    'grease_data': {
                        'sourcing': {
                            'server': ObjectId(self.ioc.getConfig().NodeIdentity)
                        },
                        'detection': {
                            'server': ObjectId(server),
                            'start': None,
                            'end': None,
                            'detection': {}
                        },
                        'scheduling': {
                            'server': None,
                            'start': None,
                            'end': None
                        },
                        'execution': {
                            'server': None,
                            'assignmentTime': None,
                            'completeTime': None,
                            'returnData': {},
                            'executionSuccess': False,
                            'commandSuccess': False,
                            'failures': 0
                        }
                    },
                    'source': str(source),
                    'configuration': str(configName),
                    'data': elem,
                    'createTime': datetime.datetime.utcnow(),
                    'expiry': Deduplication.generate_max_expiry_time(1)
                })
                jServerCollect.update_one({
                    '_id': ObjectId(server)},
                    {'$set': {'jobs': int(jobCount) + 1}}
                )
            else:
                self.ioc.getLogger().warning(
                    "Detection scheduling failed; could not find a detection server for data object from source [{0}]; DROPPED".format(source),
                    notify=False
                )
                return False
        return True

    def scheduleScheduling(self, objectId):
        """Schedule a source for job scheduling

        This method schedules a source for job scheduling

        Args:
            objectId (str): MongoDB ObjectId to schedule

        Returns:
            bool: If scheduling was successful

        """
        server, jobCount = self.determineSchedulingServer()
        if not server:
            self.ioc.getLogger().error("Failed to find scheduling server", notify=False)
            return False
        self.ioc.getCollection('SourceData').update_one(
            {'_id': ObjectId(objectId)},
            {
                '$set': {
                    'grease_data.scheduling.server': ObjectId(server),
                    'grease_data.scheduling.start': None,
                    'grease_data.scheduling.end': None
                }
            }
        )
        # job counts live on the JobServer document, not SourceData
        self.ioc.getCollection('JobServer').update_one({
            '_id': ObjectId(server)},
            {'$set': {'jobs': int(jobCount) + 1}}
        )
        return True

    def determineDetectionServer(self):
        """Determines detection server to use

        Finds the detection server available for a new detection job

        Returns:
            tuple: MongoDB Object ID of server & current job count

        """
        result = self.ioc.getCollection('JobServer').find({
            'active': True,
            'prototypes': 'detect'
        }).sort('jobs', pymongo.ASCENDING).limit(1)
        if result.count():
            return str(result[0]['_id']), int(result[0]['jobs'])
        else:
            return "", 0

    def determineSchedulingServer(self):
        """Determines scheduling server to use

        Finds the scheduling server available for a new scheduling job

        Returns:
            tuple: MongoDB Object ID of server & current job count

        """
        result = self.ioc.getCollection('JobServer').find({
            'active': True,
            'prototypes': 'schedule'
        }).sort('jobs', pymongo.DESCENDING).limit(1)
        if result.count():
            return str(result[0]['_id']), int(result[0]['jobs'])
        else:
            return "", 0

    def determineExecutionServer(self, role):
        """Determines execution server to use

        Finds the execution server available for a new execution job

        Args:
            role (str): Execution role the node must have

        Returns:
            tuple: MongoDB Object ID of server & current job count; the ID is an empty string if none found

        """
        result = self.ioc.getCollection('JobServer').find({
            'active': True,
            'roles': str(role)
        }).sort('jobs', pymongo.DESCENDING).limit(1)
        if result.count():
            return str(result[0]['_id']), int(result[0]['jobs'])
        else:
            return "", 0
Example #4
class DaemonProcess(object):
    """Actual daemon processing for GREASE Daemon

    Attributes:
        ioc (GreaseContainer): The Grease IOC
        current_real_second (int): Current second in time
        registered (bool): If the node is registered with MongoDB
        impTool (ImportTool): Instance of Import Tool
        conf (PrototypeConfig): Prototype Configuration Instance
        contextManager (dict): Tracks threads for running jobs and prototypes

    """

    ioc = None
    current_real_second = None
    registered = True
    contextManager = {'jobs': {}, 'prototypes': {}}
    impTool = None

    def __init__(self, ioc):
        if isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.current_real_second = datetime.utcnow().second
        if self.ioc.getConfig().NodeIdentity == "Unknown" and not self.register():
            self.registered = False
        self.impTool = ImportTool(self.ioc.getLogger())
        self.conf = PrototypeConfig(self.ioc)

    def server(self):
        """Server process for ensuring prototypes & jobs are running

        Running this method will clear the DB of any jobs a node may have

        Returns:
            bool: Server Success

        """
        # Ensure we aren't swamping the system
        cpu = cpu_percent(interval=.1)
        mem = virtual_memory().percent
        if cpu >= int(self.ioc.getConfig().get('NodeInformation', 'ResourceMax')) \
                or mem >= int(self.ioc.getConfig().get('NodeInformation', 'ResourceMax')):
            self.ioc.getLogger().trace(
                "Resource Maximum Reached CPU: [{0}] Memory: [{1}]".format(cpu, mem),
                trace=True)
            # remove variables
            del cpu
            del mem
            return True
        if not self.registered:
            self.ioc.getLogger().trace("Server is not registered", trace=True)
            return False
        self.ioc.getLogger().trace("Server execution starting", trace=True)
        # establish job collection
        JobsCollection = self.ioc.getCollection("SourceData")
        self.ioc.getLogger().trace("Searching for Jobs", trace=True)
        jobs = JobsCollection.find({
            'grease_data.execution.server': ObjectId(self.ioc.getConfig().NodeIdentity),
            'grease_data.execution.commandSuccess': False,
            'grease_data.execution.executionSuccess': False,
            'grease_data.execution.failures': {'$lt': 6}
        })
        # Get Node Information
        Node = self.ioc.getCollection('JobServer').find_one(
            {'_id': ObjectId(self.ioc.getConfig().NodeIdentity)})
        if not Node:
            # If for some reason we couldn't find it
            self.ioc.getLogger().error("Failed To Load Node Information")
            return False
        # Get Prototypes
        prototypes = list(Node.get('prototypes') or [])
        # Del node instance
        del Node
        if prototypes:
            # We have prototypes to spin up
            for prototype in prototypes:
                self.ioc.getLogger().trace(
                    "Passing ProtoType [{0}] to Runner".format(prototype),
                    trace=True)
                self._run_prototype(prototype)
        if jobs.count():
            self.ioc.getLogger().trace("Total Jobs to Execute: [{0}]".format(
                jobs.count()))
            for job in jobs:
                self.ioc.getLogger().trace(
                    "Passing Job [{0}] to Runner".format(job.get("_id")),
                    trace=True)
                self._run_job(job, JobsCollection)
        else:
            # Nothing to Run for Jobs
            self.ioc.getLogger().trace("No Jobs Scheduled to Server",
                                       trace=True)
        self.ioc.getLogger().trace("Server execution complete", trace=True)
        return True

    def _run_job(self, job, JobCollection):
        """Run a On-Demand Job

        Args:
            job (dict): Job Data to execute
            JobCollection (pymongo.collection.Collection): JobCollection to update for telemetry

        Returns:
            None: Void Method to kickoff execution

        """
        if not self.contextManager['jobs'].get(job.get('_id')):
            # New Job to run
            if isinstance(job.get('configuration'), bytes):
                conf = job.get('configuration').decode()
            else:
                conf = job.get('configuration')
            inst = self.impTool.load(self.conf.get_config(conf).get('job', ''))
            if inst and isinstance(inst, Command):
                inst.ioc.getLogger().foreground = self.ioc.getLogger().foreground
                thread = threading.Thread(
                    target=inst.safe_execute,
                    args=(job.get('grease_data',
                                  {}).get('detection',
                                          {}).get('detection', {}), ),
                    name="GREASE DAEMON COMMAND EXECUTION [{0}]".format(
                        job.get('_id')))
                thread.daemon = True
                thread.start()
                self.contextManager['jobs'][job.get("_id")] = {
                    'thread': thread,
                    'command': inst
                }
            else:
                # Invalid Job
                del inst
                self.ioc.getLogger().warning("Invalid Job", additional=job)
                JobCollection.update_one({'_id': ObjectId(job['_id'])}, {
                    '$set': {
                        'grease_data.execution.failures':
                        job.get('grease_data', {}).get('execution', {}).get('failures', 0) + 1
                    }
                })
            return
        else:
            # Job already executing
            if self.contextManager['jobs'].get(job.get('_id')).get('thread').is_alive():
                # thread still executing
                return
            else:
                # Execution has ended
                self.ioc.getLogger().trace("Job [{0}] finished running".format(
                    job.get('_id')),
                                           trace=True)
                finishedJob = self.contextManager['jobs'].get(
                    job.get('_id')).get('command')  # type: Command
                if finishedJob.getRetVal():
                    # job completed successfully
                    JobCollection.update_one(
                        {'_id': ObjectId(job.get('_id'))}, {
                            '$set': {
                                'grease_data.execution.commandSuccess':
                                finishedJob.getRetVal(),
                                'grease_data.execution.executionSuccess':
                                finishedJob.getExecVal(),
                                'grease_data.execution.completeTime':
                                datetime.utcnow(),
                                'grease_data.execution.returnData':
                                finishedJob.getData()
                            }
                        })
                else:
                    # Job Failure
                    self.ioc.getLogger().warning(
                        "Job Failed [{0}]".format(job.get('_id')),
                        additional=finishedJob.getData())
                    # TODO: Job Execution cooldown timing
                    JobCollection.update_one({'_id': ObjectId(job['_id'])}, {
                        '$set': {
                            'grease_data.execution.failures':
                            job.get('grease_data', {}).get(
                                'execution', {}).get('failures', 0) + 1
                        }
                    })
                # close out job
                finishedJob.__del__()
                del finishedJob
                # remove from contextManager
                del self.contextManager['jobs'][job.get('_id')]
                return

    def _run_prototype(self, prototype):
        """Startup a ProtoType

        Args:
            prototype (str): ProtoType to start

        Returns:
            None: Void method to start prototype

        """
        if not self.contextManager['prototypes'].get(prototype):
            # ProtoType has not started
            inst = self.impTool.load(prototype)
            if not isinstance(inst, Command):
                # invalid ProtoType
                self.log_once_per_second(
                    "Invalid ProtoType [{0}]".format(prototype), level=ERROR)
                return
            inst.ioc.getLogger().foreground = self.ioc.getLogger().foreground
            thread = threading.Thread(
                target=inst.safe_execute,
                args=({},),  # note the trailing comma: args must be a tuple
                name="GREASE DAEMON PROTOTYPE [{0}]".format(prototype))
            thread.daemon = True
            thread.start()
            self.contextManager['prototypes'][prototype] = thread
            return
        else:
            # ensure thread is alive
            if self.contextManager['prototypes'].get(prototype).is_alive():
                self.ioc.getLogger().trace(
                    "ProtoType [{0}] is alive".format(prototype))
                return
            else:
                # Thread died for some reason
                self.log_once_per_second(
                    "ProtoType [{0}] Stopped".format(prototype), level=INFO)
                inst = self.impTool.load(prototype)
                if not isinstance(inst, Command):
                    self.log_once_per_second(
                        "Invalid ProtoType [{0}]".format(prototype),
                        level=ERROR)
                    return
                inst.ioc.getLogger().foreground = self.ioc.getLogger().foreground
                thread = threading.Thread(
                    target=inst.safe_execute,
                    args=({},),  # restart through safe_execute so the required context is supplied
                    name="GREASE DAEMON PROTOTYPE [{0}]".format(prototype))
                thread.daemon = True
                thread.start()
                self.contextManager['prototypes'][prototype] = thread
                return

    def drain_jobs(self, JobCollection):
        """Will drain jobs from the current context

        This method is used to prevent abnormal ending of executions

        Args:
            JobCollection (pymongo.collection.Collection): Job Collection Object

        Returns:
            bool: When job queue is emptied

        """
        Threads = True
        while Threads:
            if self.contextManager['jobs']:
                jobs = {}
                for key, val in self.contextManager['jobs'].items():
                    if val['thread'].is_alive():
                        jobs[key] = val
                        continue
                    else:
                        # Execution has ended
                        self.ioc.getLogger().trace(
                            "Job [{0}] finished running".format(key),
                            trace=True)
                        finishedJob = self.contextManager['jobs'].get(key).get(
                            'command')  # type: Command
                        if finishedJob.getRetVal():
                            # job completed successfully
                            JobCollection.update_one({'_id': ObjectId(key)}, {
                                '$set': {
                                    'grease_data.execution.commandSuccess':
                                    finishedJob.getRetVal(),
                                    'grease_data.execution.executionSuccess':
                                    finishedJob.getExecVal(),
                                    'grease_data.execution.completeTime':
                                    datetime.utcnow(),
                                    'grease_data.execution.returnData':
                                    finishedJob.getData()
                                }
                            })
                        else:
                            # Job Failure
                            self.ioc.getLogger().warning(
                                "Job Failed [{0}]".format(key),
                                additional=finishedJob.getData())
                            # $inc avoids reading the failure count off the Command
                            # instance, which is not a dict and has no get() for it
                            JobCollection.update_one({'_id': ObjectId(key)}, {
                                '$inc': {
                                    'grease_data.execution.failures': 1
                                }
                            })
                        # close out job
                        finishedJob.__del__()
                        del finishedJob
                self.contextManager['jobs'] = jobs
            else:
                Threads = False
        return True

    def register(self):
        """Attempt to register with MongoDB

        Returns:
            bool: Registration Success

        """
        return self.ioc.ensureRegistration()

    def log_once_per_second(self, message, level=DEBUG, additional=None):
        """Log Message once per second

        Args:
            message (str): Message to log
            level (int): Log Level
            additional (object): Additional information that is able to be str'd

        Returns:
            None: Void Method to fire log message

        """
        if self._has_time_progressed():
            self.ioc.getLogger().TriageMessage(message=message,
                                               level=level,
                                               additional=additional)

    def _has_time_progressed(self):
        """Determines if the current second and the real second are not the same

        Returns:
            bool: if true then time has passed in a meaningful way

        """
        if self.current_real_second != datetime.utcnow().second:
            self.current_real_second = datetime.utcnow().second
            return True
        else:
            return False
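
A hedged sketch of a driver loop around DaemonProcess.server(); the real GREASE daemon entry point may differ, and the one-second pacing is an assumption:

import time

process = DaemonProcess(GreaseContainer())
try:
    while True:
        if not process.server():
            process.ioc.getLogger().error("server cycle failed", notify=False)
        time.sleep(1)  # illustrative pacing
except KeyboardInterrupt:
    # let in-flight jobs finish before shutting down
    process.drain_jobs(process.ioc.getCollection('SourceData'))
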
Example #5
    def test_logger(self):
        ioc = GreaseContainer()
        self.assertIsInstance(ioc.getLogger(), Logging)
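
This assertion presumably lives inside a unittest.TestCase; a self-contained harness sketch (the class name is hypothetical):

import unittest


class TestGreaseContainer(unittest.TestCase):
    def test_logger(self):
        ioc = GreaseContainer()
        self.assertIsInstance(ioc.getLogger(), Logging)


if __name__ == '__main__':
    unittest.main()
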
Example #6
class PrototypeConfig(object):
    """Responsible for Scanning/Detection/Scheduling configuration

    Structure of Configuration::

        {
            'configuration': {
                'pkg': [], # <-- Loaded from pkg_resources.resource_filename('tgt_grease.enterprise.Model', 'config/')
                'fs': [], # <-- Loaded from `<GREASE_DIR>/etc/*.config.json`
                'mongo': [] # <-- Loaded from the Configuration Mongo Collection
            },
            'raw': [], # <-- All loaded configurations
            'sources': [], # <-- list of sources found in configurations
            'source': {}, # <-- keys are sources, values are lists of configs for that source
            'names': [], # <-- all config names, to allow lookup by name
            'name': {} # <-- keys are config names, values are the configs themselves
        }

    Structure of a configuration file::

        {
            "name": String,
            "job": String,
            "exe_env": String, # <-- If not provided will be default as 'general'
            "source": String,
            "logic": {
                # I need to be the logical blocks for Detection
            }
        }

    Attributes:
        ioc (GreaseContainer): IOC access

    """
    def __init__(self, ioc=None):
        global GREASE_PROTOTYPE_CONFIGURATION
        if ioc and isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        if not GREASE_PROTOTYPE_CONFIGURATION:
            GREASE_PROTOTYPE_CONFIGURATION = self.load()

    def getConfiguration(self):
        """Returns the Configuration Object loaded into memory

        Returns:
            dict: Configuration object

        """
        global GREASE_PROTOTYPE_CONFIGURATION
        if not GREASE_PROTOTYPE_CONFIGURATION:
            self.load(reloadConf=True)
        return GREASE_PROTOTYPE_CONFIGURATION

    def load(self, reloadConf=False, ConfigurationList=None):
        """[Re]loads configuration data about the current execution node

        Configuration data loads from three places in GREASE. The first is internal to the package: files added
        manually to the package's config directory following the file pattern. The second follows the same pattern
        but is loaded from `<GREASE_DIR>/etc/`. The final place GREASE looks for configuration data is the
        `Configuration` collection in MongoDB

        Args:
            reloadConf (bool): If True this will reload the global object. False will return the object
            ConfigurationList (list of dict): If provided will load the list of dict for config after validation

        Note:
            Providing a configuration *automatically* reloads the memory structure of prototype configuration

        Returns:
            dict: Current Configuration information

        """
        global GREASE_PROTOTYPE_CONFIGURATION
        if ConfigurationList:
            conf = dict()
            conf['configuration'] = dict()
            conf['configuration']['ConfigurationList'] = self.validate_config_list(ConfigurationList)
            conf['raw'] = conf['configuration']['ConfigurationList']
            # split by configuration sets
            # the list of configured sources
            conf['sources'] = list()
            # the actual configurations for each source
            conf['source'] = dict()
            # configurations to get via name
            conf['names'] = list()
            # the actual configurations for each config name
            conf['name'] = dict()
            for config in conf.get('raw'):  # type: dict
                if config.get('source') in conf['sources']:
                    conf['source'][config.get('source')].append(config)
                else:
                    conf['sources'].append(config.get('source'))
                    conf['source'][config.get('source')] = list()
                    conf['source'][config.get('source')].append(config)
                if config.get('name') in conf['names']:
                    self.ioc.getLogger().error(
                        "Prototype Configuration [{0}] already found! Overwriting"
                        .format(config.get('name')))
                    conf['name'][config.get('name')] = config
                else:
                    conf['names'].append(config.get('name'))
                    conf['name'][config.get('name')] = config
            GREASE_PROTOTYPE_CONFIGURATION = conf
            return conf
        # fill out raw results
        conf = dict()
        conf['configuration'] = dict()
        conf['raw'] = []
        pkg = self.validate_config_list(
            self.load_from_fs(
                pkg_resources.resource_filename('tgt_grease.enterprise.Model',
                                                'config/')))
        for newConfig in pkg:
            conf['raw'].append(newConfig)
        conf['configuration']['pkg'] = pkg
        del pkg
        fs = self.validate_config_list(
            self.load_from_fs(self.ioc.getConfig().get('Configuration',
                                                       'dir')))
        for newConfig in fs:
            conf['raw'].append(newConfig)
        conf['configuration']['fs'] = fs
        del fs
        mongo = self.validate_config_list(self.load_from_mongo())
        for newConfig in mongo:
            conf['raw'].append(newConfig)
        conf['configuration']['mongo'] = mongo
        del mongo
        # split by configuration sets
        # the list of configured sources
        conf['sources'] = list()
        # the actual configurations for each source
        conf['source'] = dict()
        # configurations to get via name
        conf['names'] = list()
        # the actual configurations for each config name
        conf['name'] = dict()
        for config in conf.get('raw'):  # type: dict
            if config.get('source') in conf['sources']:
                conf['source'][config.get('source')].append(config)
            else:
                conf['sources'].append(config.get('source'))
                conf['source'][config.get('source')] = list()
                conf['source'][config.get('source')].append(config)
            if config.get('name') in conf['names']:
                self.ioc.getLogger().error(
                    "Prototype Configuration [{0}] already found! Overwriting".
                    format(config.get('name')))
                conf['name'][config.get('name')] = config
            else:
                conf['names'].append(config.get('name'))
                conf['name'][config.get('name')] = config
        # return block
        if not reloadConf:
            return conf
        else:
            GREASE_PROTOTYPE_CONFIGURATION = conf
            return conf

    def get_sources(self):
        """Returns the list of sources to be scanned

        Returns:
            list: List of sources

        """
        global GREASE_PROTOTYPE_CONFIGURATION  # type: dict
        if GREASE_PROTOTYPE_CONFIGURATION:
            return GREASE_PROTOTYPE_CONFIGURATION.get('sources', [])
        else:
            self.ioc.getLogger().error(
                "GREASE Prototype configuration is not loaded",
                trace=True,
                notify=False)
            return []

    def get_source(self, name):
        """Get all configuration by source by name

        Args:
            name (str): Source name to get

        Returns:
            list[dict]: Configurations for the source if found else empty list

        """
        global GREASE_PROTOTYPE_CONFIGURATION
        if GREASE_PROTOTYPE_CONFIGURATION:
            return GREASE_PROTOTYPE_CONFIGURATION.get('source', {}).get(name, [])
        else:
            self.ioc.getLogger().error(
                "GREASE Prototype configuration not loaded",
                notify=False,
                trace=True)
            return []

    def get_names(self):
        """Returns the list of names of configs

        Returns:
            list: List of config names

        """
        global GREASE_PROTOTYPE_CONFIGURATION  # type: dict
        if GREASE_PROTOTYPE_CONFIGURATION:
            return GREASE_PROTOTYPE_CONFIGURATION.get('names', [])
        else:
            self.ioc.getLogger().error(
                "GREASE Prototype configuration is not loaded",
                trace=True,
                notify=False)
            return []

    def get_config(self, name):
        """Get Configuration by name

        Args:
            name (str): Configuration name to get

        Returns:
            dict: Configuration if found else empty dict

        """
        global GREASE_PROTOTYPE_CONFIGURATION
        if GREASE_PROTOTYPE_CONFIGURATION:
            if GREASE_PROTOTYPE_CONFIGURATION.get('name'):
                return GREASE_PROTOTYPE_CONFIGURATION.get('name').get(name, {})
            else:
                self.ioc.getLogger().error("GREASE Configuration Not Found",
                                           notify=False,
                                           trace=True)
                return {}
        else:
            self.ioc.getLogger().error(
                "GREASE Prototype configuration not loaded",
                notify=False,
                trace=True)
            return {}

    def load_from_fs(self, directory):
        """Loads configurations from provided directory

        Note:
            Pattern is `*.config.json`

        Args:
            directory (str): Directory to load from

        Returns:
            list of dict: configurations

        """
        self.ioc.getLogger().trace(
            "Loading Configurations from directory [{0}]".format(directory),
            trace=True)
        intermediate = list()
        matches = []
        for root, dirnames, filenames in os.walk(directory):
            for filename in fnmatch.filter(filenames, '*.config.json'):
                matches.append(os.path.join(root, filename))
        for doc in matches:
            self.ioc.getLogger().trace("Attempting to load [{0}]".format(doc),
                                       trace=True)
            with open(doc, 'rb') as current_file:
                content = current_file.read()
                if isinstance(content, bytes):
                    content = content.decode()
            try:
                intermediate.append(json.loads(content))
                self.ioc.getLogger().trace(
                    "Successfully loaded [{0}]".format(doc), trace=True)
            except ValueError:
                self.ioc.getLogger().error("Failed to load [{0}]".format(doc),
                                           trace=True,
                                           notify=False)
                continue
        self.ioc.getLogger().trace(
            "total documents returned from fs [{0}]".format(len(intermediate)),
            trace=True)
        return intermediate

    def load_from_mongo(self):
        """Returns all active configurations from the mongo collection Configuration

        Structure of Configuration expected in Mongo::

            {
                "name": String,
                "job": String,
                "exe_env": String, # <-- If not provided will be default as 'general'
                "active": Boolean, # <-- set to true to load configuration
                "type": "prototype_config", # <-- MUST BE THIS VALUE; For it is the config type :)
                "source": String,
                "logic": {
                    # The logical blocks for Detection
                }
            }

        Returns:
            list of dict: Configurations

        """
        self.ioc.getLogger().trace("Loading Configurations from mongo",
                                   trace=True)
        mConf = []
        for conf in self.ioc.getCollection('Configuration').find({
                'active': True,
                'type': 'prototype_config'
        }):
            mConf.append(dict(conf))
        self.ioc.getLogger().trace(
            "total documents returned from mongo [{0}]".format(len(mConf)),
            trace=True)
        return mConf

    def validate_config_list(self, configs):
        """Validates a configuration List

        Args:
            configs (list[dict]): Configuration List

        Returns:
            list: The Valid configurations

        """
        final = []
        self.ioc.getLogger().trace(
            "Total configurations to validate [{0}]".format(len(configs)))
        for conf in configs:
            if self.validate_config(conf):
                final.append(conf)
        return final

    def validate_config(self, config):
        """Validates a configuration

        The default JSON Schema is this::

            {
                "name": String,
                "job": String,
                "exe_env": String, # <-- If not provided will be default as 'general'
                "source": String,
                "logic": {
                    # The logical blocks for Detection
                }
            }

        Args:
            config (dict): Configuration to validate

        Returns:
            bool: If it is a valid configuration

        """
        self.ioc.getLogger().trace(
            "Configuration to be validated: [{0}]".format(config), trace=True)
        if not isinstance(config, dict):
            self.ioc.getLogger().error(
                "Configuration Validation Failed! Not of type dict; got [{0}]".format(str(type(config))),
                trace=True,
                notify=False)
            return False
        if config.get('name'):
            if not isinstance(config.get('name'), str):
                config['name'] = str(config.get('name'))
        else:
            self.ioc.getLogger().error(
                "Configuration does not have valid name field",
                trace=True,
                notify=False)
            return False
        if config.get('job'):
            if not isinstance(config.get('job'), str):
                config['job'] = str(config.get('job'))
        else:
            self.ioc.getLogger().error(
                "Configuration does not have valid job field",
                trace=True,
                notify=False)
            return False
        if config.get('source'):
            if not isinstance(config.get('source'), str):
                config['source'] = str(config.get('source'))
        else:
            self.ioc.getLogger().error(
                "Configuration does not have valid source field",
                trace=True,
                notify=False)
            return False
        if not isinstance(config.get('logic'), dict):
            self.ioc.getLogger().error(
                "Configuration does not have valid logic field",
                trace=True,
                notify=False)
            return False
        if not config.get('logic'):
            # empty dictionary check AKA no logical blocks
            return False
        for key, params in config.get('logic').items():
            if not isinstance(params, list):
                self.ioc.getLogger().error(
                    "Configuration logic field was not list!",
                    trace=True,
                    notify=False)
                return False
            for block in params:
                if not isinstance(block, dict):
                    self.ioc.getLogger().error(
                        "Configuration logical block was not dict",
                        trace=True,
                        notify=False)
                    return False
        return True
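
To illustrate the schema checks above, here is a configuration that should pass validate_config. Every field value is invented, and the shape of the logic block (detector name and its parameters) is assumed from the docstring rather than taken from GREASE; constructing PrototypeConfig also assumes a reachable MongoDB since it triggers a full load.

example_config = {
    'name': 'example_scan',
    'job': 'HelloWorld',           # hypothetical command name
    'exe_env': 'general',          # optional; defaults to 'general'
    'source': 'example_source',
    'logic': {
        'Regex': [                 # each logic key maps to a list of dicts
            {'field': 'status', 'pattern': 'down'}
        ]
    }
}

conf = PrototypeConfig()
assert conf.validate_config(example_config) is True
# invalid entries are filtered out (with an error log) by the list variant
assert conf.validate_config_list([example_config, {'bad': 'config'}]) == [example_config]
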
Example #7
class Scan(object):
    """Scanning class for GREASE Scanner

    This is the model to actually utilize the scanners to parse the configured environments

    Attributes:
        ioc (GreaseContainer): IOC for scanning
        conf (PrototypeConfig): Prototype configuration instance
        impTool (ImportTool): Import Utility Instance
        dedup (Deduplication): Deduplication instance to be used

    """
    def __init__(self, ioc=None):
        if ioc and isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.conf = PrototypeConfig(self.ioc)
        self.impTool = ImportTool(self.ioc.getLogger())
        self.dedup = Deduplication(self.ioc)
        self.scheduler = Scheduling(self.ioc)

    def Parse(self, source=None, config=None):
        """This will read all configurations and attempt to scan the environment

        This is the primary business logic for scanning in GREASE. This method will use configurations to parse
        the environment and attempt to schedule

        Note:
            If a Source is specified then *only* that source is parsed. If a configuration is set then *only* that
            configuration is parsed. If both are provided then the configuration will *only* be parsed if it is of
            the source provided

        Note:
            **If mocking is enabled**: Deduplication *will not occur*

        Args:
            source (str): If set will only parse for the source listed
            config (str): If set will only parse the specified config

        Returns:
            bool: True unless error

        """
        self.ioc.getLogger().trace("Starting Parse of Environment", trace=True)
        Configuration = self.generate_config_set(source=source, config=config)
        for conf in Configuration:
            inst = self.impTool.load(conf.get('source', str(uuid4())))
            if not isinstance(inst, BaseSourceClass):
                self.ioc.getLogger().error("Invalid Source [{0}]".format(
                    conf.get('source')),
                                           notify=False)
                del inst
                continue
            else:
                # If mock mode enabled
                if self.ioc.getConfig().get('Sourcing', 'mock'):
                    data = inst.mock_data(conf)
                # else actually do sourcing
                else:
                    if inst.parse_source(conf):
                        # deduplicate data
                        data = self.dedup.Deduplicate(
                            data=inst.get_data(),
                            source=conf.get('source'),
                            threshold=inst.deduplication_strength,
                            expiry_hours=inst.deduplication_expiry,
                            expiry_max=inst.deduplication_expiry_max,
                            collection='Dedup_Sourcing',
                            field_set=inst.field_set)
                    else:
                        self.ioc.getLogger().warning(
                            "Source [{0}] parsing failed".format(
                                conf.get('source')),
                            notify=False)
                        data = []
                if len(data) > 0:
                    if self.scheduler.scheduleDetection(
                            conf.get('source'), conf.get('name'), data):
                        self.ioc.getLogger().info(
                            "Data scheduled for detection from source [{0}]".
                            format(conf.get('source')),
                            trace=True)
                        del inst
                        continue
                    else:
                        self.ioc.getLogger().error(
                            "Scheduling failed for source document!",
                            notify=False)
                        del inst
                        continue
                else:
                    self.ioc.getLogger().trace(
                        "Data was empty; nothing was scheduled",
                        trace=True)
                    del inst
                    continue
        return True

    def generate_config_set(self, source=None, config=None):
        """Examines configuration and returns list of configs to parse

        Note:
            If a Source is specified then *only* that source is parsed. If a configuration is set then *only* that
            configuration is parsed. If both are provided then the configuration will *only* be parsed if it is of
            the source provided

        Args:
            source (str): If set will only parse for the source listed
            config (str): If set will only parse the specified config

        Returns:
            list[dict]: Returns Configurations to Parse for data

        """
        ConfigList = []
        if source and config:
            if self.conf.get_config(config).get('source') == source:
                ConfigList.append(self.conf.get_config(config))
                return ConfigList
            else:
                self.ioc.getLogger().warning(
                    "Configuration [{0}] Not Found With Correct Source [{1}]".
                    format(config, source),
                    trace=True,
                    notify=False)
        elif source and not config:
            if source in self.conf.get_sources():
                for configuration in self.conf.get_source(source):
                    ConfigList.append(configuration)
                return ConfigList
            else:
                self.ioc.getLogger().warning(
                    "Source not found in Configuration [{0}]".format(source),
                    trace=True,
                    notify=False)
        elif not source and config:
            if self.conf.get_config(config):
                ConfigList.append(self.conf.get_config(config))
                return ConfigList
            else:
                self.ioc.getLogger().warning(
                    "Config not found in Configuration [{0}]".format(config),
                    trace=True,
                    notify=False)
        else:
            ConfigList = self.conf.getConfiguration().get('raw')
        return ConfigList
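
Kicking off a scan cycle then reduces to constructing the model and calling Parse; a hedged sketch, assuming a registered node with MongoDB and hypothetical source/config names:

scanner = Scan()
scanner.Parse()                         # parse every loaded configuration
scanner.Parse(source='example_source')  # only configs for one source
scanner.Parse(config='example_scan')    # only one named configuration
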
Example #8
class BridgeCommand(object):
    """Methods for Cluster Administration

    Attributes:
        ioc (GreaseContainer): IOC access
        imp (ImportTool): Import Tool Instance
        monitor (NodeMonitoring): Node Monitoring Model Instance

    """
    def __init__(self, ioc=None):
        if isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.imp = ImportTool(self.ioc.getLogger())
        self.monitor = NodeMonitoring(self.ioc)

    def action_register(self):
        """Ensures Registration of server

        Returns:
            bool: Registration status

        """
        self.ioc.getLogger().debug("Registration Requested")
        if self.ioc.ensureRegistration():
            print("Registration Complete!")
            self.ioc.getLogger().info("Registration Completed Successfully")
            return True
        print("Registration Failed!")
        self.ioc.getLogger().info("Registration Failed")
        return False

    def action_info(self, node=None, jobs=None, prototypeJobs=None):
        """Gets Node Information

        Args:
            node (str): MongoDB Object ID to get information about
            jobs (bool): If true then will retrieve jobs executed by this node
            prototypeJobs (bool): If true then prototype jobs will be printed as well

        Note:
            provide a node argument via the CLI --node=4390qwr2fvdew458239
        Note:
            provide a jobs argument via the CLI --jobs
        Note:
            provide a prototype jobs argument via the CLI --pJobs

        Returns:
            bool: If Info was found

        """
        if not self.ioc.ensureRegistration():
            self.ioc.getLogger().error("Server not registered with MongoDB")
            print("Unregistered servers cannot talk to the cluster")
            return False
        valid, serverId = self.valid_server(node)
        if not valid:
            print("Invalid ObjectID")
            return False
        server = self.ioc.getCollection('JobServer').find_one(
            {'_id': ObjectId(str(serverId))})
        if server:
            server = dict(server)
            print("""
<<<<<<<<<<<<<< SERVER: {0} >>>>>>>>>>>>>>
Activation State: {1} Date: {2}
Jobs: {3}
Operating System: {4}
Prototypes: {5}
Execution Roles: {6}
            """.format(server.get('_id'), server.get('active'),
                       server.get('activationTime'), server.get('jobs'),
                       server.get('os'), server.get('prototypes'),
                       server.get('roles')))
            if jobs and prototypeJobs:
                print(
                    "======================= SOURCING =======================")
                for job in self.ioc.getCollection('SourceData').find(
                    {'grease_data.sourcing.server': ObjectId(serverId)}):
                    print("""
-------------------------------
Job: {0}
-------------------------------
                    """.format(job['_id']))
            if jobs and prototypeJobs:
                print(
                    "======================= DETECTION ======================="
                )
                for job in self.ioc.getCollection('SourceData').find(
                    {'grease_data.detection.server': ObjectId(serverId)}):
                    print("""
-------------------------------
Job: {0}
Start Time: {1}
End Time: {2}
Context: {3}
-------------------------------
                    """.format(job['_id'],
                               job['grease_data']['detection']['start'],
                               job['grease_data']['detection']['end'],
                               job['grease_data']['detection']['detection']))
            if jobs and prototypeJobs:
                print(
                    "======================= SCHEDULING ======================="
                )
                for job in self.ioc.getCollection('SourceData').find(
                    {'grease_data.scheduling.server': ObjectId(serverId)}):
                    print("""
-------------------------------
Job: {0}
Start Time: {1}
End Time: {2}
-------------------------------
                    """.format(job['_id'],
                               job['grease_data']['scheduling']['start'],
                               job['grease_data']['scheduling']['end']))
            if jobs:
                print(
                    "======================= EXECUTION ======================="
                )
                for job in self.ioc.getCollection('SourceData').find(
                    {'grease_data.execution.server': ObjectId(serverId)}):
                    print("""
-------------------------------
Job: {0}
Assignment Time: {1}
Completed Time: {2}
Execution Success: {3}
Command Success: {4}
Failures: {5}
Return Data: {6}
-------------------------------
                    """.format(
                        job['_id'],
                        job['grease_data']['execution']['assignmentTime'],
                        job['grease_data']['execution']['completeTime'],
                        job['grease_data']['execution']['executionSuccess'],
                        job['grease_data']['execution']['commandSuccess'],
                        job['grease_data']['execution']['failures'],
                        job['grease_data']['execution']['returnData']))
            return True
        print("Unable to locate server")
        self.ioc.getLogger().error(
            "Unable to load [{0}] server for information".format(serverId))
        return False

    def action_assign(self, prototype=None, role=None, node=None):
        """Assign prototypes/roles to a node either local or remote

        Args:
            prototype (str): Prototype Job to assign
            role (str): Role to assign
            node (str): MongoDB ObjectId of node to assign to, if not provided will default to the local node

        Returns:
            bool: If successful true else false

        """
        assigned = False
        if prototype:
            job = self.imp.load(str(prototype))
            if not job or not isinstance(job, Command):
                print(
                    "Cannot find prototype [{0}] to assign; check search path!".
                    format(prototype))
                self.ioc.getLogger().error(
                    "Cannot find prototype [{0}] to assign; check search path!".
                    format(prototype))
                return False
            # Cleanup job
            job.__del__()
            del job
            valid, serverId = self.valid_server(node)
            if not valid:
                print("Invalid ObjectID")
                return False
            updated = self.ioc.getCollection('JobServer').update_one(
                {
                    '_id': ObjectId(serverId)
                }, {
                    '$addToSet': {
                        'prototypes': prototype
                    }
                }).acknowledged
            if updated:
                print("Prototype Assigned")
                self.ioc.getLogger().info(
                    "Prototype [{0}] assigned to server [{1}]".format(
                        prototype, serverId))
                assigned = True
            else:
                print("Prototype Assignment Failed!")
                self.ioc.getLogger().info(
                    "Prototype [{0}] assignment failed for server [{1}]".format(
                        prototype, serverId))
                return False
        if role:
            valid, serverId = self.valid_server(node)
            if not valid:
                print("Invalid ObjectID")
                return False
            updated = self.ioc.getCollection('JobServer').update_one(
                {
                    '_id': ObjectId(serverId)
                }, {
                    '$push': {
                        'roles': role
                    }
                }).acknowledged
            if updated:
                print("Role Assigned")
                self.ioc.getLogger().info(
                    "Role [{0}] assigned to server [{1}]".format(
                        role, serverId))
                assigned = True
            else:
                print("Role Assignment Failed!")
                self.ioc.getLogger().info(
                    "Role [{0}] assignment failed for server [{1}]".format(
                        role, serverId))
                return False
        if not assigned:
            print("Assignment failed, please check logs for details")
        return assigned

    def action_unassign(self, prototype=None, role=None, node=None):
        """Unassign prototypes to a node either local or remote

        Args:
            prototype (str): Prototype Job to unassign
            role (str): Role to unassign
            node (str): MongoDB ObjectId of node to unassign to, if not provided will default to the local node

        Returns:
            bool: If successful true else false

        """
        unassigned = False
        if prototype:
            job = self.imp.load(str(prototype))
            if not job or not isinstance(job, Command):
                print(
                    "Cannot find prototype [{0}] to unassign; check search path!"
                    .format(prototype))
                self.ioc.getLogger().error(
                    "Cannot find prototype [{0}] to unassign; check search path!"
                    .format(prototype))
                return False
            # Cleanup job
            job.__del__()
            del job
            valid, serverId = self.valid_server(node)
            if not valid:
                print("Invalid ObjectID")
                return False
            updated = self.ioc.getCollection('JobServer').update_one(
                {
                    '_id': ObjectId(serverId)
                }, {
                    '$pull': {
                        'prototypes': prototype
                    }
                }).acknowledged
            if updated:
                print("Prototype Assignment Removed")
                self.ioc.getLogger().info(
                    "Prototype [{0}] unassigned from server [{1}]".format(
                        prototype, serverId))
                unassigned = True
            else:
                print("Prototype Unassignment Failed!")
                self.ioc.getLogger().info(
                    "Prototype [{0}] unassignment failed for server [{1}]".
                    format(prototype, serverId))
                return False
        if role:
            valid, serverId = self.valid_server(node)
            if not valid:
                print("Invalid ObjectID")
                return False
            updated = self.ioc.getCollection('JobServer').update_one(
                {
                    '_id': ObjectId(serverId)
                }, {
                    '$pull': {
                        'roles': role
                    }
                }).acknowledged
            if updated:
                print("Role Removed")
                self.ioc.getLogger().info(
                    "Role [{0}] removed from server [{1}]".format(
                        role, serverId))
                unassigned = True
            else:
                print("Role Removal Failed!")
                self.ioc.getLogger().info(
                    "Role [{0}] removal failed for server [{1}]".format(
                        role, serverId))
                return False
        if not unassigned:
            print("Unassignment failed, please check logs for details")
        return unassigned

    def action_cull(self, node=None):
        """Culls a server from the active cluster

        Args:
            node (str): MongoDB ObjectId to cull; defaults to local node

        Returns:
            bool: If culling is successful

        """
        if not self.ioc.ensureRegistration():
            self.ioc.getLogger().error("Server not registered with MongoDB")
            print("Unregistered servers cannot talk to the cluster")
            return False
        valid, serverId = self.valid_server(node)
        if not valid:
            print("Invalid ObjectID")
            return False
        if not self.monitor.deactivateServer(serverId):
            self.ioc.getLogger().error(
                "Failed deactivating server [{0}]".format(serverId))
            print("Failed deactivating server [{0}]".format(serverId))
            return False
        self.ioc.getLogger().warning(
            "Server [{0}] preparing to reallocate detect jobs".format(
                serverId))
        if not self.monitor.rescheduleDetectJobs(serverId):
            self.ioc.getLogger().error(
                "Failed rescheduling detect jobs [{0}]".format(serverId))
            print("Failed rescheduling detect jobs [{0}]".format(serverId))
            return False
        self.ioc.getLogger().warning(
            "Server [{0}] preparing to reallocate schedule jobs".format(
                serverId))
        if not self.monitor.rescheduleScheduleJobs(serverId):
            self.ioc.getLogger().error(
                "Failed rescheduling schedule jobs [{0}]".format(serverId))
            print("Failed rescheduling schedule jobs [{0}]".format(serverId))
            return False
        self.ioc.getLogger().warning(
            "Server [{0}] preparing to reallocate jobs".format(serverId))
        if not self.monitor.rescheduleJobs(serverId):
            self.ioc.getLogger().error(
                "Failed rescheduling execution jobs [{0}]".format(serverId))
            print("Failed rescheduling execution jobs [{0}]".format(serverId))
            return False
        print("Server Deactivated")
        return True

    def action_activate(self, node=None):
        """Activates a server in the cluster

        Args:
            node (str): MongoDB ObjectId to activate; defaults to local node

        Returns:
            bool: If activation is successful

        """
        if not self.ioc.ensureRegistration():
            self.ioc.getLogger().error("Server not registered with MongoDB")
            print("Unregistered servers cannot talk to the cluster")
            return False
        valid, serverId = self.valid_server(node)
        if not valid:
            print("Invalid ObjectID")
            return False
        if self.ioc.getCollection('JobServer').update_one(
            {
                '_id': ObjectId(serverId)
            }, {
                '$set': {
                    'active': True,
                    'activationTime': datetime.datetime.utcnow()
                }
            }).modified_count < 1:
            self.ioc.getLogger().warning(
                "Server [{0}] failed to be activated".format(serverId))
            return False
        self.ioc.getLogger().warning("Server [{0}] activated".format(serverId))
        return True

    def valid_server(self, node=None):
        """Validates node is in the MongoDB instance connected to

        Args:
            node (str): MongoDB Object ID to validate; defaults to local node

        Returns:
            tuple: first element is a boolean indicating validity; second is the ObjectId as a string

        """
        if node:
            try:
                server = self.ioc.getCollection('JobServer').find_one(
                    {'_id': ObjectId(str(node))})
            except InvalidId:
                self.ioc.getLogger().error(
                    "Invalid ObjectId passed to valid_server [{0}]".format(
                        node))
                return False, ""
            if server:
                return True, dict(server).get('_id')
            self.ioc.getLogger().error(
                "Failed to find server [{0}] in the database".format(node))
            return False, ""
        return True, self.ioc.getConfig().NodeIdentity
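
The assignment methods above lean on two different MongoDB update operators: `$addToSet` for prototypes, which silently ignores duplicates, and `$push` for roles, which appends unconditionally (so a role can end up assigned twice). A minimal pymongo sketch of that difference, assuming only a local MongoDB instance; the database and collection names here are illustrative throwaways:

# Sketch of the $addToSet vs $push semantics used by action_assign.
# Assumes a local MongoDB on the default port; names are illustrative.
from pymongo import MongoClient

coll = MongoClient()['grease_demo']['JobServer_demo']
server_id = coll.insert_one({'prototypes': [], 'roles': []}).inserted_id

# $addToSet: assigning the same prototype twice leaves a single entry
for _ in range(2):
    coll.update_one({'_id': server_id}, {'$addToSet': {'prototypes': 'scan'}})

# $push: assigning the same role twice stores it twice
for _ in range(2):
    coll.update_one({'_id': server_id}, {'$push': {'roles': 'general'}})

doc = coll.find_one({'_id': server_id})
print(doc['prototypes'])  # ['scan']
print(doc['roles'])       # ['general', 'general']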
Example n. 9
class NodeMonitoring(object):
    """Monitors cluster nodes for unhealthy state

    Attributes:
        ioc (GreaseContainer): IoC Access
        centralScheduler (Scheduling): Central Scheduling Instance
        scheduler (Scheduler): Scheduling Model Instance

    """

    def __init__(self, ioc=None):
        if isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.centralScheduler = Scheduling(self.ioc)
        self.scheduler = Scheduler(self.ioc)

    def monitor(self):
        """Monitoring process

        Returns:
            bool: If successful monitoring run occurred

        """
        servers = self.getServers()
        retVal = False
        self.ioc.getLogger().debug("Total servers to monitor [{0}]".format(len(servers)), trace=True)
        for server in servers:
            if self.serverAlive(server.get('_id')):
                retVal = True
                continue
            else:
                self.ioc.getLogger().warning("Server [{0}] preparing to be culled from pool".format(server.get('_id')))
                self.ioc.getLogger().warning("Server [{0}] preparing to be deactivated".format(server.get('_id')))
                if not self.deactivateServer(server.get('_id')):
                    self.ioc.getLogger().error(
                        "Failed deactivating server [{0}]".format(server.get('_id'))
                    )
                    retVal = False
                    break
                self.ioc.getLogger().warning(
                    "Server [{0}] preparing to reallocate detect jobs".format(server.get('_id'))
                )
                if not self.rescheduleDetectJobs(server.get('_id')):
                    self.ioc.getLogger().error(
                        "Failed rescheduling detect jobs [{0}]".format(server.get('_id'))
                    )
                    retVal = False
                    break
                self.ioc.getLogger().warning(
                    "Server [{0}] preparing to reallocate schedule jobs".format(server.get('_id'))
                )
                if not self.rescheduleScheduleJobs(server.get('_id')):
                    self.ioc.getLogger().error(
                        "Failed rescheduling schedule jobs [{0}]".format(server.get('_id'))
                    )
                    retVal = False
                    break
                self.ioc.getLogger().warning(
                    "Server [{0}] preparing to reallocate jobs".format(server.get('_id'))
                )
                if not self.rescheduleJobs(server.get('_id')):
                    self.ioc.getLogger().error(
                        "Failed rescheduling execution jobs [{0}]".format(server.get('_id'))
                    )
                    retVal = False
                    break
        return retVal

    def scanComplete(self):
        """Inserts a completed source so that this local server reads as alive on the next run

        This method makes the server's 'heart' beat after each run. It inserts a completed SourceData document
        and increments the job counter in the JobServer document

        Returns:
            None: Writes a MongoDB Document

        """
        self.ioc.getCollection('SourceData').insert_one({
            'grease_data': {
                'sourcing': {
                    'server': ObjectId(self.ioc.getConfig().NodeIdentity)
                },
                'detection': {
                    'server': ObjectId(self.ioc.getConfig().NodeIdentity),
                    'start': datetime.datetime.utcnow(),
                    'end': datetime.datetime.utcnow(),
                    'detection': {}
                },
                'scheduling': {
                    'server': ObjectId(self.ioc.getConfig().NodeIdentity),
                    'start': datetime.datetime.utcnow(),
                    'end': datetime.datetime.utcnow()
                },
                'execution': {
                    'server': ObjectId(self.ioc.getConfig().NodeIdentity),
                    'assignmentTime': datetime.datetime.utcnow(),
                    'completeTime': datetime.datetime.utcnow(),
                    'returnData': {},
                    'executionSuccess': True,
                    'commandSuccess': True,
                    'failures': 0
                }
            },
            'source': 'grease_internal_node_monitoring',
            'configuration': None,
            'data': {},
            'createTime': datetime.datetime.utcnow(),
            'expiry': Deduplication.generate_max_expiry_time(1)
        })
        server = self.ioc.getCollection('JobServer').find_one({'_id': ObjectId(self.ioc.getConfig().NodeIdentity)})
        if not server:
            self.ioc.getLogger().critical(
                "Failed to find server [{0}] after monitoring occurred!".format(self.ioc.getConfig().NodeIdentity)
            )
        self.ioc.getCollection('JobServer').update_one({
            '_id': ObjectId(self.ioc.getConfig().NodeIdentity)},
            {'$set': {'jobs': dict(server).get('jobs', 0) + 1}}
        )

    def getServers(self):
        """Returns the servers to be monitored this cycle

        Returns:
            list[dict]: List of servers

        """
        final = []
        servers = self.ioc.getCollection('JobServer').find({'active': True})
        for server in servers:
            final.append(dict(server))
        return final

    def serverAlive(self, serverId):
        """Checks to see if server is alive

        This method checks if the serverId exists in the collection and determines if its job counter has
        changed recently. If it is a newly configured node it will be added to the monitoring collection

        Args:
            serverId (str): ObjectId of server

        Returns:
            bool: If server is alive

        """
        # Server Health Collection
        coll = self.ioc.getCollection('ServerHealth')
        Server = coll.find_one({'server': ObjectId(serverId)})
        if Server:
            # We have a server already in the system
            serverStats = self.ioc.getCollection('JobServer').find_one({'_id': ObjectId(serverId)})
            if serverStats:
                # compare previous results to see if there has been change
                if dict(Server).get('jobs', 0) < dict(serverStats).get('jobs', 0):
                    # Job Server Numbers have changed
                    coll.update_one(
                        {'_id': Server['_id']},
                        {
                            '$set': {
                                'jobs': dict(serverStats).get('jobs', 0),
                                'checkTime': datetime.datetime.utcnow()
                            }
                        }
                    )
                    self.ioc.getLogger().trace("JobServer [{0}] is alive".format(serverId), trace=True)
                    return True
                else:
                    if dict(Server).get('checkTime', datetime.datetime.utcnow()) < \
                            datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                        # server has aged out
                        self.ioc.getLogger().trace(
                            "JobServer [{0}] is not alive; Timestamp has not changed in ten minutes".format(serverId),
                            trace=True
                        )
                        return False
                    else:
                        # server is in a degraded state
                        self.ioc.getLogger().warning("JobServer [{0}] is degraded!".format(serverId), trace=True)
                        return True
            else:
                # Failed to find server in JobServer collection
                self.ioc.getLogger().error("JobServer not found during node monitoring! [{0}]".format(serverId))
                return False
        else:
            # we have a new server
            serverStats = self.ioc.getCollection('JobServer').find_one({'_id': ObjectId(serverId)})
            if serverStats:
                coll.insert_one(
                    {
                        'server': ObjectId(serverId),
                        'jobs': dict(serverStats).get('jobs', 0),
                        'checkTime': datetime.datetime.utcnow()
                    }
                )
                self.ioc.getLogger().info("New JobServer persisted in monitoring [{0}]".format(serverId))
                return True
            else:
                # Failed to find server in JobServer collection
                self.ioc.getLogger().error("New JobServer not found during node monitoring! [{0}]".format(serverId))
                return False

    def deactivateServer(self, serverId):
        """deactivates server from pool

        Args:
            serverId (str): ObjectId to deactivate

        Returns:
            bool: If deactivation is successful

        """
        if self.ioc.getCollection('JobServer').update_one(
                {'_id': ObjectId(serverId)},
                {
                    '$set': {
                        'active': False
                    }
                }
        ).modified_count < 1:
            self.ioc.getLogger().warning("Server [{0}] failed to be deactivated".format(serverId))
            return False
        else:
            self.ioc.getLogger().warning("Server [{0}] deactivated".format(serverId))
            return True

    def rescheduleDetectJobs(self, serverId):
        """Reschedules any detection jobs

        Args:
            serverId (str): Server ObjectId

        Returns:
            bool: rescheduling success

        """
        retval = True
        server = self.ioc.getCollection('JobServer').find_one({'_id': ObjectId(serverId)})
        if not server:
            self.ioc.getLogger().error(
                "Failed to load server details while trying to reschedule detection [{0}]".format(serverId)
            )
            return False
        for job in self.ioc.getCollection('SourceData').find(
            {
                'grease_data.detection.server': ObjectId(serverId),
                'grease_data.detection.start': None,
                'grease_data.detection.end': None
            }
        ):
            job = dict(job)
            if not self.centralScheduler.scheduleDetection(job.get('source'), job.get('configuration'), [job]):
                retval = False
                break
            else:
                self.ioc.getCollection('JobServer').update_one(
                    {'_id': ObjectId(serverId)},
                    {
                        '$set': {
                            'jobs': dict(server).get('jobs', 0) - 1
                        }
                    }
                )
        return retval

    def rescheduleScheduleJobs(self, serverId):
        """Reschedules any scheduling jobs

        Args:
            serverId (str): Server ObjectId

        Returns:
            bool: rescheduling success

        """
        retval = True
        server = self.ioc.getCollection('JobServer').find_one({'_id': ObjectId(serverId)})
        if not server:
            self.ioc.getLogger().error(
                "Failed to load server details while trying to reschedule schedules [{0}]".format(serverId)
            )
            return False
        for job in self.ioc.getCollection('SourceData').find(
            {
                'grease_data.scheduling.server': ObjectId(serverId),
                'grease_data.scheduling.start': None,
                'grease_data.scheduling.end': None
            }
        ):
            job = dict(job)
            if not self.centralScheduler.scheduleScheduling(job.get('_id')):
                retval = False
                break
            else:
                self.ioc.getCollection('JobServer').update_one(
                    {'_id': ObjectId(serverId)},
                    {
                        '$set': {
                            'jobs': dict(server).get('jobs', 0) - 1
                        }
                    }
                )
        return retval

    def rescheduleJobs(self, serverId):
        """Reschedules any failed execution jobs

        Args:
            serverId (str): Server ObjectId

        Returns:
            bool: rescheduling success

        """
        retval = True
        server = self.ioc.getCollection('JobServer').find_one({'_id': ObjectId(serverId)})
        if not server:
            self.ioc.getLogger().error(
                "Failed to load server details while trying to reschedule executions [{0}]".format(serverId)
            )
            return False
        for job in self.ioc.getCollection('SourceData').find(
            {
                'grease_data.execution.server': ObjectId(serverId),
                'grease_data.execution.commandSuccess': False,
                'grease_data.execution.executionSuccess': False,
                'grease_data.execution.failures': {'$lt': 6}
            }
        ):
            job = dict(job)
            if not self.scheduler.schedule(job):
                retval = False
                break
            else:
                self.ioc.getCollection('JobServer').update_one(
                    {'_id': ObjectId(serverId)},
                    {
                        '$set': {
                            'jobs': dict(server).get('jobs', 0) - 1
                        }
                    }
                )
        return retval
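
`serverAlive` above reduces to a simple heartbeat rule: a node is alive if its job counter advanced since the last check, degraded if the counter has been stale for under ten minutes, and dead past that. A standalone restatement of that rule using only the standard library; the function name and state strings are illustrative, not part of GREASE:

import datetime

def heartbeat_state(last_jobs, current_jobs, last_check, now=None):
    """Classifies a monitored node as 'alive', 'degraded', or 'dead'."""
    now = now or datetime.datetime.utcnow()
    if last_jobs < current_jobs:
        return 'alive'      # counter advanced -> node did work since the last check
    if last_check < now - datetime.timedelta(minutes=10):
        return 'dead'       # counter stale for over ten minutes -> candidate for culling
    return 'degraded'       # stale but recent -> keep watching

now = datetime.datetime.utcnow()
print(heartbeat_state(5, 7, now))                                  # alive
print(heartbeat_state(5, 5, now - datetime.timedelta(minutes=3)))  # degraded
print(heartbeat_state(5, 5, now - datetime.timedelta(minutes=11))) # dead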
Example n. 10
class Deduplication(object):
    """Responsible for Deduplication Operations

    Deduplication in GREASE is a multi-step process designed to balance performance and accuracy. An overview of
    the process:
        - Step 1: Identify an Object Type 1 Hash Match. A Type 1 Object (T1) is a SHA256 hash of a dictionary in a data list. If we can hash the entire object and find a match then the object is 100% duplicate.
        - Step 2: Object Type 2 Matching. If a Type 1 (T1) object cannot be found, Type 2 Object (T2) deduplication occurs. This introspects each field of the dictionary and maps it against other likely objects of the same type. If a hash match is found (source + field + value as a SHA256) then the field is 100% duplicate. If the aggregate score of all fields (or of the specified subset) is above the provided threshold then the object is considered duplicate. This prevents similar objects from passing through when they are most likely updates to an original object that does not need to be computed on. If a field you always need is expected to change, exclude it by passing a `field_set` that omits it to the `Deduplicate` method.

    Object examples::

        # Type 1 Object

        {
            '_id': ObjectId, # <-- MongoDB ObjectID
            'type': Int, # <-- Always Type 1
            'hash': String, # <-- SHA256 hash of entire object
            'expiry': DateTime, # <-- Expiration time if no objects are found to be duplicate after which object will be deleted
            'max_expiry': DateTime, # <-- Expiration time for object to be deleted when reached
            'score': Int, # <-- Amount of times this object has been found
            'source': String # <-- Source of the object
        }
        # Type 2 Object
        {
            '_id': ObjectId, # <-- MongoDB ObjectID
            'type': Int, # <-- Always Type 2
            'source': String, # <-- Source of data
            'field': String, # <-- Field in Object
            'value': String, # <-- Value of Object's field
            'hash': String, # <-- SHA256 of source + field + value
            'expiry': DateTime, # <-- Expiration time if no objects are found to be duplicate after which object will be deleted
            'max_expiry': DateTime, # <-- Expiration time for object to be deleted when reached
            'score': Int, # <-- Amount of times this object has been found
            'parentId': ObjectId # <-- T1 Object ID from parent
        }

    Attributes:
        ioc (GreaseContainer): IoC access for DeDuplication

    """
    def __init__(self, ioc=None):
        if isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()

    def Deduplicate(self,
                    data,
                    source,
                    configuration,
                    threshold,
                    expiry_hours,
                    expiry_max,
                    collection,
                    field_set=None):
        """Deduplicate data

        This method will deduplicate the `data` object so that only unique objects are returned. The `collection`
        argument names the MongoDB collection where deduplication records are stored

        Args:
            data (list[dict]): **list or single dimensional dictionaries** to deduplicate
            source (str): Source of data being deduplicated
            configuration (str): Configuration Name Provided
            threshold (float): level of duplication allowed in an object (the lower the threshold the more uniqueness is required)
            expiry_hours (int): Hours to retain deduplication data
            expiry_max (int): Maximum days to retain deduplication data
            collection (str): Deduplication collection to use
            field_set (list, optional): Fields to deduplicate on

        Note:
            expiry_hours is specific to how many hours objects will be persisted for if they are not seen again

        Returns:
            list[dict]: Deduplicated data

        """
        # ensure we got a list
        if not isinstance(data, list):
            self.ioc.getLogger().error(
                "Data was not of type list for Deduplication; got type [{0}]".
                format(str(type(data))),
                notify=False,
                verbose=True)
            return []
        # ensure there is data to parse
        if len(data) <= 0:
            # empty list return empty lists
            return []
        self.ioc.getLogger().trace(
            "Starting deduplication from data source [{0}] total records to parse [{1}]"
            .format(source, len(data)),
            trace=True)
        # now comes James' version of machine learning. I call it "Blue Collar Machine Learning"
        # Pointer to access items in the list
        data_pointer = 0
        # Max Length
        data_max = len(data)
        if data_max == 0:
            # we have no data to process
            self.ioc.getLogger().trace("Length of data is zero", verbose=True)
            return []
        # Thread pool
        threads = []
        # Final result
        final = []
        # loop through the objects
        while data_pointer < data_max:
            # ensure we don't swamp the system resources
            cpu = cpu_percent(interval=.1)
            mem = virtual_memory().percent
            if \
                    cpu >= int(self.ioc.getConfig().get('NodeInformation', 'ResourceMax')) or \
                    mem >= int(self.ioc.getConfig().get('NodeInformation', 'ResourceMax')):
                self.ioc.getLogger().trace(
                    "Deduplication sleeping; System resource maximum reached",
                    verbose=True)
                # remove variables
                del cpu
                del mem
                continue
            # Resources are available lets start cooking down this list
            # Poll the active threads to ensure we are cleaning up
            self.ioc.getLogger().trace("Thread Pool polling Starting",
                                       verbose=True)
            threads_final = []
            for thread in threads:
                if thread.is_alive():
                    threads_final.append(thread)
            threads = threads_final
            self.ioc.getLogger().trace("Thread polling complete", verbose=True)
            self.ioc.getLogger().trace(
                "Total current deduplication threads [{0}]".format(
                    len(threads)),
                verbose=True)
            # ensure we do not breach the thread limit for the server
            if len(threads) >= int(self.ioc.getConfig().get(
                    'NodeInformation', 'DeduplicationThreads')):
                self.ioc.getLogger().trace(
                    "Thread max reached. Deduplication waiting for threads to complete",
                    verbose=True)
                continue
            # Ensure each object is a dictionary
            if not isinstance(data[data_pointer], dict):
                self.ioc.getLogger().warning(
                    'DeDuplication Received NON-DICT from source: [{0}] Type: [{1}] got: [{2}]'
                    .format(source, str(type(data[data_pointer])),
                            str(data[data_pointer])))
                data_pointer += 1
                continue
            # create thread for deduplication
            proc = threading.Thread(
                target=self.deduplicate_object,
                args=(
                    self.ioc,
                    data[data_pointer],
                    expiry_hours,
                    expiry_max,
                    threshold,
                    source,
                    configuration,
                    final,
                    collection,
                    data_pointer,
                    data_max,
                    field_set,
                ),
                name="GREASE DEDUPLICATION THREAD [{0}/{1}]".format(
                    data_pointer, data_max))
            proc.daemon = True
            proc.start()
            threads.append(proc)
            data_pointer += 1
            self.ioc.getLogger().trace(
                "Total current deduplication threads [{0}]".format(
                    len(threads)),
                verbose=True)
        self.ioc.getLogger().info(
            "All data objects have been threaded for processing", verbose=True)
        # wait for threads to finish out
        while len(threads) > 0:
            self.ioc.getLogger().trace(
                "Total current deduplication threads [{0}]".format(
                    len(threads)),
                verbose=True)
            threads_final = []
            for thread in threads:
                if thread.is_alive():
                    threads_final.append(thread)
            threads = threads_final
            self.ioc.getLogger().trace(
                "Total current deduplication threads [{0}]".format(
                    len(threads)),
                verbose=True)
        # ensure the collection's TTL indexes are in place; expireAfterSeconds=0
        # expires each document at the datetime stored in the indexed field
        self.ioc.getCollection(collection).create_index('expiry', expireAfterSeconds=0)
        self.ioc.getCollection(collection).create_index('max_expiry', expireAfterSeconds=0)
        return final

    @staticmethod
    def deduplicate_object(ioc,
                           obj,
                           expiry,
                           expiry_max,
                           threshold,
                           source_name,
                           configuration_name,
                           final,
                           collection,
                           data_pointer=None,
                           data_max=None,
                           field_set=None):
        """DeDuplicate Object

        This is the method to actually deduplicate an object. The `final` argument is appended to with the obj if it
        was successfully deduplicated.

        Args:
            ioc (GreaseContainer): IoC for the instance
            obj (dict): Object to be deduplicated
            expiry (int): Hours to deduplicate for
            expiry_max (int): Maximum days to deduplicate for
            threshold (float): level of duplication allowed in an object (the lower the threshold the more uniqueness is required)
            source_name (str): Source of data being deduplicated
            configuration_name (str): Configuration being deduplicated for
            final (list): List to append `obj` to if unique
            collection (str): Name of deduplication collection
            data_pointer (int): If provided will provide log information relating to thread
                (Typically used via `Deduplicate`)
            data_max (int): If provided will provide log information relating to thread
                (Typically used via `Deduplicate`)
            field_set (list): If provided will only deduplicate on list of fields provided

        Returns:
            None: Nothing returned. Updates `final` object

        """
        # first determine if this object has been seen before
        DeDupCollection = ioc.getCollection(collection)
        # copy the object so the injected configuration key does not mutate the caller's data
        t1test = dict(obj)
        t1test['grease_internal_configuration'] = configuration_name
        T1Hash = DeDupCollection.find_one(
            {'hash': Deduplication.generate_hash_from_obj(t1test)})
        if T1Hash:
            # T1 Found Protocol: We have found a fully duplicate object
            # we have a duplicate source document
            # increase the counter and expiry and move on (DROP)
            ioc.getLogger().debug("Type1 Match found for object", verbose=True)
            # bump the expiry time and move on
            DeDupCollection.update_one({'_id': T1Hash['_id']}, {
                "$set": {
                    'score': int(T1Hash['score']) + 1,
                    'expiry': Deduplication.generate_expiry_time(expiry)
                }
            })
            return
        else:
            # T1 Not Found Protocol: We have a possibly unique object
            ioc.getLogger().debug(
                "Type1 Match not found; Beginning type 2 processing")
            # Create a T1
            T1ObjectId = DeDupCollection.insert_one({
                'expiry':
                Deduplication.generate_expiry_time(int(expiry)),
                'grease_internal_configuration':
                configuration_name,
                'max_expiry':
                Deduplication.generate_max_expiry_time(int(expiry_max)),
                'type':
                1,
                'score':
                1,
                'source':
                str(source_name),
                'hash':
                Deduplication.generate_hash_from_obj(t1test)
            }).inserted_id
            # Begin T2 Deduplication
            compositeScore = Deduplication.object_field_score(
                collection, ioc, source_name, configuration_name, obj,
                str(T1ObjectId), expiry, expiry_max, field_set)
            if compositeScore < threshold:
                # unique obj
                ioc.getLogger().trace(
                    "Unique object! Composite score was: [{0}] threshold: [{1}]"
                    .format(compositeScore, threshold),
                    verbose=True)
                final.append(obj)
                return
            else:
                # likely duplicate value
                ioc.getLogger().trace(
                    "Object surpassed threshold, suspected to be duplicate! "
                    "Composite score was: [{0}] threshold: [{1}]".format(
                        compositeScore, threshold),
                    verbose=True)
                return

    @staticmethod
    def object_field_score(collection,
                           ioc,
                           source_name,
                           configuration_name,
                           obj,
                           objectId,
                           expiry,
                           max_expiry,
                           field_set=None):
        """Returns T2 average uniqueness

        Takes a dictionary and returns the likelihood of that object being unique based on data in the collection

        Args:
            collection (str): Deduplication collection name
            ioc (GreaseContainer): IoC Access
            source_name (str): source of data to be deduplicated
            configuration_name (str): configuration name to be deduplicated
            obj (dict): Single dimensional list to be compared against collection
            objectId (str): T1 Hash Mongo ObjectId to be used to associate fields to a T1
            expiry (int): Hours for deduplication to wait before removing a field if not seen again
            max_expiry (int): Days for deduplication to wait before ensuring object is deleted
            field_set (list, optional): List of fields to deduplicate with if provided. Else will use all keys

        Returns:
            float: Duplication Probability

        """
        # generate field list if not provided
        FieldColl = ioc.getCollection(collection)
        if not isinstance(field_set, list) or len(field_set) <= 0:
            field_set = obj.keys()
        # List to hold field level scores
        field_scores = []
        # iterate over the field set
        for field in field_set:
            # ensure key is in the object
            ioc.getLogger().trace("Starting field [{0}]".format(field),
                                  verbose=True)
            if field in obj:
                if isinstance(obj.get(field), bytes):
                    value = obj.get(field).decode('utf-8', 'ignore')
                else:
                    value = obj.get(field)
                T2Object = {
                    'source': source_name,
                    'field': field,
                    'value': value,
                    'configuration': configuration_name
                }
                checkDoc = FieldColl.find_one(
                    {'hash': Deduplication.generate_hash_from_obj(T2Object)})
                if checkDoc:
                    # we found a 100% matching T2 object
                    ioc.getLogger().trace("T2 object Located", trace=True)
                    update_statement = {
                        "$set": {
                            'score': int(checkDoc['score']) + 1,
                            'expiry':
                            Deduplication.generate_expiry_time(expiry)
                        }
                    }
                    FieldColl.update_one({'_id': checkDoc['_id']},
                                         update_statement)
                    field_scores.append(100)
                    continue
                else:
                    # We have a possible unique value
                    ioc.getLogger().trace("T2 object not found", trace=True)
                    # generate a list to collect similarities to other field objects
                    fieldProbabilityList = []
                    for record in FieldColl.find({'source': source_name, 'configuration': configuration_name, 'field': field, 'type': 2})\
                            .sort('score', pymongo.ASCENDING).limit(100):
                        if Deduplication.string_match_percentage(
                                record['value'], str(T2Object['value'])) > .95:
                            # We've found a REALLY strong match
                            # Set this field's score to that of the match
                            field_scores.append(
                                100 * Deduplication.string_match_percentage(
                                    record['value'], str(T2Object['value'])))
                            # leave the for loop for this field since we found a highly probable match
                            break
                        else:
                            fieldProbabilityList.append(
                                100 * Deduplication.string_match_percentage(
                                    record['value'], str(T2Object['value'])))
                    if fieldProbabilityList:
                        # We have at least one result
                        score = float(
                            sum(fieldProbabilityList) /
                            len(fieldProbabilityList))
                        ioc.getLogger().trace(
                            "Field Score [{0}]".format(score), verbose=True)
                        field_scores.append(score)
                    else:
                        # It is a globally unique field
                        field_scores.append(0)
                    # finally persist the new object
                    T2Object['hash'] = Deduplication.generate_hash_from_obj(
                        T2Object)
                    T2Object['score'] = 1
                    T2Object['expiry'] = Deduplication.generate_expiry_time(
                        expiry)
                    T2Object[
                        'max_expiry'] = Deduplication.generate_max_expiry_time(
                            max_expiry)
                    T2Object['type'] = 2
                    T2Object['parentId'] = ObjectId(objectId)
                    FieldColl.insert_one(T2Object)
            else:
                ioc.getLogger().warning(
                    "field [{0}] not found in object".format(field),
                    trace=True,
                    notify=False)
                continue
        if len(field_scores) == 0:
            return 0.0
        else:
            return float(sum(field_scores) / float(len(field_scores)))

    @staticmethod
    def generate_hash_from_obj(obj):
        """Takes an object and generates a SHA256 Hash of it

        Args:
            obj (object): Hashable object to generate a SHA256 from

        Returns:
            str: Object Hash

        """
        return hashlib.sha256(str(obj).encode('utf-8')).hexdigest()

    @staticmethod
    def generate_expiry_time(hours):
        """Generates UTC Timestamp for hours in the future

        Args:
            hours (int): How many hours in the future to expire on

        Returns:
            datetime.datetime: Datetime object for hours in the future

        """
        return datetime.datetime.utcnow() + datetime.timedelta(
            hours=int(hours))

    @staticmethod
    def generate_max_expiry_time(days):
        """Generates UTC Timestamp for days in the future

        Args:
            days (int): How many days in the future to expire on

        Returns:
            datetime.datetime: Datetime object for days in the future

        """
        return datetime.datetime.utcnow() + datetime.timedelta(days=int(days))

    @staticmethod
    def string_match_percentage(constant, new_value):
        """Returns the percentage likelihood two strings are identical

        Args:
            constant (str): Value to use as base standard
            new_value (str): Value to compare `constant` against

        Returns:
            float: Percentage likelihood of duplicate value

        """
        return difflib.SequenceMatcher(lambda x: x == " ", constant,
                                       new_value).quick_ratio()
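
The T2 scoring in `object_field_score` is easy to check by hand: each field receives a 0-100 similarity score against previously seen values via `difflib`, a never-seen field scores 0, and the composite is the plain average compared against the caller's threshold. A self-contained sketch of that arithmetic; the sample records and prior values are invented:

import difflib

# Field-level similarity, exactly as string_match_percentage computes it
def match_pct(constant, new_value):
    return difflib.SequenceMatcher(lambda x: x == " ", constant, new_value).quick_ratio()

# Hypothetical incoming record vs. previously seen values per field
incoming = {'host': 'web-01.example.com', 'message': 'disk usage at 91 percent'}
seen = {'host': ['web-02.example.com'], 'message': ['disk usage at 90 percent']}

field_scores = []
for field, value in incoming.items():
    probs = [100 * match_pct(old, value) for old in seen.get(field, [])]
    # average similarity across prior values; an unseen field is fully unique (0)
    field_scores.append(sum(probs) / len(probs) if probs else 0)

composite = sum(field_scores) / float(len(field_scores))
print(round(composite, 2))  # above the configured threshold -> treated as a duplicate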
Example n. 11
class Scan(object):
    """Scanning class for GREASE Scanner

    This is the model to actually utilize the scanners to parse the configured environments

    Attributes:
        ioc (GreaseContainer): IOC for scanning
        conf (PrototypeConfig): Prototype configuration instance
        impTool (ImportTool): Import Utility Instance
        dedup (Deduplication): Deduplication instance to be used
        scheduler (Scheduling): Central scheduling instance

    """
    def __init__(self, ioc=None):
        if ioc and isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.conf = PrototypeConfig(self.ioc)
        self.impTool = ImportTool(self.ioc.getLogger())
        self.dedup = Deduplication(self.ioc)
        self.scheduler = Scheduling(self.ioc)

    def Parse(self, source=None, config=None):
        """This will read all configurations and attempt to scan the environment

        This is the primary business logic for scanning in GREASE. This method will use configurations to parse
        the environment and attempt to schedule

        Note:
            If a Source is specified then *only* that source is parsed. If a configuration is set then *only* that
            configuration is parsed. If both are provided then the configuration will *only* be parsed if it is of
            the source provided

        Note:
            **If mocking is enabled**: Deduplication *will not occur*

        Args:
            source (str): If set will only parse for the source listed
            config (str): If set will only parse the specified config

        Returns:
            bool: True unless error

        """
        self.ioc.getLogger().trace("Starting Parse of Environment", trace=True)
        Configuration = self.generate_config_set(source=source, config=config)
        ScanPool = []
        lenConfigs = len(Configuration)
        i = 0
        while i < lenConfigs:
            # ensure we don't swamp the system resources
            cpu = cpu_percent(interval=.1)
            mem = virtual_memory().percent
            if \
                    cpu >= int(self.ioc.getConfig().get('NodeInformation', 'ResourceMax')) or \
                    mem >= int(self.ioc.getConfig().get('NodeInformation', 'ResourceMax')):
                self.ioc.getLogger().trace(
                    "Scan sleeping; System resource maximum reached",
                    verbose=True)
                # remove variables
                del cpu
                del mem
                continue
            conf = Configuration[i]
            i += 1
            # ensure no kafka prototypes come into sourcing
            if conf.get('source') == 'kafka':
                continue
            # ensure there is an execution environment
            server, _ = self.scheduler.determineExecutionServer(
                conf.get('exe_env', 'general'))
            if not server:
                self.ioc.getLogger().warning(
                    'configuration skipped -- execution environment offline',
                    additional={
                        'execution_environment':
                        conf.get('exe_env', 'general'),
                        'configuration': conf.get('name')
                    },
                    notify=True)
                continue
            inst = self.impTool.load(conf.get('source', str(uuid4())))
            if not isinstance(inst, BaseSourceClass):
                self.ioc.getLogger().error("Invalid Source [{0}]".format(
                    conf.get('source')),
                                           notify=False)
                del inst
                continue
            else:
                t = threading.Thread(
                    target=self.ParseSource,
                    args=(
                        self.ioc,
                        inst,
                        conf,
                        self.dedup,
                        self.scheduler,
                    ),
                    name="GREASE SOURCING THREAD [{0}]".format(
                        conf.get('name')))
                t.daemon = True
                t.start()
                ScanPool.append(t)
        # wait for threads to finish out
        while len(ScanPool) > 0:
            self.ioc.getLogger().trace(
                "Total current scan threads [{0}]".format(len(ScanPool)),
                trace=True)
            threads_final = []
            for thread in ScanPool:
                if thread.is_alive():
                    threads_final.append(thread)
            ScanPool = threads_final
            self.ioc.getLogger().trace(
                "Total current scan threads [{0}]".format(len(ScanPool)),
                trace=True)
        self.ioc.getLogger().trace("Scanning Complete", trace=True)
        return True

    @staticmethod
    def ParseSource(ioc, source, configuration, deduplication, scheduler):
        """Parses an individual source and attempts to schedule it

        Args:
            ioc (GreaseContainer): IoC Instance
            source (BaseSourceClass): Source to parse
            configuration (dict): Prototype configuration to use
            deduplication (Deduplication): Dedup engine instance
            scheduler (Scheduling): Central Scheduling instance

        Returns:
            None: Meant to be run in a thread

        """
        try:
            # If mock mode enabled
            if ioc.getConfig().get('Sourcing', 'mock'):
                data = source.mock_data(configuration)
            # else actually do sourcing
            else:
                if source.parse_source(configuration):
                    # deduplicate data
                    data = deduplication.Deduplicate(
                        data=source.get_data(),
                        source=configuration.get('source'),
                        configuration=configuration.get('name', str(uuid4())),
                        threshold=source.deduplication_strength,
                        expiry_hours=source.deduplication_expiry,
                        expiry_max=source.deduplication_expiry_max,
                        collection='Dedup_Sourcing',
                        field_set=source.field_set)
                else:
                    ioc.getLogger().warning(
                        "Source [{0}] parsing failed".format(
                            configuration.get('source')),
                        notify=False)
                    data = []
            if len(data) > 0:
                if scheduler.scheduleDetection(configuration.get('source'),
                                               configuration.get('name'),
                                               data):
                    ioc.getLogger().info(
                        "Data scheduled for detection from source [{0}]".
                        format(configuration.get('source')),
                        trace=True)
                    del source
                else:
                    ioc.getLogger().error(
                        "Scheduling failed for source document!", notify=False)
                    del source
            else:
                ioc.getLogger().trace(
                    "Length of data was empty; was not scheduled", trace=True)
                del source
        except BaseException as e:
            ioc.getLogger().error(
                "Failed parsing message got exception! Configuration [{0}] Got [{1}]"
                .format(configuration, e))
            del source

    def generate_config_set(self, source=None, config=None):
        """Examines configuration and returns list of configs to parse

        Note:
            If a Source is specified then *only* that source is parsed. If a configuration is set then *only* that
            configuration is parsed. If both are provided then the configuration will *only* be parsed if it is of
            the source provided

        Args:
            source (str): If set will only parse for the source listed
            config (str): If set will only parse the specified config

        Returns:
            list[dict]: Returns Configurations to Parse for data

        """
        ConfigList = []
        if source and config:
            if self.conf.get_config(config).get('source') == source:
                ConfigList.append(self.conf.get_config(config))
                return ConfigList
            else:
                self.ioc.getLogger().warning(
                    "Configuration [{0}] Not Found With Correct Source [{1}]".
                    format(config, source),
                    trace=True,
                    notify=False)
        elif source and not config:
            if source in self.conf.get_sources():
                for configuration in self.conf.get_source(source):
                    ConfigList.append(configuration)
                return ConfigList
            else:
                self.ioc.getLogger().warning(
                    "Source not found in Configuration [{0}]".format(source),
                    trace=True,
                    notify=False)
        elif not source and config:
            if self.conf.get_config(config):
                ConfigList.append(self.conf.get_config(config))
                return ConfigList
            else:
                self.ioc.getLogger().warning(
                    "Config not found in Configuration [{0}]".format(config),
                    trace=True,
                    notify=False)
        else:
            ConfigList = self.conf.getConfiguration().get('raw')
        return ConfigList
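
`Parse` uses the same throttling idiom as `Deduplicate`: before dispatching each work item it samples CPU and memory through `psutil`, prunes finished threads, and stalls when a configured ceiling is reached. A condensed sketch of that loop, assuming `psutil` is installed; the 85 percent ceiling and the worker body stand in for the NodeInformation settings and real sourcing work:

import threading
import time
from psutil import cpu_percent, virtual_memory

RESOURCE_MAX = 85  # stand-in for the NodeInformation -> ResourceMax setting

def worker(item):
    time.sleep(0.1)  # stand-in for per-item sourcing/deduplication work

items = list(range(20))
threads = []
while items:
    # back off while the node is saturated; cpu_percent(interval=.1) doubles as a short sleep
    if cpu_percent(interval=.1) >= RESOURCE_MAX or virtual_memory().percent >= RESOURCE_MAX:
        continue
    threads = [t for t in threads if t.is_alive()]  # prune completed workers
    t = threading.Thread(target=worker, args=(items.pop(0),), daemon=True)
    t.start()
    threads.append(t)
for t in threads:
    t.join()  # drain the pool; the models above poll instead so they can keep logging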
Example n. 12
class Detect(object):
    """Detection class for GREASE detect

    This is the model to actually utilize the detectors to parse the sources from scan

    Attributes:
        ioc (GreaseContainer): IOC for scanning
        impTool (ImportTool): Import Utility Instance
        conf (PrototypeConfig): Prototype configuration tool
        scheduler (Scheduling): Prototype Scheduling Service Instance

    """
    def __init__(self, ioc=None):
        if ioc and isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.impTool = ImportTool(self.ioc.getLogger())
        self.ioc.ensureRegistration()
        self.conf = PrototypeConfig(self.ioc)
        self.scheduler = Scheduling(self.ioc)

    def detectSource(self):
        """This will perform detection on the oldest source from SourceData

        Returns:
            bool: If detection process was successful

        """
        sourceData = self.getScheduledSource()
        if sourceData:
            if isinstance(sourceData.get('configuration'), bytes):
                conf = sourceData.get('configuration').decode()
            else:
                conf = sourceData.get('configuration')
            configurationData = self.conf.get_config(conf)
            if configurationData:
                self.ioc.getCollection('SourceData').update_one(
                    {'_id': ObjectId(sourceData.get('_id'))}, {
                        '$set': {
                            'grease_data.detection.start':
                            datetime.datetime.utcnow()
                        }
                    })
                result, resultData = self.detection(sourceData.get('data'),
                                                    configurationData)
                if result:
                    # Put constants in detection results
                    resultData['constants'] = self.conf.get_config(
                        configurationData.get('name')).get('constants', {})
                    # Update detection
                    self.ioc.getCollection('SourceData').update_one(
                        {'_id': ObjectId(sourceData.get('_id'))}, {
                            '$set': {
                                'grease_data.detection.end':
                                datetime.datetime.utcnow(),
                                'grease_data.detection.detection':
                                resultData
                            }
                        })
                    # attempt scheduling
                    return self.scheduler.scheduleScheduling(
                        sourceData.get('_id'))
                else:
                    self.ioc.getCollection('SourceData').update_one(
                        {'_id': ObjectId(sourceData.get('_id'))}, {
                            '$set': {
                                'grease_data.detection.end':
                                datetime.datetime.utcnow(),
                                'grease_data.detection.detection': {}
                            }
                        })
                    self.ioc.getLogger().trace(
                        "Detection yielded no detection data", trace=True)
                    return True
            else:
                self.ioc.getLogger().error(
                    "Failed to load Prototype Config [{0}]".format(
                        sourceData.get('configuration')),
                    notify=False)
                return False
        else:
            self.ioc.getLogger().trace(
                "No sources awaiting detection currently", trace=True)
            return True

    def getScheduledSource(self):
        """Queries for oldest source that has been assigned for detection

        Returns:
            dict: source awaiting detection

        """
        return self.ioc.getCollection('SourceData').find_one(
            {
                'grease_data.detection.server':
                ObjectId(self.ioc.getConfig().NodeIdentity),
                'grease_data.detection.start':
                None,
                'grease_data.detection.end':
                None,
            },
            sort=[('grease_data.createTime', pymongo.DESCENDING)])

    def detection(self, source, configuration):
        """Performs detection on a source with the provided configuration

        Args:
            source (dict): Key->Value pairs from sourcing to detect upon
            configuration (dict): Prototype configuration provided from sourcing

        Returns:
            tuple: Detection Results; first boolean for success, second dict of variables for context

        """
        # Ensure types
        final = {}
        finalBool = False
        if not isinstance(source, dict):
            self.ioc.getLogger().warning("Detection got non-dict source data",
                                         notify=False)
            finalBool = False
            return finalBool, final
        if not isinstance(configuration, dict):
            self.ioc.getLogger().warning(
                "Detection got non-dict configuration", notify=False)
            finalBool = False
            return finalBool, final
        # Now loop through logical blocks
        for detector, logicBlock in configuration.get('logic', {}).items():
            if not isinstance(logicBlock, list):
                self.ioc.getLogger().warning("Logical Block was not list",
                                             trace=True,
                                             notify=False)
            detect = self.impTool.load(detector)
            if isinstance(detect, Detector):
                result, resultData = detect.processObject(source, logicBlock)
                if not result:
                    self.ioc.getLogger().trace(
                        "Detection yielded false for [{0}]".format(detector),
                        trace=True)
                    finalBool = False
                    break
                else:
                    self.ioc.getLogger().trace(
                        "Detection yielded true for [{0}]".format(detector),
                        trace=True)
                    for key, val in resultData.items():
                        final[key] = val
                    finalBool = True
                    continue
            else:
                self.ioc.getLogger().warning(
                    "invalid detector [{0}]".format(detector), notify=False)
                finalBool = False
                # an invalid detector must fail the whole detection; without this
                # break a later detector could overwrite finalBool with True
                break
        return finalBool, final
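
A hedged sketch of what detection() consumes and returns. It assumes a reachable GREASE environment (MongoDB behind the IOC) and an importable detector named 'Regex'; the logic-block fields shown are illustrative assumptions, not guaranteed by this listing.

source = {'message': 'ERROR: disk full on /dev/sda1'}
configuration = {
    'name': 'example_config',
    'logic': {
        # one logical block per detector; each block is a list of checks
        'Regex': [
            {'field': 'message', 'pattern': '.*ERROR.*'}
        ]
    }
}
passed, context = Detect().detection(source, configuration)
# passed is True only when every detector's logical block matched; context
# holds any variables the detectors extracted ('constants' are merged in
# only when detection runs via detectSource()).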
Example n. 13
class Scheduler(object):
    """Job Scheduler Model

    This model will attempt to schedule a job for execution

    Attributes:
        ioc (GreaseContainer): IOC for scanning
        impTool (ImportTool): Import Utility Instance
        conf (PrototypeConfig): Prototype configuration tool
        scheduler (Scheduling): Prototype Scheduling Service Instance

    """
    def __init__(self, ioc=None):
        if ioc and isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.impTool = ImportTool(self.ioc.getLogger())
        self.ioc.ensureRegistration()
        self.conf = PrototypeConfig(self.ioc)
        self.scheduler = Scheduling(self.ioc)

    def scheduleExecution(self):
        """Schedules the oldest successfully detected source to execution

        Returns:
            bool: True unless scheduling fails

        """
        source = self.getDetectedSource()
        if source:
            self.ioc.getLogger().trace("Attempting schedule of source",
                                       trace=True)
            self.ioc.getCollection('SourceData').update_one(
                {'_id': ObjectId(source.get('_id'))}, {
                    '$set': {
                        'grease_data.scheduling.start':
                        datetime.datetime.utcnow()
                    }
                })
            if self.schedule(source):
                self.ioc.getLogger().trace(
                    "Scheduling [{0}] was successful".format(source['_id']),
                    trace=True)
                self.ioc.getCollection('SourceData').update_one(
                    {'_id': ObjectId(source.get('_id'))}, {
                        '$set': {
                            'grease_data.scheduling.end':
                            datetime.datetime.utcnow()
                        }
                    })
                return True
            else:
                self.ioc.getLogger().error(
                    "Failed to schedule [{0}] for execution".format(
                        source['_id']),
                    trace=True,
                    notify=False)
                self.ioc.getCollection('SourceData').update_one(
                    {'_id': ObjectId(source.get('_id'))}, {
                        '$set': {
                            'grease_data.scheduling.start': None,
                            'grease_data.scheduling.end': None
                        }
                    })
                return False
        else:
            self.ioc.getLogger().trace(
                "No sources detected for this node at this time", trace=True)
            return True

    def getDetectedSource(self):
        """Gets the oldest successfully detected source

        Returns:
            dict: Object from MongoDB

        """
        return self.ioc.getCollection('SourceData').find_one(
            {
                'grease_data.scheduling.server':
                ObjectId(self.ioc.getConfig().NodeIdentity),
                'grease_data.scheduling.start':
                None,
                'grease_data.scheduling.end':
                None
            },
            sort=[('grease_data.createTime', pymongo.DESCENDING)])

    def schedule(self, source):
        """Schedules source for execution

        Returns:
            bool: If scheduling was successful or not

        """
        if isinstance(source['configuration'], bytes):
            config = self.conf.get_config(source['configuration'].decode())
        else:
            config = self.conf.get_config(source['configuration'])
        if not config:
            self.ioc.getLogger().error(
                "Failed to load configuration for source [{0}]".format(
                    source['_id']))
            return False
        server, jobs = self.scheduler.determineExecutionServer(
            config.get('exe_env', 'general'))
        if not server:
            self.ioc.getLogger().error(
                "Failed to find an Execution Node for environment [{0}]".
                format(config.get('exe_env', 'general')))
            return False
        self.ioc.getCollection('SourceData').update_one(
            {'_id': ObjectId(source['_id'])}, {
                '$set': {
                    'grease_data.execution.server':
                    ObjectId(server),
                    'grease_data.execution.assignmentTime':
                    datetime.datetime.utcnow(),
                }
            })
        self.ioc.getCollection('JobServer').update_one(
            {'_id': ObjectId(server)}, {'$set': {
                'jobs': jobs + 1
            }})
        return True
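
A hedged sketch of how a prototype daemon might drive the two models above in tandem; the loop and pacing are assumptions, only the two method calls come from the listings.

import time

detect_model = Detect()
schedule_model = Scheduler()
while True:
    detect_model.detectSource()         # detect the next source assigned to this node
    schedule_model.scheduleExecution()  # hand detected work to an execution node
    time.sleep(5)                       # assumed pacing, not taken from the listing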
Example n. 14
class KafkaSource(object):
    """Kafka class for sourcing Kafka messages

    This Model will create and dynamically scale the number of Kafka consumers for the topics
    in the Config, and then sends the parsed messages (containing only the keys/values specified
    in the Config) to Scheduling.

    This Model is designed around the Configs. Each Config gets its own consumer_manager thread,
    which means each Config also gets its own dedicated consumer. Any "magic numbers" (such as
    MIN_BACKLOG, MAX_CONSUMERS, etc.) can be overridden per Config, with the exception of
    SLEEP_TIME, which is constant across Configs.

    Currently, the class only supports Kafka topics containing JSON; this functionality
    can be extended inside the parse_message method.

    Attributes:
        ioc (GreaseContainer): IOC for scanning
        conf (PrototypeConfig): Prototype configuration instance
        configs (List[dict]): List of Kafka Configs

    Note:
        Currently, only json messages can be decoded from kafka topics

    """
    def __init__(self, ioc=None):
        if ioc and isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.conf = PrototypeConfig(self.ioc)
        self.configs = []

    def run(self, config=None):
        """This will load all Kafka configs (unless a specific one is provided) and spin up consumer
        threads for all of them.

        It never returns unless something goes wrong with Kafka consumption.

        Creates a thread for each Kafka config to begin parsing messages. This parent thread then
        monitors its children, and prunes dead threads. Once all are dead, we return False.

        Note:
            If a configuration is provided then *only* that configuration is parsed; it is still validated before any consumer threads are started.

        Args:
            config (dict): If set will only parse the specified config

        Returns:
            bool: False if an error occurs, else never returns

        """
        if config:
            self.configs = [config]
        else:
            self.configs = self.get_configs()

        if not self.validate_configs(self.configs):
            self.ioc.getLogger().error(
                "One or more Kafka Configs are invalid, stopping.")
            return False

        threads = []
        for conf in self.configs:
            threads.append(self.create_consumer_manager_thread(conf))

        while threads:
            # Prune dead consumer-manager threads; the loop exits once all have died
            threads = list(filter(lambda x: x.is_alive(), threads))

        self.ioc.getLogger().critical(
            "All Kafka consumer managers have died, stopping.")
        return False

    def create_consumer_manager_thread(self, config):
        """Creates and returns a thread running a consumer_manager

        Args:
            config (dict): Configuration for a Kafka Model

        Returns:
            threading.Thread: The thread running consumer_manager

        """
        KafkaSource.sleep(SLEEP_TIME)
        thread = threading.Thread(target=KafkaSource.consumer_manager,
                                  args=(
                                      self.ioc,
                                      config,
                                  ))
        thread.daemon = False
        thread.start()
        self.ioc.getLogger().info(
            "Kafka consumer manager thread started for config: {0}".format(
                config.get("name")))
        return thread

    @staticmethod
    def consumer_manager(ioc, config):
        """Creates and reallocates consumer threads within the same consumer group for a single config

        Args:
            ioc (GreaseContainer): Used for logging since we can't use self in threads
            config (dict): Configuration for a Kafka Model

        Returns:
            bool: False if all consumers are stopped

        """
        monitor_consumer = KafkaSource.create_consumer(ioc, config)
        threads = [KafkaSource.create_consumer_thread(ioc, config)]

        while threads:
            KafkaSource.reallocate_consumers(ioc, config, monitor_consumer,
                                             threads)
            threads = list(filter(lambda x: x[0].is_alive(), threads))

        return False

    @staticmethod
    def create_consumer_thread(ioc, config):
        """Creates a consumer thread, pipe pair for a given config

        Args:
            ioc (GreaseContainer): Used for logging since we can't use self in threads
            config (dict): Configuration for a Kafka Model

        Returns:
            threading.Thread: The Thread running the Kafka consumer
            multiprocessing.Pipe: The parent end of the Pipe used to send a kill signal to the consumer thread

        """
        parent_conn, child_conn = Pipe()
        thread = threading.Thread(target=KafkaSource.consume,
                                  args=(
                                      ioc,
                                      config,
                                      child_conn,
                                  ))
        thread.daemon = True
        thread.start()
        ioc.getLogger().info(
            "Kafka consumer thread started for config: {0}".format(
                config.get("name")))
        return thread, parent_conn

    @staticmethod
    def consume(ioc, config, pipe):
        """The Kafka consumer in charge of parsing messages according to the config, then sends the parsed dict to Scheduling

        Args:
            ioc (GreaseContainer): Used for logging since we can't use self in threads
            config (dict): Configuration for a Kafka Model
            pipe (multiprocessing.Pipe): Child end of the pipe used to receive signals from parent thread

        Returns:
            bool: False if kill signal is received

        """
        consumer = KafkaSource.create_consumer(ioc, config)

        for msg in consumer:
            if pipe.poll():  # If the parent pipe sends a signal
                ioc.getLogger().trace("Kill signal received, stopping",
                                      trace=True)
                return False
            message_dict = KafkaSource.parse_message(ioc, config, msg)
            if message_dict:
                KafkaSource.send_to_scheduling(ioc, config, message_dict)

        return False

    @staticmethod
    def sleep(sleep_sec):
        """Thread safe sleep function that waits sleep_sec seconds without affecting child threads

        Args:
            sleep_sec (int): Number of seconds to idle

        """
        wake_time = time() + sleep_sec
        while time() < wake_time:
            continue

    @staticmethod
    def create_consumer(ioc, config):
        """Creates a KafkaConsumer object from the params in config

        Args:
            ioc (GreaseContainer): Used for logging since we can't use self in threads
            config (dict): Configuration for a Kafka Model

        Returns:
            kafka.KafkaConsumer: KafkaConsumer object initialized with params from config

        """
        consumer = None
        while not consumer:
            try:
                consumer = KafkaConsumer(
                    group_id=config.get('name'),
                    *config.get('topics'),
                    **{'bootstrap_servers': ",".join(config.get('servers'))})
            except kafka.errors.NoBrokersAvailable:
                ioc.getLogger().error(
                    "No Kafka brokers available for config: {0}, retrying.".
                    format(config.get('name')))
                KafkaSource.sleep(SLEEP_TIME)

        ioc.getLogger().info(
            "Kafka consumer created under group_id: {0}".format(
                config.get('name')))
        KafkaSource.sleep(SLEEP_TIME)  # Gives the consumer time to initialize
        return consumer

    @staticmethod
    def parse_message(ioc, config, message):
        """Parses a message from Kafka according to the config

        Note:
            parse_message extracts only the keys/values from the message as specified in the config. By default, keys are split on "." - so to access the value stored at message[a][b][c], your config would contain the key "a.b.c". The "." key separator can be overridden explicitly in your Config via "key_sep". Each extracted value is written to its respective alias, also specified in the config.

        Args:
            ioc (GreaseContainer): Used for logging since we can't use self in threads
            config (dict): Configuration for a Kafka model
            message (kafka.ConsumerRecord): Individual message received from Kafka topic

        Returns:
            dict: A flat dictionary containing only the keys/values from the message as specified in the config

        """
        try:
            message = json.loads(message.value, strict=False)
            ioc.getLogger().trace("Message successfully loaded", trace=True)
        except ValueError:
            ioc.getLogger().trace("Failed to unload message", trace=True)
            return {}

        final = {}
        for key, alias in config.get("key_aliases", {}).items():
            pointer = message
            for sub_key in key.split(config.get("key_sep", ".")):
                if not isinstance(pointer, dict) or sub_key not in pointer:
                    ioc.getLogger().trace(
                        "Subkey: {0} missing from message".format(sub_key),
                        trace=True)
                    return {}
                pointer = pointer[sub_key]
            final[alias] = str(pointer)

        ioc.getLogger().trace("Message succesfully parsed", trace=True)
        return final
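
    # Worked example (message shape assumed for illustration): with
    # key_aliases {"a.b.c": "abc_key"} and message.value '{"a": {"b": {"c": 7}}}',
    # parse_message returns {"abc_key": "7"} - extracted values are stringified.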

    @staticmethod
    def reallocate_consumers(ioc, config, monitor_consumer, threads):
        """Determines whether to create or kill a consumer based on current message backlog, then performs that action

        Args:
            ioc (GreaseContainer): Used for logging since we can't use self in threads
            config (dict): Configuration for a Kafka model
            monitor_consumer (kafka.KafkaConsumer): KafkaConsumer used solely for measuring message backlog
            threads (list[(threading.Thread, multiprocessing.Pipe)]): List of current consumer thread/pipe pairs

        Returns:
            int: Number of threads created (Negative value if a thread was killed)
        """
        min_backlog = config.get("min_backlog", MIN_BACKLOG)
        max_backlog = config.get("max_backlog", MAX_BACKLOG)
        max_consumers = config.get("max_consumers", MAX_CONSUMERS)

        backlog1 = KafkaSource.get_backlog(ioc, monitor_consumer)
        # Wait before sampling again in case of a momentary message spike
        KafkaSource.sleep(SLEEP_TIME)
        backlog2 = KafkaSource.get_backlog(ioc, monitor_consumer)

        if (backlog1 > max_backlog and backlog2 > max_backlog
                and len(threads) < max_consumers):
            threads.append(KafkaSource.create_consumer_thread(ioc, config))
            ioc.getLogger().info(
                "Backlog max reached, spawning a new consumer for {0}".format(
                    config.get('name')),
                verbose=True)
            return 1
        elif (backlog1 <= min_backlog and backlog2 <= min_backlog
                and len(threads) > 1):
            KafkaSource.kill_consumer_thread(ioc, threads[0])
            ioc.getLogger().info(
                "Backlog min reached, killing a consumer for {0}".format(
                    config.get('name')),
                verbose=True)
            return -1
        ioc.getLogger().info("No reallocation needed for {0}".format(
            config.get('name')))
        return 0
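
    # Worked scenario: with max_backlog=200, two consecutive readings of 250
    # and 300 and fewer than max_consumers threads spawn one consumer (returns
    # 1); with min_backlog=50, readings of 40 and 30 and more than one thread
    # kill one consumer (returns -1); anything else returns 0.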

    @staticmethod
    def kill_consumer_thread(ioc, thread_tup):
        """Sends a kill signal to the thread's pipe

        Note:
            Despite being from the multiprocessing library, Pipes are thread safe in this implementation because we never share the same
            end of the Pipe with more than one thread. From the multiprocessing documentation:

                The two connection objects returned by Pipe() represent the two ends of the pipe. Each connection object has
                send() and recv() methods (among others). Note that data in a pipe may become corrupted if two processes
                (or threads) try to read from or write to the same end of the pipe at the same time. Of course there is no
                risk of corruption from threads using different ends of the pipe at the same time.

        Args:
            ioc (GreaseContainer): Used for logging since we can't use self in threads
            thread_tup ((threading.Thread, multiprocessing.Pipe)): Thread/Pipe tuple to be killed

        """
        thread_tup[1].send("STOP")
        ioc.getLogger().trace("Kill signal sent to consumer thread",
                              trace=True)
        # Give the consumer a chance to finish its current message
        KafkaSource.sleep(SLEEP_TIME)

    @staticmethod
    def get_backlog(ioc, consumer):
        """Gets the current message backlog for a given consumer

        Args:
            ioc (GreaseContainer): Used for logging since we can't use self in threads
            consumer (kafka.KafkaConsumer): The consumer used to poll backlog offsets

        Returns:
            float: The average backlog (messages per partition) across all partitions. -1 if an error occurs, signalling that excess consumers should be killed

        """
        if not consumer.assignment():
            ioc.getLogger().trace("Assigning consumer to topic", trace=True)
            consumer.poll()  # We need to poll the topic to actually get assigned

        partitions = consumer.assignment()
        if not partitions:
            ioc.getLogger().error("No partitions found for kafka consumer")
            return -1.

        try:
            current_offsets = [consumer.position(part) for part in partitions]
            end_offsets = list(consumer.end_offsets(partitions).values())
        except kafka.errors.KafkaTimeoutError:
            ioc.getLogger().error("KafkaTimeout during backlog check")
            return -1.
        except kafka.errors.UnsupportedVersionError:
            ioc.getLogger().error(
                "This version of kafka does not support backlog lookups")
            return -1.

        if (not current_offsets or not end_offsets
                or len(current_offsets) != len(end_offsets)):
            ioc.getLogger().error(
                "Backlog check failed for kafka consumer - invalid offsets")
            return -1.

        return float(sum(end_offsets) - sum(current_offsets)) / len(partitions)
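
    # Worked example: end offsets [120, 80] and current positions [100, 60]
    # across two partitions give (200 - 160) / 2 = 20.0 messages of average
    # backlog per partition.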

    @staticmethod
    def send_to_scheduling(ioc, config, message):
        """Sends a parsed message dictionary to scheduling

        Args:
            ioc (GreaseContainer): Used for logging since we can't use self in threads
            config (dict): Configuration for a Kafka model
            message (dict): Individual parsed message received from Kafka topic

        Returns:
            bool: True if scheduling is successful

        """
        scheduler = Scheduling(ioc)
        if not message:
            return False
        if scheduler.scheduleDetection(config.get('source'),
                                       config.get('name'), message):
            ioc.getLogger().trace(
                "Data scheduled for detection from source [{0}]".format(
                    config.get('source')),
                trace=True)
            return True
        else:
            ioc.getLogger().error(
                "Scheduling failed for kafka source document!", notify=False)
            return False

    def get_configs(self):
        """Gets all Configs with the source 'kafka'

        Returns:
            list[dict]: A list of all kafka config dicts

        """
        self.ioc.getLogger().info("Kafka configs loaded")
        return self.conf.get_source('kafka')

    def validate_configs(self, configs):
        """Checks if configs all have the required keys and that there are no duplicate aliases

        Example Config::   
            {
                "name": "kafka_config",
                "source": "kafka",
                "key_aliases": {
                    "a*b*c": "abc_key",
                    "a*b*d": "abd_key"
                },
                "key_sep": "*",         #opt, defaults "."
                "max_consumers": 32,    #opt, defaults 32
                "topics": [
                    "topic1",
                    "topic2"
                ],
                "servers": [
                    "server.target.com:1234"
                ],
                "max_backlog": 200,     #opt, defaults 200
                "min_backlog": 100      #opt, defaults 50
            }

        Args:
             configs (list[dict]): A list of configs to validate
        Returns:
            bool: True iff all configs are formatted correctly

        """
        required_keys = {
            "name": str,
            "source": str,
            "topics": list,
            "servers": list,
            "key_aliases": dict
        }
        opt_keys = {
            "key_sep": str,
            "max_consumers": int,
            "min_backlog": int,
            "max_backlog": int
        }
        for config in configs:
            for key, key_type in required_keys.items():
                # a missing key yields None, which also fails the isinstance check
                if not isinstance(config.get(key), key_type):
                    self.ioc.getLogger().error(
                        "Config: {0} has an invalid key: {1}".format(
                            config.get('name'), key),
                        notify=True)
                    return False
                if key_type in (list, dict) and len(config.get(key)) == 0:
                    self.ioc.getLogger().error(
                        "Config: {0} has an invalid key: {1}".format(
                            config.get('name'), key),
                        notify=True)
                    return False

            for key, key_type in opt_keys.items():
                if config.get(key) and not isinstance(config.get(key), key_type):
                    self.ioc.getLogger().error(
                        "Config: {0} has an invalid key: {1}".format(
                            config.get('name'), key),
                        notify=True)
                    return False
                if (config.get(key) and key_type in (list, dict)
                        and len(config.get(key)) == 0):
                    self.ioc.getLogger().error(
                        "Config: {0} has an invalid key: {1}".format(
                            config.get('name'), key),
                        notify=True)
                    return False

            if config.get("source") != "kafka":
                self.ioc.getLogger().error(
                    "Config: {0} is not a kafka config, but it has been loaded by KafkaSource"
                    .format(config.get('name')),
                    notify=True)
                return False

            aliases = list(config.get("key_aliases").values())
        # a duplicate alias makes the config invalid
        if len(aliases) != len(set(aliases)):
                self.ioc.getLogger().error(
                    "Config: {0} has duplicate key_aliases".format(
                        config.get('name')),
                    notify=True)
                return False

        return True
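
A hedged usage sketch for KafkaSource; the broker address, topic name, and key alias are placeholders, not values from this listing.

if __name__ == '__main__':
    kafka_source = KafkaSource()
    ok = kafka_source.run(config={
        'name': 'example_kafka',
        'source': 'kafka',
        'topics': ['events'],
        'servers': ['localhost:9092'],
        'key_aliases': {'payload.host': 'host'},
    })
    # run() blocks while consumer managers are alive; it returns False only
    # once they have all died or a config fails validation.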