Example #1
def _read_data_from_file(filepath):
    """Provide validated information when valid.

    Args:
        filepath: Path to file

    Returns:
        data: Data

    """
    # Initialize key variables
    data = {}

    # Try reading file if filename format is OK
    if _valid_filename(filepath) is True:
        # Ingest data
        try:
            with open(filepath, 'r') as f_handle:
                data = json.load(f_handle)
        except (OSError, ValueError):
            # Log status
            log_message = (
                'File %s does not contain JSON data, does not exist, '
                'or is unreadable.') % (filepath)
            log.log2warn(1006, log_message)

    else:
        # Log status
        log_message = (
            'File %s has an incorrect filename format.'
            '') % (filepath)
        log.log2warn(1026, log_message)

    # Return
    return data
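
The _valid_filename helper used above is not shown in these examples. A minimal sketch, assuming the timestamp_uid_hosthash.json cache naming convention that Examples #11 and #15 rely on (the exact pattern is an assumption):

import os
import re


def _valid_filename(filepath):
    """Sketch: check for a 'timestamp_uid_hosthash.json' style filename.

    The regex is an assumption inferred from how filenames are parsed
    elsewhere in these examples.
    """
    filename = os.path.basename(filepath)
    regex = re.compile(r'^\d+_[0-9a-fA-F]+_[0-9a-fA-F]+\.json$')
    return bool(regex.match(filename))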
Example #2
def _check_timestamp_key(information):
    """Check whether timestamp key is an integer value.

    Args:
        information: Dict of JSON data

    Returns:
        valid: True if valid

    """
    # Initialize key variables
    valid = True
    key = 'timestamp'

    # Verify we have a dictionary
    if isinstance(information, dict) is False:
        log_message = ('Ingest data is not a dictionary')
        log.log2warn(1110, log_message)
        valid = False
    else:
        # Timestamp must be an integer
        try:
            if key in information:
                int(information[key])
        except (TypeError, ValueError):
            log_message = ('Ingest data has no valid timestamp key')
            log.log2warn(1047, log_message)
            valid = False

    # Return
    return valid
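
For illustration, how this validator behaves on a few hypothetical inputs (the outcomes follow directly from the code above):

# _check_timestamp_key({'timestamp': 1468857600})    -> True (integer value)
# _check_timestamp_key({'timestamp': '1468857600'})  -> True (int() accepts numeric strings)
# _check_timestamp_key({'timestamp': 'not-a-time'})  -> False (int() raises ValueError)
# _check_timestamp_key(['not', 'a', 'dict'])         -> False (not a dictionary)
# _check_timestamp_key({})                           -> True (a missing key is never converted)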
Example #3
def _check_primary_keys_exist(information):
    """Check whether primary keys exist in json.

    Args:
        information: Data dictionary to check

    Returns:
        valid: True if valid

    """
    # Initialize key variables
    valid = True
    agent_meta_keys = ['timestamp', 'uid', 'agent', 'hostname']

    # Verify we have a dictionary
    if isinstance(information, dict) is False:
        log_message = ('Ingest data is not a dictionary')
        log.log2warn(1093, log_message)
        valid = False
    else:
        # Verify keys in information
        for key in agent_meta_keys:
            if key not in information:
                # Log status and stop processing
                valid = False
                log_message = ('Ingest data does not have a %s key.') % (key)
                log.log2warn(1039, log_message)
                break

    # Return
    return valid
Example #4
    def purge(self):
        """Purge cache file that was read.

        Args:
            None

        Returns:
            success: "True" if successful

        """
        # Initialize key variables
        success = True

        try:
            os.remove(self.filename)
        except OSError:
            success = False

        # Report success
        if success is True:
            log_message = (
                'Ingest cache file %s deleted') % (self.filename)
            log.log2quiet(1046, log_message)
        else:
            log_message = (
                'Failed to delete ingest cache file %s') % (self.filename)
            log.log2warn(1087, log_message)

        # Return
        return success
Example #5
    def _poll(self):
        """Query all remote hosts for data.

        Args:
            None

        Returns:
            None

        """
        # Initialize key variables
        pollers = []

        # Create a list of polling objects
        hostnames = self.config.agent_hostnames()

        for hostname in hostnames:
            # Only poll hosts that exist in the database
            if db_host.hostname_exists(hostname) is False:
                log_message = (
                    'Agent "%s": Hostname %s in the configuration file '
                    'does not exist in the database. '
                    'Run the snmp_evaluate_hosts.py script.'
                    '') % (self.agent_name, hostname)
                log.log2warn(1095, log_message)
                continue

            # Add poller
            poller = Poller(hostname, self.agent_name)
            pollers.append(poller)

        # Start threaded polling
        if bool(pollers) is True:
            Agent.threads(self.agent_name, pollers)
Example #6
    def valid(self):
        """Master method that defines whether data is OK.

        Args:
            None

        Returns:
            all_ok: True if all checks pass

        """
        # Initialize key variables
        validity = [self.validated]

        # Append results of tests
        validity.append(self._check_meta())
        validity.append(self._check_data_types())
        validity.append(self._check_duplicates())

        # Do final check
        if False in validity:
            all_ok = False
            # Error message
            if self.filepath is not None:
                log_message = (
                    'Cache file %s is invalid'
                    '') % (self.filepath)
                log.log2warn(1021, log_message)
            else:
                log_message = ('Cache data is invalid')
                log.log2warn(1059, log_message)
        else:
            all_ok = True

        # Return
        return all_ok
Example #7
    def __init__(self, agent_name):
        """Method initializing the class.

        Args:
            agent_name: Name of the agent

        Returns:
            None

        """
        # Initialize key variables
        self.agent_name = agent_name
        self.agent_yaml = {}

        # Get the language used
        config = jm_configuration.Config()
        lang = config.language()

        # Determine the agent's language yaml file
        root_directory = jm_general.root_directory()
        yaml_file = (
            '%s/infoset/metadata/%s/agents/%s.yaml') % (
                root_directory, lang, self.agent_name)

        # Read the agent's language yaml file
        if os.path.exists(yaml_file) is True:
            with open(yaml_file, 'r') as file_handle:
                yaml_from_file = file_handle.read()
            # The language file needs no arbitrary Python objects,
            # so use the safer yaml.safe_load
            self.agent_yaml = yaml.safe_load(yaml_from_file)
        else:
            log_message = ('Agent language file %s does not exist.') % (
                yaml_file)
            log.log2warn(1034, log_message)
Example #8
    def run(self):
        """Update the database using threads."""
        while True:
            # Initialize key variables
            hostname = None

            # Get the data_dict
            data_dict = self.queue.get()
            uid = data_dict['uid']
            metadata = data_dict['metadata']
            config = data_dict['config']

            # Initialize other values
            max_timestamp = 0

            # Sort metadata by timestamp
            metadata.sort()

            # Process file for each timestamp, starting from the oldest file
            for (timestamp, filepath) in metadata:
                # Read in data
                ingest = drain.Drain(filepath)

                # Make sure file is OK
                # Move it to a directory for further analysis
                # by administrators
                if ingest.valid() is False:
                    log_message = (
                        'Cache ingest file %s is invalid. Moving.'
                        '') % (filepath)
                    log.log2warn(1054, log_message)
                    shutil.copy(
                        filepath, config.ingest_failures_directory())
                    os.remove(filepath)
                    continue

                # Update database
                dbase = UpdateDB(ingest)
                dbase.update()

                # Get the max timestamp
                max_timestamp = max(timestamp, max_timestamp)

                # Get hostname
                hostname = ingest.hostname()

                # Purge source file
                ingest.purge()

            # Update the last time the agent was contacted
            _update_agent_last_update(uid, max_timestamp)

            # Update the host / agent table timestamp if hostname was processed
            if hostname is not None:
                _host_agent_last_update(hostname, uid, max_timestamp)

            # All done!
            self.queue.task_done()
Example #9
    def stop(self):
        """Stop the daemon.

        Args:
            None

        Returns:
            None
        """
        # Get the pid from the pidfile
        try:
            with open(self.pidfile, 'r') as pf_handle:
                pid = int(pf_handle.read().strip())
        except IOError:
            pid = None

        if not pid:
            log_message = (
                'PID file: %s does not exist. Daemon not running?'
                '') % (self.pidfile)
            log.log2warn(1063, log_message)
            # Not an error in a restart
            return

        # Try killing the daemon process
        try:
            while True:
                if self.lockfile is None:
                    os.kill(pid, signal.SIGTERM)
                else:
                    time.sleep(0.3)
                    if os.path.exists(self.lockfile) is True:
                        continue
                    else:
                        os.kill(pid, signal.SIGTERM)
                time.sleep(0.3)
        except OSError as err:
            error = str(err.args)
            if error.find("No such process") > 0:
                self.delpid()
                self.dellock()
            else:
                log_message = (str(err.args))
                log_message = (
                    '%s - PID file: %s') % (log_message, self.pidfile)
                log.log2die(1068, log_message)
        except Exception:
            log_message = (
                'Unknown daemon "stop" error for PID file: %s'
                '') % (self.pidfile)
            log.log2die(1066, log_message)

        # Log success
        self.delpid()
        self.dellock()
        log_message = ('Daemon Stopped - PID file: %s') % (self.pidfile)
        log.log2quiet(1071, log_message)
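
The delpid and dellock methods called above are not shown. A plausible sketch, assuming they simply remove the daemon's PID and lock files:

    def delpid(self):
        """Sketch: remove the daemon's PID file (assumed behaviour)."""
        if os.path.exists(self.pidfile):
            os.remove(self.pidfile)

    def dellock(self):
        """Sketch: remove the daemon's lock file, if one is configured."""
        if self.lockfile is not None and os.path.exists(self.lockfile):
            os.remove(self.lockfile)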
Example #10
def _check_duplicates(information):
    """Check whether reported data reported is already in the database.

    Args:
        information: Data dictionary to check

    Returns:
        valid: True if valid

    """
    # Initialize key variables
    valid = True

    # Check that we are evaluating a dict
    if isinstance(information, dict) is False:
        log_message = ('Ingest data is not a dictionary')
        log.log2warn(1116, log_message)
        valid = False
        return valid

    # Check that we have the correct keys in the dict
    if _check_primary_keys_exist(information) is False:
        valid = False
        return valid

    # Get values
    timestamp = int(information['timestamp'])
    uid = information['uid']
    hostname = information['hostname']

    # Check if there is a duplicate entry for this UID
    if db_agent.uid_exists(uid) is not False:
        idx_agent = db_agent.GetUID(uid).idx()

        # Check if host exists
        if db_host.hostname_exists(hostname) is True:
            idx_host = db_host.GetHost(hostname).idx()

            # Check for host / agent entry existence
            if db_hostagent.host_agent_exists(
                    idx_host, idx_agent) is True:
                # Check if this host / agent has been updated before
                last_timestamp = db_hostagent.GetHostAgent(
                    idx_host, idx_agent).last_timestamp()

                # Validate
                if timestamp <= last_timestamp:
                    log_message = (
                        'Data for UID %s, hostname %s at timestamp %s '
                        'is already found in database.'
                        '') % (uid, hostname, timestamp)
                    log.log2warn(1113, log_message)
                    valid = False

    # Return
    return valid
Example #11
    def post(self, save=True, data=None):
        """Post data to central server.

        Args:
            save: When True, save data to cache directory if posting fails
            data: Data to post. If None, then uses self.data

        Returns:
            success: "True: if successful

        """
        # Initialize key variables
        success = False
        response = False
        timestamp = self.data['timestamp']
        uid = self.data['uid']

        # Create data to post
        if data is None:
            data = self.data

        # Post data save to cache if this fails
        try:
            result = requests.post(self.url, json=data)
            response = True
        except requests.exceptions.RequestException:
            if save is True:
                # Create a unique, very long filename to reduce the
                # risk of name collisions
                hosthash = jm_general.hashstring(self.data['hostname'], sha=1)
                filename = ('%s/%s_%s_%s.json') % (
                    self.cache_dir, timestamp, uid, hosthash)

                # Save data
                with open(filename, 'w') as f_handle:
                    json.dump(data, f_handle)

        # Define success
        if response is True:
            if result.status_code == 200:
                success = True

        # Log message
        if success is True:
            log_message = (
                'Agent "%s" successfully contacted server %s'
                '') % (self.name(), self.url)
            log.log2quiet(1027, log_message)
        else:
            log_message = (
                'Agent "%s" failed to contact server %s'
                '') % (self.name(), self.url)
            log.log2warn(1028, log_message)

        # Return
        return success
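
The jm_general.hashstring helper is not shown. A sketch consistent with its use above, assuming sha=1 selects SHA1 and the result is a hex digest (both assumptions):

import hashlib


def hashstring(string, sha=256):
    """Sketch: return a hex digest of a string (assumed helper behaviour).

    sha=1 selects SHA1; any other value falls back to SHA256 here.
    """
    encoded = string.encode()
    if sha == 1:
        return hashlib.sha1(encoded).hexdigest()
    return hashlib.sha256(encoded).hexdigest()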
Example #12
    def add_all(self, data_list, error_code, die=True):
        """Do a database modification.

        Args:
            data_list: List of sqlalchemy table objects
            error_code: Error number to use if one occurs
            die: When False, log a warning instead of dying on error

        Returns:
            success: True if successful

        """
        # Initialize key variables
        success = False

        # Open database connection. Prepare cursor
        session = self.session()

        try:
            # Update the database cache
            session.add_all(data_list)

            # Commit the change
            session.commit()

            # Update success
            success = True

        except Exception as exception_error:
            success = False
            session.rollback()
            log_message = (
                'Unable to modify database. '
                'Error: \"%s\"') % (exception_error)
            if die is True:
                log.log2die(error_code, log_message)
            else:
                log.log2warn(error_code, log_message)

        except:  # Bare except: also roll back on BaseException errors
            success = False
            session.rollback()
            log_message = ('Unexpected database exception')
            if die is True:
                log.log2die(error_code, log_message)
            else:
                log.log2warn(error_code, log_message)

        # disconnect from server
        self.close()

        # Return
        return success
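
A minimal usage sketch; Database (the wrapper class exposing add_all), the Agent model, and the error code 1200 are hypothetical stand-ins:

# Hypothetical usage: insert several ORM objects in one transaction
rows = [Agent(idx=1, name='agent_a'), Agent(idx=2, name='agent_b')]
database = Database()
success = database.add_all(rows, 1200, die=False)
# success is False if the commit failed; because die=False, a warning
# was logged instead of terminating the process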
Example #13
    def query(self):
        """Query all remote hosts for data.

        Args:
            None

        Returns:
            None

        """
        # Check each hostname
        hostnames = self.config.agent_snmp_hostnames()
        for hostname in hostnames:
            # Get valid SNMP credentials
            validate = snmp_manager.Validate(
                hostname, self.snmp_config.snmp_auth())
            snmp_params = validate.credentials()

            # Log message
            if snmp_params is None:
                log_message = (
                    'No valid SNMP configuration found '
                    'for host "%s"') % (hostname)
                log.log2warn(1006, log_message)
                continue

            # Create query and make sure the MIB is supported
            snmp_object = snmp_manager.Interact(snmp_params)
            snmp_query = mib_sentry3.init_query(snmp_object)
            if snmp_query.supported() is False:
                log_message = (
                    'The Sentry3 MIB is not supported by host "%s"'
                    '') % (hostname)
                log.log2warn(1001, log_message)
                continue

            # Get the UID for the agent after all preliminary checks are OK
            uid_env = agent.get_uid(hostname)

            # Post data to the remote server
            self.upload(uid_env, hostname, snmp_query)
Example #14
    def query(self):
        """Query all remote hosts for data.

        Args:
            None

        Returns:
            None

        """
        # Initialize key variables
        port = self.config.agent_port()

        # Set logging
        log_file = self.config.log_file()
        logging.basicConfig(filename=log_file, level=logging.DEBUG)

        # Post data to the remote server
        infoset.run(
            debug=True, host='0.0.0.0',
            threaded=True, port=port)
Example #15
    def _check_primary_keys_in_file(self):
        """Validate the values of the primary JSON keys in the ingest file.

        Args:
            None

        Returns:
            valid: True if valid

        """
        # Initialize key variables
        valid = True
        filepath = self.filepath

        # Parse filename for information
        if self.filepath is not None:
            if _valid_filename(filepath) is True:
                filename = os.path.basename(filepath)
                (name, _) = filename.split('.')
                (tstamp, uid, _) = name.split('_')
                timestamp = int(tstamp)

                # Double check that the UID and timestamp in the
                # filename matches that in the file.
                # Ignore invalid files as a safety measure.
                # Don't try to delete. They could be owned by some
                # one else and the daemon could crash
                if uid != self.information['uid']:
                    log_message = (
                        'UID %s in file %s does not match UID %s in filename.'
                        '') % (self.information['uid'], filepath, uid)
                    log.log2warn(1123, log_message)
                    valid = False

                # Check timestamp
                if timestamp != self.information['timestamp']:
                    log_message = (
                        'Timestamp %s in file %s does not match timestamp '
                        '%s in filename.'
                        '') % (
                            self.information['timestamp'],
                            filepath, timestamp)
                    log.log2warn(1111, log_message)
                    valid = False

                # Check timestamp validity
                if jm_general.validate_timestamp(timestamp) is False:
                    log_message = (
                        'Timestamp %s in file %s is not normalized'
                        '') % (timestamp, filepath)
                    log.log2warn(1112, log_message)
                    valid = False
            else:
                valid = False

        # Return
        return valid
Example #16
def _check_reported_data(information):
    """Check the data types being reported by the agent.

    Args:
        information: Data to analyze

    Returns:
        valid: True if valid

    """
    # Initialize key variables
    valid = True
    data_types = ['chartable', 'other']

    # Check that we are evaluating a dict
    if isinstance(information, dict) is False:
        log_message = ('Ingest data is not a dictionary')
        log.log2warn(1121, log_message)
        valid = False
        return valid

    # Process chartable data
    for data_type in data_types:
        # Skip if data type isn't in the data
        if data_type not in information:
            continue

        # Process the data type
        for _, reported_data in sorted(
                information[data_type].items()):
            # Process keys
            for key in ['base_type', 'description', 'data']:
                if key not in reported_data:
                    log_message = (
                        '"%s" data type does not contain a "%s" key.'
                        '') % (data_type, key)
                    log.log2warn(1115, log_message)
                    valid = False

            # Process data
            if 'data' in reported_data:
                for datapoint in reported_data['data']:
                    if len(datapoint) != 3:
                        log_message = (
                            '"%s" data type does not contain valid '
                            'datapoints in its "data" key.'
                            '') % (data_type)
                        log.log2warn(1114, log_message)
                        valid = False

    # Return
    return valid
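
For reference, a minimal agent payload that passes these checks. The field values are illustrative, and the 3-item datapoint layout ([index, value, source]) follows the structure built in Examples #20 and #21:

payload = {
    'timestamp': 1468857600,
    'uid': 'abc123',
    'agent': 'sentry3',
    'hostname': 'host.example.org',
    'chartable': {
        'temperature': {
            'base_type': 1,
            'description': 'Chassis temperature',
            'data': [[0, 23.5, 'sensor_0']],  # [index, value, source]
        }
    },
}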
Example #17
def threads(agent_name, pollers):
    """Function where agents poll devices using multithreading.

    Args:
        agent_name: Agent name
        pollers: List of polling objects

    Returns:
        None

    """
    # Get configuration
    config = jm_configuration.Config()
    threads_in_pool = config.agent_threads()

    # Spawn processes only if we have files to process
    if bool(pollers) is True:
        # Process lock file
        f_obj = hidden.File()
        lockfile = f_obj.lock(agent_name)
        if os.path.exists(lockfile) is True:
            # Return if lock file is present
            log_message = (
                'Agent lock file %s exists. Multiple agent daemons '
                'running or the daemon may have died '
                'catastrophically in the past, in which case the lockfile '
                'should be deleted. Exiting agent process. '
                'Will try again later.'
                '') % (lockfile)
            log.log2warn(1044, log_message)
            return
        else:
            # Create lockfile
            open(lockfile, "a").close()

        # Spawn a pool of threads, and pass them queue instance
        for _ in range(min(threads_in_pool, len(pollers))):
            update_thread = AgentThread(THREAD_QUEUE)
            update_thread.daemon = True

            # Sometimes we exhaust the thread abilities of the OS
            # even with the "threads_in_pool" limit. This is because
            # there could be a backlog of cache files to process
            # and we have overlapping ingests due to a deleted lockfile.
            # This code ensures we don't exceed the limits.
            try:
                update_thread.start()
            except RuntimeError:
                log_message = (
                    'Too many threads created for agent "%s". '
                    'Verify that agent lock file is present.') % (agent_name)

                # Remove the lockfile so we can restart later then die
                os.remove(lockfile)
                log.log2die(1078, log_message)
            except:
                log_message = ("Unknown error occurred when trying to " 'create threads for agent "%s"') % (agent_name)

                # Remove the lockfile so we can restart later then die
                os.remove(lockfile)
                log.log2die(1079, log_message)

        # Start polling
        for poller in pollers:
            ##############################################################
            #
            # Define variables that will be required for the threading
            # We have to initialize the dict during every loop to prevent
            # data corruption
            #
            ##############################################################
            THREAD_QUEUE.put(poller)

        # Wait on the queue until everything has been processed
        THREAD_QUEUE.join()

        # PYTHON BUG. Join can occur while threads are still shutting down.
        # This can create spurious "Exception in thread (most likely raised
        # during interpreter shutdown)" errors.
        # The "time.sleep(1)" adds a delay to make sure things really terminate
        # properly. This seems to be an issue on virtual machines in Dev only
        time.sleep(1)

        # Return if lock file is present
        if os.path.exists(lockfile) is True:
            os.remove(lockfile)
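
The AgentThread class used above is not shown. A sketch of the queue-consumer worker it implies, following the run() pattern of Examples #8 and #23; the query() call is an assumption about the Poller interface:

import queue
import threading

THREAD_QUEUE = queue.Queue()


class AgentThread(threading.Thread):
    """Sketch: pull Poller objects off a queue and run them."""

    def __init__(self, work_queue):
        threading.Thread.__init__(self)
        self.queue = work_queue

    def run(self):
        """Poll hosts until the queue is exhausted."""
        while True:
            poller = self.queue.get()
            poller.query()  # Assumed polling entry point
            self.queue.task_done()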
Example #18
def process(agent_name):
    """Method initializing the class.

    Args:
        agent_name: agent name

    Returns:
        None

    """
    # Initialize key variables
    argument_list = []
    uid_metadata = defaultdict(lambda: defaultdict(dict))

    # Configuration setup
    config = jm_configuration.Config()
    threads_in_pool = config.ingest_threads()

    # Make sure we have database connectivity
    if db.connectivity() is False:
        log_message = (
            'No connectivity to database. Check if running. '
            'Check database authentication parameters.'
            '')
        log.log2warn(1053, log_message)
        return

    # Get metadata on files
    uid_metadata = validate_cache_files()

    # Spawn processes only if we have files to process
    if bool(uid_metadata.keys()) is True:
        # Process lock file
        f_obj = hidden.File()
        lockfile = f_obj.lock(agent_name)
        if os.path.exists(lockfile) is True:
            # Return if lock file is present
            log_message = (
                'Ingest lock file %s exists. Multiple ingest daemons running '
                'or lots of cache files to ingest. Ingester may have died '
                'catastrophically in the past, in which case the lockfile '
                'should be deleted. Exiting ingest process. '
                'Will try again later.'
                '') % (lockfile)
            log.log2warn(1069, log_message)
            return
        else:
            # Create lockfile
            open(lockfile, 'a').close()

        # Read each cache file
        for hosthash in uid_metadata.keys():
            for uid in uid_metadata[hosthash].keys():
                # Create a list of arguments to process
                argument_list.append(
                    (config, uid_metadata[hosthash][uid])
                )

        # Create a pool of sub process resources
        with Pool(processes=threads_in_pool) as pool:

            # Create sub processes from the pool
            pool.map(_wrapper_process, argument_list)

        # Wait for all the processes to end
        # pool.join()

        # Return if lock file is present
        if os.path.exists(lockfile) is True:
            os.remove(lockfile)
Example #19
def _check_chartable_data(information):
    """Check the data types being reported by the agent.

    Args:
        information: Data to analyze

    Returns:
        valid: True if valid

    """
    # Initialize key variables
    valid = True
    data_type = 'chartable'

    # Check that we are evaluating a dict
    if isinstance(information, dict) is False:
        log_message = ('Ingest data is not a dictionary')
        log.log2warn(1122, log_message)
        valid = False
        return valid

    # Check for chartable data
    if data_type in information:

        # Process the data type
        for _, reported_data in sorted(
                information[data_type].items()):

            # Make sure the base types are numeric
            if 'base_type' in reported_data:
                try:
                    float(reported_data['base_type'])
                except (TypeError, ValueError):
                    log_message = (
                        'Chartable "base_type" key is non-numeric.')
                    log.log2warn(1120, log_message)
                    valid = False
            else:
                log_message = (
                    'Chartable data has no "base_type" key.')
                log.log2warn(1117, log_message)
                valid = False

            # Process data
            if 'data' in reported_data:
                for datapoint in reported_data['data']:
                    # Check to make sure value is numeric
                    value = datapoint[1]
                    try:
                        float(value)
                    except (TypeError, ValueError):
                        log_message = (
                            'Chartable data has non-numeric data values.')
                        log.log2warn(1119, log_message)
                        valid = False
                        break
            else:
                log_message = (
                    'Chartable data has no "data" key.')
                log.log2warn(1118, log_message)
                valid = False

    # Return
    return valid
Example #20
    def _datapoints(self):
        """Create the master dictionary for the host.

        Args:
            None

        Returns:
            None

        """
        # Initialize key variables
        snmp_params = self.snmp_params
        master = self._master()

        # Get sources
        snmp_object = snmp_manager.Interact(snmp_params)
        sources = {}
        for labels_oid in master.keys():
            # Track sources per label OID so that multiple label OIDs
            # do not overwrite one another's mappings
            sources[labels_oid] = {}
            oid_results = snmp_object.swalk(labels_oid)

            # Return if there is an error
            if bool(oid_results) is False:
                log_message = (
                    'Failed to contact SNMP host %s. '
                    'Will collect data on next poll.'
                    '') % (self.hostname)
                log.log2warn(1024, log_message)
                return

            for key, value in oid_results.items():
                sources[labels_oid][_index(labels_oid, key)] = (
                    jm_general.decode(value))

        # Get values
        for labels_oid in master.keys():
            for agent_label in master[labels_oid].keys():
                # Initialize datapoints
                datapoints = defaultdict(lambda: defaultdict(dict))

                # Information about the OID
                values_oid = master[labels_oid][agent_label]['values_oid']
                base_type = master[labels_oid][agent_label]['base_type']
                multiplier = master[labels_oid][agent_label]['multiplier']

                # Get OID values
                values = {}
                oid_results = snmp_object.swalk(values_oid)

                # Return if there is an error
                if bool(oid_results) is False:
                    log_message = (
                        'Failed to contact SNMP host %s. '
                        'Will collect data on next poll.'
                        '') % (self.hostname)
                    log.log2warn(1022, log_message)
                    return

                # Only process floating point values
                for key, value in oid_results.items():
                    try:
                        _ = float(value)
                    except (TypeError, ValueError):
                        continue
                    values[_index(labels_oid, key)] = value * multiplier

                # Create list of data for json
                data = []
                for index, value in values.items():
                    data.append([index, value, sources[labels_oid][index]])

                # Finish up dict for json
                datapoints[agent_label]['data'] = data
                datapoints[agent_label]['base_type'] = base_type

                # Populate agent
                self.agent.populate(datapoints)

        # Post data
        self.agent.post()
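
The _index helper is assumed to recover the per-row index from a walked OID. A sketch that takes the last OID node, consistent with how walked results are keyed against the label OID (the exact rule is an assumption):

def _index(labels_oid, oid):
    """Sketch: return the final node of an OID as an integer index.

    Assumes walked OIDs have the form '<labels_oid>.<...>.<index>'.
    """
    return int(oid.split('.')[-1])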
Example #21
    def __init__(self, filename):
        """Method initializing the class.

        Args:
            filename: Cache filename

        Returns:
            None

        """
        # Initialize key variables
        self.filename = filename
        self.data = defaultdict(lambda: defaultdict(dict))
        self.metadata = []
        self.validated = False
        self.agent_meta = {}
        data_types = ['chartable', 'other']
        agent_meta_keys = ['timestamp', 'uid', 'agent', 'hostname']

        # Ingest data
        validator = validate.ValidateCache(filename)
        information = validator.getinfo()

        # Log if data is bad
        if information is False:
            log_message = (
                'Cache ingest file %s is invalid.') % (filename)
            log.log2warn(1051, log_message)
            return
        else:
            self.validated = True

        if self.validated is True:
            # Get universal parameters from file (timestamp as an integer)
            for key in agent_meta_keys:
                if key == 'timestamp':
                    self.agent_meta[key] = int(information[key])
                else:
                    self.agent_meta[key] = information[key]
            timestamp = self.agent_meta['timestamp']
            uid = self.agent_meta['uid']

            # Process chartable data
            for data_type in data_types:
                # Skip if data type isn't in the data
                if data_type not in information:
                    continue

                # Process the data type
                for label, group in sorted(
                        information[data_type].items()):
                    # Get universal parameters for group
                    base_type = _base_type(group['base_type'])
                    description = group['description']

                    # Initialize base type
                    if base_type not in self.data[data_type]:
                        self.data[data_type][base_type] = []

                    # Process data
                    for datapoint in group['data']:
                        index = datapoint[0]
                        value = datapoint[1]
                        source = datapoint[2]
                        did = _did(
                            uid, label, index,
                            self.agent_meta['agent'],
                            self.agent_meta['hostname'])

                        # Update data (both branches built the same
                        # record, so one append suffices)
                        self.data[data_type][base_type].append(
                            (uid, did, value, timestamp)
                        )

                        # Update source metadata
                        self.metadata.append(
                            (uid, did, label, source,
                             description, base_type)
                        )
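
The _did helper derives the datapoint ID from the UID, label, index, agent, and hostname. A plausible sketch, assuming it hashes their concatenation the way hashstring is used elsewhere (the actual derivation is not shown):

import hashlib


def _did(uid, label, index, agent, hostname):
    """Sketch: deterministic datapoint ID (assumed derivation)."""
    seed = ('%s%s%s%s%s') % (uid, label, index, agent, hostname)
    return hashlib.sha256(seed.encode()).hexdigest()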
Example #22
    def valid(self):
        """Master method that defines whether data is OK.

        Args:
            None

        Returns:
            all_ok: True if all checks pass

        """
        # Initialize key variables
        validity = []
        ts_start = time.time()

        # Check primary keys
        validity.append(
            _check_primary_keys_exist(self.information))

        # Check timestamp key
        if False not in validity:
            validity.append(
                _check_timestamp_key(self.information))

        # Check validity of primary keys in file
        if False not in validity:
            validity.append(
                self._check_primary_keys_in_file())

        # Check chartable and unchartable data in the data
        if False not in validity:
            validity.append(_check_reported_data(self.information))
        if False not in validity:
            validity.append(_check_chartable_data(self.information))

        # Check if data to be validated is already in the database
        if False not in validity:
            validity.append(_check_duplicates(self.information))

        # Do final check
        if False in validity:
            # Log failure
            if self.filepath is None:
                mid_string = ''
            else:
                mid_string = (' in %s') % (self.filepath)
            log_message = ('Cache data%s is invalid') % (mid_string)
            log.log2warn(1059, log_message)
            all_ok = False
        else:
            # Log success
            ts_stop = time.time()
            duration = ts_stop - ts_start
            if self.filepath is None:
                mid_string = ''
            else:
                mid_string = (' of %s') % (self.filepath)
            log_message = (
                'Data validation%s took %s seconds.'
                '') % (mid_string, round(duration, 4))
            log.log2quiet(1126, log_message)
            all_ok = True

        # Return
        return all_ok
Example #23
    def run(self):
        """Update the database using threads."""
        while True:
            # Get the data_dict
            data_dict = self.queue.get()
            uid = data_dict['uid']
            metadata = data_dict['metadata']
            config = data_dict['config']
            agents = data_dict['agents']
            datapoints = data_dict['datapoints']

            # Initialize other values
            max_timestamp = 0

            # Sort metadata by timestamp
            metadata.sort()

            # Process file for each timestamp
            for (timestamp, filepath) in metadata:
                # Read in data
                ingest = drain.Drain(filepath)

                # Make sure file is OK
                # Move it to a directory for further analysis
                # by administrators
                if ingest.valid() is False:
                    log_message = (
                        'Cache ingest file %s is invalid. Moving.'
                        '') % (filepath)
                    log.log2warn(1054, log_message)
                    shutil.move(
                        filepath, config.ingest_failures_directory())
                    continue

                # Update agent table if not there
                if ingest.uid() not in agents:
                    _insert_agent(
                        ingest.uid(),
                        ingest.agent(),
                        ingest.hostname(),
                        config
                        )
                    # Append the new insertion to the list
                    agents.append(ingest.uid())

                # Update datapoint metadata if not there
                for item in ingest.sources():
                    did = item[1]
                    if did not in datapoints:
                        _insert_datapoint(item, config)
                        # Append the new insertion to the list
                        datapoints.append(did)

                # Create map of DIDs to database row index values
                mapping = _datapoints_by_did(config)

                # Update chartable data
                _update_chartable(mapping, ingest, config)
                _update_unchartable(mapping, ingest, config)

                # Get the max timestamp
                max_timestamp = max(timestamp, max_timestamp)

                # Purge source file
                ingest.purge()

            # Update the last time the agent was contacted
            _update_agent_last_update(uid, max_timestamp, config)

            # All done!
            self.queue.task_done()
Example #24
def process(agent_name):
    """Method initializing the class.

    Args:
        agent_name: agent name

    Returns:
        None

    """
    # Initialize key variables
    uid_metadata = defaultdict(lambda: defaultdict(dict))

    # Configuration setup
    config = jm_configuration.Config()
    threads_in_pool = config.ingest_threads()

    # Make sure we have database connectivity
    if db.connectivity() is False:
        log_message = (
            'No connectivity to database. Check if running. '
            'Check database authentication parameters.'
            '')
        log.log2warn(1053, log_message)
        return

    # Get metadata on files
    uid_metadata = validate_cache_files()

    # Spawn processes only if we have files to process
    if bool(uid_metadata.keys()) is True:
        # Process lock file
        f_obj = hidden.File()
        lockfile = f_obj.lock(agent_name)
        if os.path.exists(lockfile) is True:
            # Return if lock file is present
            log_message = (
                'Ingest lock file %s exists. Multiple ingest daemons running '
                'or lots of cache files to ingest. Ingester may have died '
                'catastrophically in the past, in which case the lockfile '
                'should be deleted. Exiting ingest process. '
                'Will try again later.'
                '') % (lockfile)
            log.log2warn(1069, log_message)
            return
        else:
            # Create lockfile
            open(lockfile, 'a').close()

        # Spawn a pool of threads, and pass them queue instance
        # Only create the required number of threads up to the
        # threads_in_pool maximum
        for _ in range(
                min(threads_in_pool, len(uid_metadata))):
            update_thread = ProcessUID(THREAD_QUEUE)
            update_thread.daemon = True

            # Sometimes we exhaust the thread abilities of the OS
            # even with the "threads_in_pool" limit. This is because
            # there could be a backlog of cache files to process
            # and we have overlapping ingests due to a deleted lockfile.
            # This code ensures we don't exceed the limits.
            try:
                update_thread.start()
            except RuntimeError:
                log_message = (
                    'Too many threads created for cache ingest. '
                    'Verify that ingest lock file is present.')

                # Remove the lockfile so we can restart later then die
                os.remove(lockfile)
                log.log2die(1067, log_message)
            except:
                log_message = (
                    'Unknown error occurred when trying to '
                    'create cache ingest threads')

                # Remove the lockfile so we can restart later then die
                os.remove(lockfile)
                log.log2die(1072, log_message)

        # Read each cache file
        for hosthash in uid_metadata.keys():
            for uid in uid_metadata[hosthash].keys():
                ##############################################################
                #
                # Define variables that will be required for the threading
                # We have to initialize the dict during every loop to prevent
                # data corruption
                #
                ##############################################################
                data_dict = {}
                data_dict['uid'] = uid
                data_dict['metadata'] = uid_metadata[hosthash][uid]
                data_dict['config'] = config
                THREAD_QUEUE.put(data_dict)

        # Wait on the queue until everything has been processed
        THREAD_QUEUE.join()

        # PYTHON BUG. Join can occur while threads are still shutting down.
        # This can create spurious "Exception in thread (most likely raised
        # during interpreter shutdown)" errors.
        # The "time.sleep(1)" adds a delay to make sure things really terminate
        # properly. This seems to be an issue on virtual machines in Dev only
        time.sleep(1)

        # Return if lock file is present
        if os.path.exists(lockfile) is True:
            os.remove(lockfile)
Example #25
    def process(self):
        """Update the database using threads."""
        # Initialize key variables
        updated = False
        hostnames = []
        uids = []
        ingests = []
        agent_names = []
        agent_data = {
            'hostname': None,
            'uid': None,
            'sources': [],
            'chartable': [],
            'unchartable': []
        }

        # Get the data_dict
        metadata = self.metadata
        config = self.config

        # Initialize other values
        max_timestamp = 0

        # Get start time for activity
        start_ts = time.time()

        # Sort metadata by timestamp
        metadata.sort()

        # Process file for each timestamp, starting from the oldest file
        for (timestamp, filepath) in metadata:
            # Read in data
            ingest = drain.Drain(filepath)

            # Make sure file is OK
            # Move it to a directory for further analysis
            # by administrators
            if ingest.valid() is False:
                log_message = (
                    'Cache ingest file %s is invalid. Moving.'
                    '') % (filepath)
                log.log2warn(1054, log_message)
                shutil.copy(
                    filepath, config.ingest_failures_directory())
                os.remove(filepath)
                continue

            # Append data
            agent_data['chartable'].extend(ingest.chartable())
            agent_data['unchartable'].extend(ingest.other())
            agent_data['sources'].extend(ingest.sources())
            hostnames.append(ingest.hostname())
            uids.append(ingest.uid())
            ingests.append(ingest)
            agent_names.append(ingest.agent())

            # Purge source file
            # ingest.purge()

            # Get the max timestamp
            max_timestamp = max(timestamp, max_timestamp)

            # Update update flag
            updated = True

        # Verify that we have only processed data for the same hostname
        # UID and agent name
        if (jm_general.all_same(hostnames) is False) or (
                jm_general.all_same(uids) is False) or (
                    jm_general.all_same(agent_names) is False):
            log_message = (
                'Cache ingest files error for hostname %s, '
                'agent name %s, UID %s.'
                '') % (hostnames[0], agent_names[0], uids[0])
            log.log2quiet(1083, log_message)

        # Process the rest
        if updated is True:
            # Update remaining agent data
            agent_data['hostname'] = hostnames[0]
            agent_data['uid'] = uids[0]
            agent_data['agent_name'] = agent_names[0]

            # Update database
            dbase = UpdateDB(agent_data)
            dbase.update()

            # Update the last time the agent was contacted
            _update_agent_last_update(agent_data['uid'], max_timestamp)

            # Update the host / agent table timestamp if
            # hostname was processed
            _host_agent_last_update(
                agent_data['hostname'], agent_data['uid'], max_timestamp)

            # Purge source files. Only done after complete
            # success of database updates. If not we could lose data in the
            # event of an ingester crash. Ingester would re-read the files
            # and process the non-duplicates, while deleting the duplicates.
            for ingest in ingests:
                ingest.purge()

            # Log duration of activity
            duration = time.time() - start_ts
            log_message = (
                'UID %s was processed in %s seconds.'
                '') % (agent_data['uid'], duration)
            log.log2quiet(1127, log_message)
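
The jm_general.all_same helper guards against mixing hosts, agents, or UIDs in one batch. A plausible one-line implementation, assuming it reports whether every list element is identical:

def all_same(items):
    """Sketch: True if every element of the list is identical."""
    return len(set(items)) <= 1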