def _process_job(jobdata, splay, seconds, min_splay, base): run = False if 'last_run' not in jobdata: if jobdata.get('run_on_start', False): if splay: # Run `splay` seconds in the future, by telling the scheduler we last ran it # `seconds - splay` seconds ago. jobdata['last_run'] = time.time() - ( seconds - random.randint(min_splay, splay)) else: # Run now run = True jobdata['last_run'] = time.time() else: if splay: # Run `seconds + splay` seconds in the future by telling the scheduler we last # ran it at now + `splay` seconds. jobdata['last_run'] = time.time() + random.randint( min_splay, splay) elif 'buckets' in jobdata: # Place the host in a bucket and fix the execution time. jobdata['last_run'] = getlastrunbybuckets( jobdata['buckets'], seconds) log.debug('last_run according to bucket is %s', jobdata['last_run']) elif 'cron' in jobdata: # execute the hubble process based on cron expression jobdata['last_run'] = getlastrunbycron(base, seconds) else: # Run in `seconds` seconds. jobdata['last_run'] = time.time() if jobdata['last_run'] < time.time() - seconds: run = True return run
def main():
    """
    Run the main hubble loop

    Sets up the fileclient (retrying on failure), optionally runs a single
    user-requested function and exits, and otherwise loops forever:
    refreshing the fileserver, the pidfile and the grains at their
    configured frequencies and executing the schedule on every pass.
    """
    # Initial fileclient setup
    _clear_gitfs_locks()

    # Setup fileclient
    log.info('Setting up the fileclient/fileserver')
    retry_count = __opts__.get('fileserver_retry_count_on_startup', None)
    retry_time = __opts__.get('fileserver_retry_rate_on_startup', 30)
    count = 0
    while True:
        try:
            file_client = hubblestack.fileclient.get_file_client(__opts__)
            file_client.channel.fs.update()
            last_fc_update = time.time()
            break
        except Exception:
            # Retry forever when no retry_count is configured, but never
            # retry for a single-function run.
            if (retry_count is None or count < retry_count) and not __opts__['function']:
                log.exception(
                    'Exception thrown trying to setup fileclient. '
                    'Trying again in %s seconds.', retry_time)
                count += 1
                time.sleep(retry_time)
                continue
            else:
                log.exception(
                    'Exception thrown trying to setup fileclient. Exiting.')
                sys.exit(1)

    # Check for single function run
    if __opts__['function']:
        run_function()
        sys.exit(0)

    # Back-date the last refresh so grains are refreshed on the first pass.
    last_grains_refresh = time.time() - __opts__['grains_refresh_frequency']

    log.info('Starting main loop')
    pidfile_count = 0
    # pidfile_refresh in seconds, our scheduler deals in half-seconds
    pidfile_refresh = int(__opts__.get('pidfile_refresh', 60)) * 2
    while True:
        # Check if fileserver needs update
        if time.time() - last_fc_update >= __opts__['fileserver_update_frequency']:
            last_fc_update = _update_fileserver(file_client)

        pidfile_count += 1
        if __opts__['daemonize'] and pidfile_count > pidfile_refresh:
            pidfile_count = 0
            create_pidfile()

        if time.time() - last_grains_refresh >= __opts__['grains_refresh_frequency']:
            last_grains_refresh = _emit_and_refresh_grains()

        try:
            log.debug('Executing schedule')
            schedule()
        except Exception as exc:
            # KeyboardInterrupt derives from BaseException, so it is never
            # caught here and propagates on its own; no re-raise is needed.
            log.exception('Error executing schedule: %s', exc)

        time.sleep(__opts__.get('scheduler_sleep_frequency', 0.5))
def run_function():
    '''
    Run a single function requested by the user

    The function name is read from ``__opts__['function']`` and its
    arguments from ``__opts__['args']``; entries of the form ``key=value``
    become keyword arguments, everything else is positional. The result is
    optionally handed to the returner named in ``__opts__['return']`` and
    then printed.

    Exits the process with status 1 when the function is not available.
    '''
    # Parse the args
    args = []
    kwargs = {}
    for arg in __opts__['args']:
        if '=' in arg:
            kwarg, _, value = arg.partition('=')
            kwargs[kwarg] = value
        else:
            args.append(arg)

    log.debug('Parsed args: %s | Parsed kwargs: %s', args, kwargs)
    log.info('Executing user-requested function %s', __opts__['function'])

    # Only the lookup is guarded: a KeyError raised *inside* the requested
    # function must not be misreported as "function not available".
    try:
        func = __salt__[__opts__['function']]
    except KeyError:
        log.error('Function %s is not available, or not valid.',
                  __opts__['function'])
        sys.exit(1)
    ret = func(*args, **kwargs)

    if __opts__['return']:
        returner = '{0}.returner'.format(__opts__['return'])
        if returner not in __returners__:
            log.error('Could not find %s returner.', returner)
        else:
            log.info('Returning job data to %s', returner)
            returner_ret = {
                'id': __grains__['id'],
                'jid': salt.utils.jid.gen_jid(__opts__),
                'fun': __opts__['function'],
                'fun_args': args + ([kwargs] if kwargs else []),
                'return': ret,
                'retry': False
            }
            if __opts__.get('returner_retry', False):
                returner_ret['retry'] = True
            __returners__[returner](returner_ret)

    # TODO instantiate the salt outputter system?
    if __opts__['json_print']:
        print(json.dumps(ret))
    else:
        if not __opts__['no_pprint']:
            pprint.pprint(ret)
        else:
            print(ret)
def run_function():
    """
    Execute the one function the user asked for and report its result.

    The function name comes from ``__opts__['function']``; its arguments
    come from ``__opts__['args']``, where ``key=value`` entries are passed
    as keyword arguments and all others positionally. Exits with status 1
    if the name does not resolve to a callable in ``__mods__``.
    """
    fun_name = __opts__['function']

    # Split the raw argument list into positional and keyword arguments
    args = []
    kwargs = {}
    for raw in __opts__['args']:
        if '=' not in raw:
            args.append(raw)
            continue
        key, _, val = raw.partition('=')
        kwargs[key] = val

    log.debug('Parsed args: %s | Parsed kwargs: %s', args, kwargs)
    log.info('Executing user-requested function %s', fun_name)

    mod_fun = __mods__.get(fun_name)
    if not (mod_fun and callable(mod_fun)):
        log.error('Function %s is not available, or not valid.', fun_name)
        sys.exit(1)
    ret = mod_fun(*args, **kwargs)

    # Optionally forward the result to the configured returner
    if __opts__['return']:
        returner = '{0}.returner'.format(__opts__['return'])
        if returner in __returners__:
            log.info('Returning job data to %s', returner)
            payload = {
                'id': __grains__['id'],
                'jid': hubblestack.utils.jid.gen_jid(__opts__),
                'fun': __opts__['function'],
                'fun_args': args + ([kwargs] if kwargs else []),
                'return': ret,
            }
            __returners__[returner](payload)
        else:
            log.error('Could not find %s returner.', returner)

    # TODO instantiate the salt outputter system?
    if __opts__['json_print']:
        print(json.dumps(ret))
    elif not __opts__['no_pprint']:
        pprint.pprint(ret)
    else:
        print(ret)
def run_function():
    """
    Run the single function named in ``__opts__["function"]``.

    Entries of ``__opts__["args"]`` containing ``=`` are split on the first
    ``=`` and passed as keyword arguments; the rest are passed positionally.
    The return value is sent to the returner named by ``__opts__["return"]``
    (when set and found) and finally printed as JSON, pretty-printed, or
    printed plainly depending on ``json_print`` / ``no_pprint``.
    """
    positional_and_keyword = [(item, "=" in item) for item in __opts__["args"]]
    args = [item for item, is_kw in positional_and_keyword if not is_kw]
    kwargs = {}
    for item, is_kw in positional_and_keyword:
        if is_kw:
            name, _, value = item.partition("=")
            kwargs[name] = value

    log.debug("Parsed args: %s | Parsed kwargs: %s", args, kwargs)
    log.info("Executing user-requested function %s", __opts__["function"])

    mod_fun = __mods__.get(__opts__["function"])
    usable = bool(mod_fun) and callable(mod_fun)
    if not usable:
        log.error("Function %s is not available, or not valid.", __opts__["function"])
        sys.exit(1)

    ret = mod_fun(*args, **kwargs)

    if __opts__["return"]:
        returner = "{0}.returner".format(__opts__["return"])
        if returner not in __returners__:
            log.error("Could not find %s returner.", returner)
        else:
            log.info("Returning job data to %s", returner)
            fun_args = args + ([kwargs] if kwargs else [])
            __returners__[returner]({
                "id": __grains__["id"],
                "jid": hubblestack.utils.jid.gen_jid(__opts__),
                "fun": __opts__["function"],
                "fun_args": fun_args,
                "return": ret,
            })

    # TODO instantiate the salt outputter system?
    if __opts__["json_print"]:
        print(json.dumps(ret))
        return
    if __opts__["no_pprint"]:
        print(ret)
    else:
        pprint.pprint(ret)
def publish(report_directly_to_splunk=True, remove_dots=True, *args):
    """
    Publish the running configuration to splunk and return it.

    report_directly_to_splunk
        Whether to emit directly to splunk in addition to returning as a
        normal job. Defaults to True.

    remove_dots
        Forwarded to ``hubblestack.log.filter_logs``; requests replacing
        dots in top-level keys with underscores for ease of handling in
        splunk. Defaults to True.

    *args
        Keys of ``__opts__`` to publish. Names that are not present in
        ``__opts__`` are skipped. When no keys are given, a deep copy of the
        whole of ``__opts__`` minus its ``grains`` entry is published.

    Returns the configuration as filtered by ``hubblestack.log.filter_logs``.
    """
    log.debug("Started publishing config to splunk")

    if args:
        # Only the explicitly requested options that actually exist
        opts_to_log = {}
        for key in args:
            if key in __opts__:
                opts_to_log[key] = __opts__[key]
    else:
        opts_to_log = copy.deepcopy(__opts__)
        if "grains" in opts_to_log:
            del opts_to_log["grains"]

    # 'POP' is for tracking persistent opts protection
    noisy = os.environ.get("NOISY_POP_DEBUG")
    if noisy:
        log.error("POP config_publish (id=%d)", id(__opts__))

    filtered_conf = hubblestack.log.filter_logs(opts_to_log, remove_dots=remove_dots)
    if report_directly_to_splunk:
        hubblestack.log.emit_to_splunk(
            filtered_conf, "INFO", "hubblestack.hubble_config")
        log.debug("Published config to splunk")

    return filtered_conf
def publish(report_directly_to_splunk=True, remove_dots=True, *args):
    '''
    Collect config options, filter them and optionally emit them to splunk.

    report_directly_to_splunk
        Whether to emit directly to splunk in addition to returning as a
        normal job. Defaults to True.

    remove_dots
        Passed through to ``hubblestack.log.filter_logs``; asks for dots in
        top-level keys to be replaced with underscores. Defaults to True.

    *args
        Tuple of opts to log (keys in __opts__). Only keys that exist in
        __opts__ are published. If nothing is passed, a deep copy of the
        entire __opts__ without its ``grains`` key is published.

    Returns the filtered configuration.
    '''
    log.debug('Started publishing config to splunk')

    selected = {}
    if not args:
        selected = copy.deepcopy(__opts__)
        if 'grains' in selected:
            selected.pop('grains')
    else:
        present = [name for name in args if name in __opts__]
        for name in present:
            selected[name] = __opts__[name]

    filtered_conf = hubblestack.log.filter_logs(selected, remove_dots=remove_dots)

    if not report_directly_to_splunk:
        return filtered_conf

    hubblestack.log.emit_to_splunk(filtered_conf, 'INFO',
                                   'hubblestack.hubble_config')
    log.debug('Published config to splunk')
    return filtered_conf
def getlastrunbybuckets(buckets, seconds):
    """
    Compute a ``last_run`` timestamp that staggers hosts across buckets.

    The host's IPv4 address (as resolved from its own hostname) is reduced
    modulo ``buckets`` to pick a bucket; each bucket is assigned its own
    slice of the ``seconds``-long interval, plus a random jitter of whole
    seconds inside that slice.

    buckets
        Number of buckets (anything ``int()`` accepts). ``0`` means 256.

    seconds
        Length of the job interval, in seconds.

    Returns the most recent bucket execution time that lies in the past,
    so that adding ``seconds`` to it yields the next execution time.
    """
    buckets = int(buckets) or 256

    host_ip = socket.gethostbyname(socket.gethostname())
    # Fold the dotted quad into a single integer (a.b.c.d -> 32-bit value).
    bucket_sum = 0
    for octet in host_ip.split('.'):
        bucket_sum = bucket_sum * 256 + int(octet)
    bucket = bucket_sum % buckets
    log.debug('bucket number is %d out of %d', bucket, buckets)

    current_time = time.time()
    base_time = seconds * math.floor(current_time / seconds)
    seconds_between_buckets = seconds / buckets

    # random.randint() needs integer bounds: true division yields a float on
    # Python 3, and a spacing below one second would give a negative bound.
    max_jitter = int(seconds_between_buckets) - 1
    random_int = random.randint(0, max_jitter) if max_jitter > 0 else 0

    bucket_execution_time = base_time + (seconds_between_buckets * bucket) + random_int
    if bucket_execution_time < current_time:
        return bucket_execution_time
    # This interval's slot is still ahead of us; report the previous one.
    return bucket_execution_time - seconds
def _execute_function(jobdata, func, returners, args, kwargs):
    """
    Execute one scheduled function and hand its result to the returners.

    ``jobdata['last_run']`` is stamped with the current time before the
    function is called. Returner names that are not present in
    ``__returners__`` are logged and skipped.
    """
    log.debug('Executing scheduled function %s', func)
    jobdata['last_run'] = time.time()
    result = __salt__[func](*args, **kwargs)

    if __opts__['log_level'] == 'debug':
        log.debug('Job returned:\n%s', result)

    for name in returners:
        returner_key = '{0}.returner'.format(name)
        if returner_key in __returners__:
            log.debug('Returning job data to %s', returner_key)
            # A fresh jid and payload are generated for every returner
            payload = {
                'id': __grains__['id'],
                'jid': salt.utils.jid.gen_jid(__opts__),
                'fun': func,
                'fun_args': args + ([kwargs] if kwargs else []),
                'return': result,
            }
            __returners__[returner_key](payload)
        else:
            log.error('Could not find %s returner.', returner_key)
def _execute_function(jobdata, func, returners, args, kwargs):
    """
    Run the scheduled function and dispatch its return value.

    The current time is recorded in ``jobdata["last_run"]`` first, then
    ``__mods__[func]`` is called with ``args`` / ``kwargs``. Each entry of
    ``returners`` is resolved as ``<name>.returner`` in ``__returners__``;
    unknown ones are reported via the log and ignored.
    """
    log.debug("Executing scheduled function %s", func)
    jobdata["last_run"] = time.time()
    job_ret = __mods__[func](*args, **kwargs)

    debug_enabled = __opts__["log_level"] == "debug"
    if debug_enabled:
        log.debug("Job returned:\n%s", job_ret)

    qualified_names = ["{0}.returner".format(entry) for entry in returners]
    for qualified in qualified_names:
        if qualified not in __returners__:
            log.error("Could not find %s returner.", qualified)
            continue

        log.debug("Returning job data to %s", qualified)
        fun_args = args + ([kwargs] if kwargs else [])
        __returners__[qualified]({
            "id": __grains__["id"],
            "jid": hubblestack.utils.jid.gen_jid(__opts__),
            "fun": func,
            "fun_args": fun_args,
            "return": job_ret,
        })
def loads(self, msg, encoding=None, raw=False):
    '''
    Deserialize a msgpack message.

    :param msg: The serialized msgpack payload.

    :param encoding: Useful for Python 3 support. If the msgpack data
                     was encoded using "use_bin_type=True", this will
                     differentiate between the 'bytes' type and the
                     'str' type by decoding contents with 'str' type
                     to what the encoding was set as. Recommended
                     encoding is 'utf-8' when using Python 3.
                     If the msgpack data was not encoded using
                     "use_bin_type=True", it will try to decode
                     all 'bytes' and 'str' data (the distinction has
                     been lost in this case) to what the encoding is
                     set as. In this case, it will fail if any of
                     the contents cannot be converted.

    :param raw: Accepted but not used by this method.

    :raises HubbleDeserializationError: when the message cannot be
                     deserialized; the original exception is chained.
    '''
    def ext_type_decoder(code, data):
        # Extension type 78 carries a timestamp string; anything else is
        # passed through untouched.
        if code != 78:
            return data
        text = hubblestack.utils.stringutils.to_unicode(data)
        return datetime.datetime.strptime(text, '%Y%m%dT%H:%M:%S.%f')

    try:
        gc.disable()  # performance optimization for msgpack
        unpack_opts = {'use_list': True, 'ext_hook': ext_type_decoder}

        if msgpack.version < (0, 4, 0):
            # Very old msgpack: neither 'encoding' nor 'raw' is passed.
            ret = msgpack.loads(msg, **unpack_opts)
        else:
            # msgpack only supports 'encoding' starting in 0.4.0.
            # Due to this, if we don't need it, don't pass it at all so
            # that under Python 2 we can still work with older versions
            # of msgpack.
            if msgpack.version < (0, 5, 2):
                unpack_opts['encoding'] = encoding
            else:
                unpack_opts['raw'] = encoding is None

            try:
                ret = msgpack.loads(msg, **unpack_opts)
            except UnicodeDecodeError:
                # msg contains binary data: retry without decoding options
                for option in ('raw', 'encoding'):
                    unpack_opts.pop(option, None)
                ret = msgpack.loads(msg, **unpack_opts)
    except Exception as exc:
        log.critical(
            'Could not deserialize msgpack message. This often happens '
            'when trying to read a file not in binary mode. '
            'To see message payload, enable debug logging and retry. '
            'Exception: %s', exc)
        log.debug('Msgpack deserialization failure on message: %s', msg)
        gc.collect()
        raise HubbleDeserializationError(
            'Could not deserialize msgpack message.'
            ' See log for more info.') from exc
    finally:
        gc.enable()

    return ret
def kill_other_or_sys_exit(xpid, hname=r'hubble', ksig=signal.SIGTERM,
                           kill_other=True, no_pgrp=True):
    """
    Attempt to locate other hubbles using a cmdline regular expression and kill them when found.
    If killing the other processes fails (or kill_other is False), sys.exit instead.

    params:
        xpid       :- the pid (typically read from the pidfile) to inspect
        hname      :- the regular expression pattern to use to locate hubble (default: hubble)
        ksig       :- the signal to use to kill the other processes (default: signal.SIGTERM=15)
        kill_other :- (default: True); when false, don't attempt to kill,
                      just locate and exit (if found)
        no_pgrp    :- Avoid killing processes in this pgrp (avoid suicide). When no_pgrp is True,
                      invoke os.getpgrp() to populate the actual value. A false value disables
                      the process group check.

    returns:
        True  :- the other process was signalled and is gone one second later
        False :- there is no process at xpid, or it belongs to the excluded process group
        None  :- a process exists at xpid but its cmdline does not match hname

    The function calls sys.exit(1) when a matching process survives the signal, or when
    one is found and kill_other is false.

    caveats:
        There are some detailed notes on the process scanning in the function as comments.

        The most important caveat is that the hname regular expressions must match expecting
        that /proc/$$/cmdline text is null separated, not space separated.

        The other main caveat is that we can't actually examine the /proc/$$/exe file (that's
        always just a python). We have to scan the invocation text the kernel stored at launch.
        That text is not immutable and should not (normally) be relied upon for any purpose
        -- and this method does rely on it.
    """
    if no_pgrp is True:
        no_pgrp = os.getpgrp()
    # bool is a subclass of int: exclude it so that no_pgrp=False stays falsy
    # instead of turning into the (truthy) string "False".
    if isinstance(no_pgrp, int) and not isinstance(no_pgrp, bool):
        no_pgrp = str(no_pgrp)

    proc_dir = "/proc/{pid}".format(pid=xpid)
    if not os.path.isdir(proc_dir):
        # pidfile present, but nothing at that pid. Did we receive a sigterm?
        log.warning(
            'Pidfile found on startup, but no process at that pid. Did hubble receive a SIGTERM?'
        )
        return False

    # NOTE: we'd prefer to check readlink(/proc/[pid]/exe), but that won't do
    # any good the /opt/whatever/bin/hubble is normally a text file with a
    # shebang; which the kernel picks up and uses to execute the real binary
    # with the "bin" file as an argument; so we'll have to live with cmdline
    pfile = '/proc/{pid}/cmdline'.format(pid=xpid)
    log.debug('searching %s for hubble procs matching %s', pfile, hname)
    with open(pfile, 'r') as pidfile:
        # NOTE: cmdline is actually null separated, not space separated
        # that shouldn't matter much for most hname regular expressions, but one never knows.
        cmdline = pidfile.readline().replace('\x00', ' ').strip()

    if not re.search(hname, cmdline):
        # Some unrelated process owns this pid; nothing to do.
        return None

    if no_pgrp:
        pstatfile = '/proc/{pid}/stat'.format(pid=xpid)
        with open(pstatfile, 'r') as fh2:
            stat_line = fh2.readline()
        # NOTE: man proc(5) § /proc/[pid]/stat
        # (pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, ...)
        # comm is wrapped in parentheses and may itself contain whitespace,
        # so count fields from the last ')' rather than from the line start.
        pgrp = stat_line.rpartition(')')[2].split()[2]
        if pgrp == no_pgrp:
            log.debug(
                "process (%s) exists and seems to be a hubble, "
                "but matches our process group (%s), ignoring", xpid, pgrp)
            return False

    if not kill_other:
        log.error(
            "refusing to run while another hubble instance is running")
        sys.exit(1)

    log.warning(
        "process seems to still be alive and seems to be hubble,"
        " attempting to shutdown")
    os.kill(int(xpid), ksig)
    time.sleep(1)
    if os.path.isdir(proc_dir):
        log.error(
            "fatal error: failed to shutdown process (pid=%s) successfully", xpid)
        sys.exit(1)
    return True
def main():
    '''
    Run the main hubble loop

    Clears stale gitfs update locks, sets up the fileclient (retrying on
    failure), optionally runs a single user-requested function and exits,
    and otherwise loops forever: updating the fileserver, refreshing the
    pidfile and the grains at their configured frequencies and executing
    the schedule on every pass.
    '''
    # Initial fileclient setup
    # Clear old locks
    if 'gitfs' in __opts__['fileserver_backend'] or 'git' in __opts__['fileserver_backend']:
        git_objects = [
            salt.utils.gitfs.GitFS(
                __opts__,
                __opts__['gitfs_remotes'],
                per_remote_overrides=salt.fileserver.gitfs.PER_REMOTE_OVERRIDES,
                per_remote_only=salt.fileserver.gitfs.PER_REMOTE_ONLY)
        ]
        ret = {}
        for obj in git_objects:
            lock_type = 'update'
            cleared, errors = salt.fileserver.clear_lock(obj.clear_lock,
                                                         'gitfs',
                                                         remote=None,
                                                         lock_type=lock_type)
            if cleared:
                ret.setdefault('cleared', []).extend(cleared)
            if errors:
                ret.setdefault('errors', []).extend(errors)
        if ret:
            log.info('One or more gitfs locks were removed: %s', ret)

    # Setup fileclient
    log.info('Setting up the fileclient/fileserver')

    # Set up fileclient
    retry_count = __opts__.get('fileserver_retry_count_on_startup', None)
    retry_time = __opts__.get('fileserver_retry_rate_on_startup', 30)
    count = 0
    while True:
        try:
            fc = salt.fileclient.get_file_client(__opts__)
            fc.channel.fs.update()
            last_fc_update = time.time()
            break
        except Exception:
            # Retry forever when no retry_count is configured, but never
            # retry for a single-function run.
            if (retry_count is None or count < retry_count) and not __opts__['function']:
                log.exception(
                    'Exception thrown trying to setup fileclient. '
                    'Trying again in %s seconds.', retry_time)
                count += 1
                time.sleep(retry_time)
                continue
            else:
                log.exception(
                    'Exception thrown trying to setup fileclient. Exiting.')
                sys.exit(1)

    # Check for single function run
    if __opts__['function']:
        run_function()
        sys.exit(0)

    # Back-date the last refresh so grains are refreshed on the first pass.
    last_grains_refresh = time.time() - __opts__['grains_refresh_frequency']

    log.info('Starting main loop')
    pidfile_count = 0
    # pidfile_refresh in seconds, our scheduler deals in half-seconds
    pidfile_refresh = int(__opts__.get('pidfile_refresh', 60)) * 2
    while True:
        # Check if fileserver needs update
        if time.time() - last_fc_update >= __opts__['fileserver_update_frequency']:
            try:
                fc.channel.fs.update()
                last_fc_update = time.time()
            except Exception:
                retry = __opts__.get('fileserver_retry_rate', 900)
                # Push the timestamp forward so the next attempt is delayed.
                last_fc_update += retry
                log.exception('Exception thrown trying to update fileclient. '
                              'Trying again in %s seconds.', retry)

        pidfile_count += 1
        if __opts__['daemonize'] and pidfile_count > pidfile_refresh:
            pidfile_count = 0
            create_pidfile()

        if time.time() - last_grains_refresh >= __opts__['grains_refresh_frequency']:
            log.info('Refreshing grains')
            refresh_grains()
            last_grains_refresh = time.time()

            # Emit syslog at grains refresh frequency
            if not (salt.utils.platform.is_windows()) and __opts__.get(
                    'emit_grains_to_syslog', True):
                default_grains_to_emit = [
                    'system_uuid', 'hubble_uuid', 'session_uuid',
                    'machine_id', 'splunkindex', 'cloud_details',
                    'hubble_version', 'localhost', 'fqdn'
                ]
                grains_to_emit = []
                grains_to_emit.extend(
                    __opts__.get('emit_grains_to_syslog_list',
                                 default_grains_to_emit))
                emit_to_syslog(grains_to_emit)

        try:
            log.debug('Executing schedule')
            sf_count = schedule()
            if sf_count > 0:
                log.debug('Executed %d schedule item(s)', sf_count)
                hubblestack.log.workaround_salt_log_handler_queues()
        except Exception as e:
            # KeyboardInterrupt derives from BaseException, so it is never
            # caught here and propagates on its own; no re-raise is needed.
            log.exception('Error executing schedule: %s', e)

        time.sleep(__opts__.get('scheduler_sleep_frequency', 0.5))
def schedule():
    '''
    Rudimentary single-pass scheduler

    If we find we miss some of the salt scheduler features we could potentially
    pull in some of that code.

    Schedule data should be placed in the config with the following format:

    .. code-block:: yaml

        schedule:
          job1:
            function: hubble.audit
            seconds: 3600
            splay: 100
            min_splay: 50
            args:
              - cis.centos-7-level-1-scored-v2-1-0
            kwargs:
              verbose: True
              show_profile: True
            returner: splunk_nova_return
            run_on_start: True

    Note that ``args``, ``kwargs``, ``min_splay`` and ``splay`` are all
    optional. However, a scheduled job must always have a ``function`` and
    a time in ``seconds`` of how often to run the job.

    function
        Function to run in the format ``<module>.<function>``. Technically any
        salt module can be run in this way, but we recommend sticking to hubble
        functions. For simplicity, functions are run in the main daemon thread,
        so overloading the scheduler can result in functions not being run in
        a timely manner.

    seconds
        Frequency with which the job should be run, in seconds

    splay
        Randomized splay for the job, in seconds. A random number between
        <min_splay> and <splay> will be chosen and added to the ``seconds``
        argument, to decide the true frequency. The splay will be chosen on
        first run, and will only change when the daemon is restarted. Optional.

    min_splay
        This parameter works in conjunction with <splay>. If a <min_splay> is
        provided, a random number between <min_splay> and <splay> is chosen.
        If <min_splay> is not provided, it defaults to zero. Optional.

    args
        List of arguments for the function. Optional.

    kwargs
        Dict of keyword arguments for the function. Optional.

    returner
        String with a single returner, or list of returners to which we should
        send the results. Optional.

    run_on_start
        Whether to run the scheduled job on daemon start. Defaults to False.
        Optional.

    Jobs with invalid configuration are logged and skipped. Returns the
    number of scheduled functions executed during this pass.
    '''
    sf_count = 0
    base = datetime(2018, 1, 1, 0, 0)
    schedule_config = __opts__.get('schedule', {})
    if 'user_schedule' in __opts__ and isinstance(__opts__['user_schedule'], dict):
        schedule_config.update(__opts__['user_schedule'])

    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for jobname, jobdata in schedule_config.items():
        # Error handling galore
        if not jobdata or not isinstance(jobdata, dict):
            log.error('Scheduled job %s does not have valid data', jobname)
            continue
        if 'function' not in jobdata or 'seconds' not in jobdata:
            log.error('Scheduled job %s is missing a ``function`` or '
                      '``seconds`` argument', jobname)
            continue
        func = jobdata['function']
        if func not in __salt__:
            log.error('Scheduled job %s has a function %s which could not '
                      'be found.', jobname, func)
            continue
        try:
            if 'cron' in jobdata:
                seconds = getsecondsbycronexpression(base, jobdata['cron'])
            else:
                seconds = int(jobdata['seconds'])
            splay = int(jobdata.get('splay', 0))
            min_splay = int(jobdata.get('min_splay', 0))
        except (TypeError, ValueError):
            # Skip the job: continuing would reuse the previous job's values
            # (or hit an undefined name on the first job).
            log.error('Scheduled job %s has an invalid value for seconds or '
                      'splay.', jobname)
            continue
        args = jobdata.get('args', [])
        if not isinstance(args, list):
            log.error('Scheduled job %s has args not formed as a list: %s',
                      jobname, args)
            continue
        kwargs = jobdata.get('kwargs', {})
        if not isinstance(kwargs, dict):
            log.error('Scheduled job %s has kwargs not formed as a dict: %s',
                      jobname, kwargs)
            continue
        returners = jobdata.get('returner', [])
        if not isinstance(returners, list):
            returners = [returners]
        returner_retry = jobdata.get('returner_retry', False)

        # Actually process the job
        run = False
        if 'last_run' not in jobdata:
            if jobdata.get('run_on_start', False):
                if splay:
                    # Run `splay` seconds in the future, by telling the scheduler we last ran it
                    # `seconds - splay` seconds ago.
                    jobdata['last_run'] = time.time() - (
                        seconds - random.randint(min_splay, splay))
                else:
                    # Run now
                    run = True
                    jobdata['last_run'] = time.time()
            else:
                if splay:
                    # Run `seconds + splay` seconds in the future by telling the scheduler we last
                    # ran it at now + `splay` seconds.
                    jobdata['last_run'] = time.time() + random.randint(
                        min_splay, splay)
                elif 'buckets' in jobdata:
                    # Place the host in a bucket and fix the execution time.
                    jobdata['last_run'] = getlastrunbybuckets(
                        jobdata['buckets'], seconds)
                    log.debug('last_run according to bucket is %s',
                              jobdata['last_run'])
                elif 'cron' in jobdata:
                    # execute the hubble process based on cron expression
                    jobdata['last_run'] = getlastrunbycron(base, seconds)
                else:
                    # Run in `seconds` seconds.
                    jobdata['last_run'] = time.time()

        if jobdata['last_run'] < time.time() - seconds:
            run = True

        if run:
            log.debug('Executing scheduled function %s', func)
            jobdata['last_run'] = time.time()
            ret = __salt__[func](*args, **kwargs)
            sf_count += 1
            if __opts__['log_level'] == 'debug':
                log.debug('Job returned:\n%s', ret)
            for returner in returners:
                returner = '{0}.returner'.format(returner)
                if returner not in __returners__:
                    log.error('Could not find %s returner.', returner)
                    continue
                log.debug('Returning job data to %s', returner)
                returner_ret = {
                    'id': __grains__['id'],
                    'jid': salt.utils.jid.gen_jid(__opts__),
                    'fun': func,
                    'fun_args': args + ([kwargs] if kwargs else []),
                    'return': ret,
                    'retry': returner_retry
                }
                __returners__[returner](returner_ret)
    return sf_count