def __init__(self,
             creds_conf,
             cmds_conf,
             parsers_dir,
             output_type,
             output_addr,
             max_worker_threads=1,
             use_threads=True,
             num_threads_per_worker=10):
    """Wire up the managers, the shared collector and the default worker.

    Args:
        creds_conf: Credentials configuration, handed to ``HostManager``.
        cmds_conf: Commands configuration, handed to ``HostManager``.
        parsers_dir: Parser directory (or directories), handed to
            ``ParserManager`` as ``parser_dirs``.
        output_type: Output type, forwarded to the collector and workers.
        output_addr: Output address, forwarded to the collector and workers.
        max_worker_threads: Stored as-is on the instance.
        use_threads: Forwarded to the workers.
        num_threads_per_worker: Forwarded to the workers.

    NOTE(review): ``main()`` in this file calls ``scheduler.Scheduler`` with
    a ``collector_timeout=`` keyword. If this is that constructor, the
    keyword is not accepted here -- confirm against the class definition.
    """
    # Plain settings, stored verbatim.
    self.output_type = output_type
    self.output_addr = output_addr
    self.use_threads = use_threads
    self.num_threads_per_worker = num_threads_per_worker
    self.max_worker_threads = max_worker_threads

    # Book-keeping containers, both start out empty.
    self.workers = dict()
    self.working = set()

    # Managers are built first because the collector needs both of them.
    hosts = host_manager.HostManager(credentials=creds_conf,
                                     commands=cmds_conf)
    parsers = parser_manager.ParserManager(parser_dirs=parsers_dir)
    self.host_mgr = hosts
    self.parser_mgr = parsers
    self.collector = collector.Collector(hosts, parsers, output_type,
                                         output_addr)

    # Default worker that is started if there are no hosts to schedule.
    # The leading 120 is presumably an interval in seconds (the worker is
    # named 'Default-120sec') -- confirm against Worker.
    fallback_worker = Worker(120, self.collector, output_type, output_addr,
                             use_threads, num_threads_per_worker)
    fallback_worker.set_name('Default-120sec')
    self.default_worker = fallback_worker
def _build_arg_parser():
    """Return the argparse parser describing every command-line option."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--tag",
        nargs='+',
        help="Collect data from hosts that matches the tag")
    parser.add_argument(
        "--cmd-tag",
        nargs='+',
        help="Collect data from command that matches the tag")
    parser.add_argument("-s",
                        "--start",
                        action='store_true',
                        help="Start collecting (default 'no')")
    parser.add_argument("--loglvl",
                        default=20,
                        help="Logs verbosity, 10-debug, 50 Critical")
    parser.add_argument("--logdir",
                        default="",
                        help="Directory where to store logs")
    parser.add_argument(
        "--sharding",
        help=
        "Define if the script is part of a shard need to include the place in the shard and the size of the shard [0/3]"
    )
    parser.add_argument(
        "--sharding-offset",
        default=True,
        help="Define an offset needs to be applied to the shard_id")
    parser.add_argument("--parserdir",
                        default="parsers",
                        help="Directory where to find parsers")
    parser.add_argument(
        "--collector-timeout",
        default=15,
        help="Timeout for collector device rpc/rest calls")
    parser.add_argument("--retry", default=5, help="Max retry")
    parser.add_argument("--host", default=None, help="Host DNS or IP")
    parser.add_argument("--hosts",
                        default="hosts.yaml",
                        help="Hosts file in yaml")
    parser.add_argument("--commands",
                        default="commands.yaml",
                        help="Commands file in Yaml")
    parser.add_argument("--credentials",
                        default="credentials.yaml",
                        help="Credentials file in Yaml")
    # store_false: the parsed value (dest 'no_facts') is True unless the
    # flag is given, so it can be used directly as "collect facts?".
    parser.add_argument(
        "--no-facts",
        action='store_false',
        help=
        "Disable facts collection on device (remove version and product name in results)"
    )
    parser.add_argument("--output-format",
                        default="influxdb",
                        help="Format of the output")
    parser.add_argument("--output-type",
                        default="stdout",
                        choices=['stdout', 'http'],
                        help="Type of output")
    parser.add_argument("--output-addr",
                        default="http://localhost:8186/write",
                        help="Addr information for output action")
    parser.add_argument(
        "--no-collector-threads",
        action='store_true',
        help=
        "Dont Spawn multiple threads to collect the information on the devices"
    )
    parser.add_argument(
        "--nbr-collector-threads",
        type=int,
        default=10,
        help="Maximum number of collector thread to spawn (default 10)")
    parser.add_argument(
        "--max-worker-threads",
        type=int,
        default=1,
        help="Maximum number of worker threads per interval for scheduler")
    parser.add_argument("--use-scheduler",
                        action='store_true',
                        help="Use scheduler")
    parser.add_argument(
        "--hosts-refresh-interval",
        type=int,
        default=3 * 60 * 60,
        help="Interval to periodically refresh dynamic host inventory")
    parser.add_argument("--allow-zero-hosts",
                        action='store_true',
                        help="Allow scheduler to run even with 0 hosts")
    return parser


def _configure_logging(base_dir, log_dir_name, level):
    """Install a stream handler and, if requested, a rotating file handler.

    Args:
        base_dir: Directory that ``log_dir_name`` is appended to.
        log_dir_name: Value of ``--logdir``; empty means "no log file".
        level: Numeric logging level for ``logging.basicConfig``.
    """
    formatter = logging.Formatter(
        '%(asctime)s %(name)s: %(levelname)s: %(message)s')
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    handlers = [stream_handler]

    if log_dir_name:
        # Imported by name so the ``logging.handlers`` submodule is loaded
        # without rebinding ``logging`` as a local of this function.
        from logging.handlers import RotatingFileHandler

        log_dir = base_dir + "/" + log_dir_name
        # Create the logs directory if needed.
        os.makedirs(log_dir, exist_ok=True)
        # The path must be a plain string (a trailing comma used to turn it
        # into a 1-tuple) and the size limit keyword is ``maxBytes``.
        filename = log_dir + "/" + 'metric_collector.log'
        file_handler = RotatingFileHandler(filename,
                                           maxBytes=10 * 1024 * 1024,
                                           backupCount=5)
        file_handler.setFormatter(formatter)
        handlers.append(file_handler)

    logging.basicConfig(level=level, handlers=handlers)


def main():
    """Parse the command line and run one collection pass or the scheduler.

    Flow:
      1. Parse arguments; print help and exit with status 1 when none given.
      2. Exit with status 0 when ``--start`` is missing.
      3. Configure logging (stderr, plus a rotating file under ``--logdir``).
      4. Load the credentials and commands YAML files; a load error is
         logged and the process exits with status 0 (existing behaviour).
      5. With ``--use-scheduler``: build the scheduler, register the hosts
         through ``select_hosts`` and block in ``device_scheduler.start()``.
      6. Otherwise: collect once from the selected hosts, spread over
         threads unless ``--no-collector-threads`` is set, then emit a
         ``*_stats_agent`` datapoint to stdout or over HTTP.

    Relative file and log paths are resolved against the current working
    directory.
    """
    time_start = time.time()

    ### ------------------------------------------------------------------------------
    ### Create and Parse Arguments
    ### ------------------------------------------------------------------------------
    BASE_DIR = os.getcwd()

    full_parser = _build_arg_parser()
    dynamic_args = vars(full_parser.parse_args())

    # Print help if no parameters are provided
    if len(sys.argv) == 1:
        full_parser.print_help()
        sys.exit(1)

    logging_level = int(dynamic_args['loglvl'])

    ### ------------------------------------------------------------------------------
    ### Validate Arguments
    ### ------------------------------------------------------------------------------
    # Without an explicit tag every host matches.
    tag_list = dynamic_args['tag'] if dynamic_args['tag'] else [".*"]

    if not dynamic_args['start']:
        print('Missing <start> option, so nothing to do')
        sys.exit(0)

    ### ------------------------------------------------------------------------------
    ### Logging
    ### ------------------------------------------------------------------------------
    _configure_logging(BASE_DIR, dynamic_args['logdir'], logging_level)

    ### ------------------------------------------------------------------------------
    ### LOAD all credentials in a dict
    ### ------------------------------------------------------------------------------
    credentials = {}
    if os.path.isfile(dynamic_args['credentials']):
        credentials_yaml_file = dynamic_args['credentials']
    else:
        credentials_yaml_file = BASE_DIR + "/" + dynamic_args['credentials']

    logger.info('Importing credentials file: %s ', credentials_yaml_file)
    try:
        with open(credentials_yaml_file) as f:
            credentials = yaml.full_load(f)
    except Exception as e:
        logger.error('Error importing credentials file: %s: %s',
                     credentials_yaml_file, str(e))
        sys.exit(0)

    ### ------------------------------------------------------------------------------
    ### LOAD all commands with their tags in a dict
    ### ------------------------------------------------------------------------------
    commands = []
    if os.path.isfile(dynamic_args['commands']):
        commands_yaml_file = dynamic_args['commands']
    else:
        commands_yaml_file = BASE_DIR + "/" + dynamic_args['commands']

    logger.info('Importing commands file: %s ', commands_yaml_file)
    with open(commands_yaml_file) as f:
        try:
            for document in yaml.load_all(f, yaml.FullLoader):
                commands.append(document)
        except Exception as e:
            logger.error('Error importing commands file: %s, %s',
                         commands_yaml_file, str(e))
            sys.exit(0)

    # Only the first YAML document is used.
    general_commands = commands[0]

    use_threads = not dynamic_args['no_collector_threads']

    if dynamic_args['cmd_tag']:
        command_tags = dynamic_args['cmd_tag']
    else:
        command_tags = ['.*']

    sharding = dynamic_args.get('sharding')
    sharding_offset = dynamic_args.get('sharding_offset')
    max_worker_threads = dynamic_args.get('max_worker_threads', 1)
    max_collector_threads = dynamic_args.get('nbr_collector_threads')

    ### ------------------------------------------------------------------------------
    ### Scheduler mode
    ### ------------------------------------------------------------------------------
    if dynamic_args.get('use_scheduler', False):
        device_scheduler = scheduler.Scheduler(
            credentials,
            general_commands,
            dynamic_args['parserdir'],
            dynamic_args['output_type'],
            dynamic_args['output_addr'],
            max_worker_threads=max_worker_threads,
            use_threads=use_threads,
            num_threads_per_worker=max_collector_threads,
            collector_timeout=dynamic_args['collector_timeout'])
        hri = dynamic_args.get('hosts_refresh_interval', 6 * 60 * 60)
        select_hosts(
            dynamic_args['hosts'],
            tag_list,
            sharding,
            sharding_offset,
            scheduler=device_scheduler,
            refresh_interval=float(hri),
            allow_zero_hosts=dynamic_args.get('allow_zero_hosts', False),
        )
        device_scheduler.start()  # blocking call
        return

    ### ------------------------------------------------------------------------------
    ### LOAD all parsers
    ### ------------------------------------------------------------------------------
    parsers_manager = parser_manager.ParserManager(
        parser_dirs=dynamic_args['parserdir'])
    hosts_conf = select_hosts(dynamic_args['hosts'], tag_list, sharding,
                              sharding_offset)
    hosts_manager = host_manager.HostManager(credentials=credentials,
                                             commands=general_commands)
    hosts_manager.update_hosts(hosts_conf)
    coll = collector.Collector(hosts_manager=hosts_manager,
                               parser_manager=parsers_manager,
                               output_type=dynamic_args['output_type'],
                               output_addr=dynamic_args['output_addr'],
                               collect_facts=dynamic_args.get(
                                   'no_facts', True),
                               timeout=dynamic_args['collector_timeout'])
    target_hosts = hosts_manager.get_target_hosts(tags=tag_list)

    if use_threads:
        # One slice of hosts per thread. The size is computed once, with
        # integer division; max(1, ...) avoids a ZeroDivisionError when
        # --nbr-collector-threads is 0.
        chunk_size = len(target_hosts) // max(1, max_collector_threads) + 1
        target_hosts_lists = [
            target_hosts[x:x + chunk_size]
            for x in range(0, len(target_hosts), chunk_size)
        ]

        jobs = []
        for i, target_hosts_list in enumerate(target_hosts_lists, 1):
            logger.info(
                'Collector Thread-%s scheduled with following hosts: %s', i,
                target_hosts_list)
            jobs.append(
                threading.Thread(target=coll.collect,
                                 args=('global', ),
                                 kwargs={
                                     "hosts": target_hosts_list,
                                     "cmd_tags": command_tags
                                 }))

        # Start the threads
        for j in jobs:
            j.start()

        # Ensure all of the threads have finished
        for j in jobs:
            j.join()
    else:
        # Execute everything in the main thread
        coll.collect('global', hosts=target_hosts, cmd_tags=command_tags)

    ### -----------------------------------------------------
    ### Collect Global Statistics
    ### -----------------------------------------------------
    time_execution = time.time() - time_start

    global_datapoint = [{
        'measurement': global_measurement_prefix + '_stats_agent',
        'tags': {},
        'fields': {
            'execution_time_sec': "%.4f" % time_execution,
            'nbr_devices': len(target_hosts)
        },
        'timestamp': time.time_ns(),
    }]

    if dynamic_args.get('sharding') is not None:
        global_datapoint[0]['tags']['sharding'] = dynamic_args['sharding']

    if use_threads:
        global_datapoint[0]['fields']['nbr_threads'] = dynamic_args[
            'nbr_collector_threads']

    ### Send results to the right output
    try:
        if dynamic_args['output_type'] == 'stdout':
            utils.print_format_influxdb(global_datapoint)
        elif dynamic_args['output_type'] == 'http':
            utils.post_format_influxdb(
                global_datapoint,
                dynamic_args['output_addr'],
            )
        else:
            logger.warning('Output format unknown: %s',
                           dynamic_args['output_type'])
    except Exception as ex:
        # The message needs a %s placeholder, otherwise the exception text
        # is dropped and logging reports a formatting error instead.
        logger.warning("Hit error trying to post to influx: %s", str(ex))