def __init__(self,
              creds_conf,
              cmds_conf,
              parsers_dir,
              output_type,
              output_addr,
              max_worker_threads=1,
              use_threads=True,
              num_threads_per_worker=10,
              collector_timeout=15):
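     """Initialize the scheduler and its default worker.

     Parameter descriptions are inferred from how main() below builds
     this object: creds_conf/cmds_conf come from the credentials and
     commands YAML files, parsers_dir holds the parser definitions,
     output_type is 'stdout' or 'http' with output_addr as its target,
     and the remaining arguments size the worker/collector thread pools
     (collector_timeout bounds device rpc/rest calls).
     """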
     self.workers = {}
     self.working = set()
     self.host_mgr = host_manager.HostManager(credentials=creds_conf,
                                              commands=cmds_conf)
     self.parser_mgr = parser_manager.ParserManager(parser_dirs=parsers_dir)
     self.collector = collector.Collector(self.host_mgr, self.parser_mgr,
                                          output_type, output_addr,
                                          timeout=collector_timeout)
     self.max_worker_threads = max_worker_threads
     self.output_type = output_type
     self.output_addr = output_addr
     self.use_threads = use_threads
     self.num_threads_per_worker = num_threads_per_worker
     # default worker that is started if there are no hosts to schedule
     self.default_worker = Worker(120, self.collector, self.output_type,
                                  self.output_addr, self.use_threads,
                                  self.num_threads_per_worker)
     self.default_worker.set_name('Default-120sec')
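
# A minimal usage sketch (hypothetical values; assumes the __init__ above
# belongs to the scheduler.Scheduler class instantiated in main() below):
#
#   sched = scheduler.Scheduler(credentials, general_commands, 'parsers',
#                               'stdout', 'http://localhost:8186/write')
#   sched.start()  # blocking call
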
def main():

    time_start = time.time()

    ### ------------------------------------------------------------------------------
    ### Create and Parse Arguments
    ### ------------------------------------------------------------------------------
    # if getattr(sys, 'frozen', False):
    #     # frozen
    #     BASE_DIR = os.path.dirname(sys.executable)
    # else:
    #     # unfrozen
    #     BASE_DIR = os.path.dirname(os.path.realpath(__file__))

    BASE_DIR = os.getcwd()

    full_parser = argparse.ArgumentParser()
    full_parser.add_argument(
        "--tag",
        nargs='+',
        help="Collect data from hosts that match the tag")
    full_parser.add_argument(
        "--cmd-tag",
        nargs='+',
        help="Collect data from commands that match the tag")

    full_parser.add_argument("-s",
                             "--start",
                             action='store_true',
                             help="Start collecting (default 'no')")

    full_parser.add_argument("--loglvl",
                             default=20,
                             help="Logs verbosity, 10-debug, 50 Critical")

    full_parser.add_argument("--logdir",
                             default="",
                             help="Directory where to store logs")

    full_parser.add_argument(
        "--sharding",
        help="Run as part of a shard: provide the position within the shard "
        "and the shard size, e.g. [0/3]")
    full_parser.add_argument(
        "--sharding-offset",
        default=True,
        help="Apply an offset to the shard_id")

    full_parser.add_argument("--parserdir",
                             default="parsers",
                             help="Directory where to find parsers")
    full_parser.add_argument(
        "--collector-timeout",
        type=int,
        default=15,
        help="Timeout (in seconds) for collector device rpc/rest calls")
    full_parser.add_argument("--retry",
                             type=int,
                             default=5,
                             help="Maximum number of connection retries")

    full_parser.add_argument("--host", default=None, help="Host DNS or IP")
    full_parser.add_argument("--hosts",
                             default="hosts.yaml",
                             help="Hosts file in yaml")
    full_parser.add_argument("--commands",
                             default="commands.yaml",
                             help="Commands file in Yaml")
    full_parser.add_argument("--credentials",
                             default="credentials.yaml",
                             help="Credentials file in Yaml")

    full_parser.add_argument(
        "--no-facts",
        action='store_false',
        help="Disable facts collection on device (removes version and "
        "product name from results)")
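    # Note: with action='store_false', dynamic_args['no_facts'] defaults to
    # True and flips to False when --no-facts is passed; it feeds
    # Collector(collect_facts=...) further down.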

    full_parser.add_argument("--output-format",
                             default="influxdb",
                             help="Format of the output")
    full_parser.add_argument("--output-type",
                             default="stdout",
                             choices=['stdout', 'http'],
                             help="Type of output")
    full_parser.add_argument("--output-addr",
                             default="http://localhost:8186/write",
                             help="Addr information for output action")

    full_parser.add_argument(
        "--no-collector-threads",
        action='store_true',
        help="Don't spawn multiple threads to collect the information from "
        "the devices")
    full_parser.add_argument(
        "--nbr-collector-threads",
        type=int,
        default=10,
        help="Maximum number of collector thread to spawn (default 10)")
    full_parser.add_argument(
        "--max-worker-threads",
        type=int,
        default=1,
        help="Maximum number of worker threads per interval for scheduler")
    full_parser.add_argument("--use-scheduler",
                             action='store_true',
                             help="Use scheduler")
    full_parser.add_argument(
        "--hosts-refresh-interval",
        type=int,
        default=3 * 60 * 60,
        help="Interval to periodically refresh dynamic host inventory")
    full_parser.add_argument("--allow-zero-hosts",
                             action='store_true',
                             help="Allow scheduler to run even with 0 hosts")

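    # Example invocation (hypothetical script name, files as per defaults):
    #   python metric_collector.py --start --hosts hosts.yaml \
    #       --credentials credentials.yaml --commands commands.yaml \
    #       --output-type stdout --tag "edge.*"
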
    dynamic_args = vars(full_parser.parse_args())

    # Print help if no parameters are provided
    if len(sys.argv) == 1:
        full_parser.print_help()
        sys.exit(1)

    ### ------------------------------------------------------------------------------
    # Loading YAML Default Variables
    ### ------------------------------------------------------------------------------
    max_connection_retries = dynamic_args['retry']
    logging_level = int(dynamic_args['loglvl'])

    ### ------------------------------------------------------------------------------
    ### Validate Arguments
    ### ------------------------------------------------------------------------------
    pp = pprint.PrettyPrinter(indent=4)

    ###  Known and fixed arguments
    if dynamic_args['tag']:
        tag_list = dynamic_args['tag']
    else:
        tag_list = [".*"]

    if not dynamic_args['start']:
        print('Missing --start option, so nothing to do')
        sys.exit(0)

    ### ------------------------------------------------------------------------------
    ### Logging
    ### ------------------------------------------------------------------------------
    formatter = logging.Formatter(
        '%(asctime)s %(name)s: %(levelname)s:  %(message)s')
    sh = logging.StreamHandler()
    sh.setFormatter(formatter)
    handlers = [sh]
    if dynamic_args['logdir']:
        log_dir = BASE_DIR + "/" + dynamic_args['logdir']
        ## Check that the logs directory exists, create it if needed
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        filename = log_dir + "/" + 'metric_collector.log'
        fh = logging.handlers.RotatingFileHandler(filename,
                                                  maxBytes=10 * 1024 * 1024,
                                                  backupCount=5)
        fh.setFormatter(formatter)
        handlers.append(fh)

    logging.basicConfig(level=logging_level, handlers=handlers)
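
    # With the formatter above, emitted log lines look roughly like this
    # (logger name depends on how the module-level logger is created):
    #   2021-01-01 12:00:00,000 metric_collector: INFO:  Importing ...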

    ### ------------------------------------------------------------------------------
    ### LOAD all credentials in a dict
    ### ------------------------------------------------------------------------------
    credentials = {}
    credentials_yaml_file = ''

    if os.path.isfile(dynamic_args['credentials']):
        credentials_yaml_file = dynamic_args['credentials']
    else:
        credentials_yaml_file = BASE_DIR + "/" + dynamic_args['credentials']

    logger.info('Importing credentials file: %s ', credentials_yaml_file)
    try:
        with open(credentials_yaml_file) as f:
            credentials = yaml.full_load(f)
    except Exception as e:
        logger.error('Error importing credentials file: %s: %s',
                     credentials_yaml_file, str(e))
        sys.exit(1)

    ### ------------------------------------------------------------------------------
    ### LOAD all commands with their tags in a dict
    ### ------------------------------------------------------------------------------
    commands_yaml_file = ''
    commands = []

    if os.path.isfile(dynamic_args['commands']):
        commands_yaml_file = dynamic_args['commands']
    else:
        commands_yaml_file = BASE_DIR + "/" + dynamic_args['commands']

    logger.info('Importing commands file: %s ', commands_yaml_file)
    with open(commands_yaml_file) as f:
        try:
            for document in yaml.load_all(f, yaml.FullLoader):
                commands.append(document)
        except Exception as e:
            logger.error('Error importing commands file: %s, %s',
                         commands_yaml_file, str(e))
            sys.exit(1)

    general_commands = commands[0]
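
    # commands.yaml may contain several YAML documents; only the first
    # (the generic command definitions) is used as general_commands.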

    use_threads = not dynamic_args['no_collector_threads']

    if dynamic_args['cmd_tag']:
        command_tags = dynamic_args['cmd_tag']
    else:
        command_tags = ['.*']

    sharding = dynamic_args.get('sharding')
    sharding_offset = dynamic_args.get('sharding_offset')
    max_worker_threads = dynamic_args.get('max_worker_threads', 1)
    max_collector_threads = dynamic_args.get('nbr_collector_threads')

    if dynamic_args.get('use_scheduler', False):
        device_scheduler = scheduler.Scheduler(
            credentials,
            general_commands,
            dynamic_args['parserdir'],
            dynamic_args['output_type'],
            dynamic_args['output_addr'],
            max_worker_threads=max_worker_threads,
            use_threads=use_threads,
            num_threads_per_worker=max_collector_threads,
            collector_timeout=dynamic_args['collector_timeout'])
        hri = dynamic_args.get('hosts_refresh_interval', 3 * 60 * 60)
        select_hosts(
            dynamic_args['hosts'],
            tag_list,
            sharding,
            sharding_offset,
            scheduler=device_scheduler,
            refresh_interval=float(hri),
            allow_zero_hosts=dynamic_args.get('allow_zero_hosts', False),
        )
        device_scheduler.start()  # blocking call
        return

    ### ------------------------------------------------------------------------------
    ### LOAD all parsers
    ### ------------------------------------------------------------------------------
    parsers_manager = parser_manager.ParserManager(
        parser_dirs=dynamic_args['parserdir'])
    hosts_conf = select_hosts(dynamic_args['hosts'], tag_list, sharding,
                              sharding_offset)
    hosts_manager = host_manager.HostManager(credentials=credentials,
                                             commands=general_commands)
    hosts_manager.update_hosts(hosts_conf)
    coll = collector.Collector(hosts_manager=hosts_manager,
                               parser_manager=parsers_manager,
                               output_type=dynamic_args['output_type'],
                               output_addr=dynamic_args['output_addr'],
                               collect_facts=dynamic_args.get(
                                   'no_facts', True),
                               timeout=dynamic_args['collector_timeout'])
    target_hosts = hosts_manager.get_target_hosts(tags=tag_list)

    if use_threads:
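        # Split target_hosts into roughly equal chunks, one per collector
        # thread: e.g. 25 hosts with 10 threads gives a chunk size of
        # int(25 / 10 + 1) = 3, i.e. 9 chunks of at most 3 hosts each.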
        target_hosts_lists = [
            target_hosts[x:x +
                         int(len(target_hosts) / max_collector_threads + 1)]
            for x in range(0, len(target_hosts),
                           int(len(target_hosts) / max_collector_threads + 1))
        ]

        jobs = []

        for (i, target_hosts_list) in enumerate(target_hosts_lists, 1):
            logger.info(
                'Collector Thread-%s scheduled with following hosts: %s', i,
                target_hosts_list)
            thread = threading.Thread(target=coll.collect,
                                      args=('global', ),
                                      kwargs={
                                          "hosts": target_hosts_list,
                                          "cmd_tags": command_tags
                                      })
            jobs.append(thread)

        # Start the threads
        for j in jobs:
            j.start()

        # Ensure all of the threads have finished
        for j in jobs:
            j.join()

    else:
        # Execute everything in the main thread
        coll.collect('global', hosts=target_hosts, cmd_tags=command_tags)

    ### -----------------------------------------------------
    ### Collect Global Statistics
    ### -----------------------------------------------------
    time_end = time.time()
    time_execution = time_end - time_start

    global_datapoint = [{
        'measurement': global_measurement_prefix + '_stats_agent',
        'tags': {},
        'fields': {
            'execution_time_sec': "%.4f" % time_execution,
            'nbr_devices': len(target_hosts)
        },
        'timestamp': time.time_ns(),
    }]
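
    # In InfluxDB line protocol this renders roughly as (assuming a
    # hypothetical global_measurement_prefix of 'collector'):
    #   collector_stats_agent execution_time_sec="1.2345",nbr_devices=10 <ts>
    # Note that execution_time_sec is emitted as a string field here.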

    if dynamic_args.get('sharding') is not None:
        global_datapoint[0]['tags']['sharding'] = dynamic_args['sharding']

    if use_threads:
        global_datapoint[0]['fields']['nbr_threads'] = dynamic_args[
            'nbr_collector_threads']

    ### Send results to the right output
    try:
        if dynamic_args['output_type'] == 'stdout':
            utils.print_format_influxdb(global_datapoint)
        elif dynamic_args['output_type'] == 'http':
            utils.post_format_influxdb(
                global_datapoint,
                dynamic_args['output_addr'],
            )
        else:
            logger.warning('Output format unknown: %s',
                           dynamic_args['output_type'])
    except Exception as ex:
        logger.warning("Hit error trying to post to influx: %s", str(ex))