async def start(self):
    """Create the ZMQ SUB socket, optionally secure it with CURVE, subscribe to
    every topic and start the event-forwarding task."""
    self.__ctx = zmq.asyncio.Context()
    self.__socket = self.__ctx.socket(zmq.SUB)

    # in compat mode; there is NO subscribe keypair data
    key_pair = self.cfg.get_zmq_subscribe_keypair()
    if key_pair is None:
        logger.info(
            "no ZMQ subscriber keypair exists; assuming compat mode")
    else:
        # the first line is the public key
        self.__socket.curve_serverkey = key_pair.strip()
        # for the client socket, the curve key pair will be regenerated all the time
        # the important part for "authentication" is the server "public" key
        self.__socket.curve_publickey, self.__socket.curve_secretkey = zmq.curve_keypair()

    self.__socket.connect(get_zmq_broker_xsub_endpoint(self.cfg))

    # Subscribe to all topics
    self.__socket.subscribe('')
    logger.info("filewave server event subscriber has started")
    asyncio.create_task(self.forward_events())
async def create_program_and_run_it():
    """Initialise the runtime, expose the prometheus HTTP endpoint, then run
    the data-collection loop forever.

    Aborts early (with an error log) when the FileWave server cannot be
    reached to determine its version.
    """
    init_logging()
    prog = MainRuntime(logger)
    prog.init_services()
    start_http_server(8000)

    host = prog.cfg.get_fw_api_server_hostname()
    poll_interval = prog.cfg.get_polling_delay_seconds()
    logger.info(
        f"Extra Metrics - connecting to {host}, using poll interval of {poll_interval} sec"
    )

    # fetches and stores the current server version, this is important as the REST API's depend on this
    # information. Prior to 14.2.0 some of the API calls were different and 14.2.0 put them all under ../api/v1/...
    prog.fw_query.fetch_server_version()
    log_config_summary(prog.cfg, prog.fw_query.major_version,
                       prog.fw_query.minor_version,
                       prog.fw_query.patch_version)
    if prog.fw_query.major_version is None or prog.fw_query.major_version == 0:
        logger.error("Unable to reach FileWave server, aborting...")
        return

    p = Periodic(poll_interval, prog.validate_and_collect_data)
    await p.start()

    while True:
        if prog.rerun_data_collection:
            # BUGFIX: clear the flag *before* collecting; the original never
            # reset it, so once an event fired the collection re-ran every
            # second forever. An event arriving while a collection is in
            # flight simply sets the flag again and triggers another pass.
            prog.rerun_data_collection = False
            await prog.validate_and_collect_data()
        await asyncio.sleep(1)
def read_config_helper(cfg):
    """Read the configuration from the default file location into *cfg*.

    Returns True when the file was read successfully. A missing file raises
    FileNotFoundError (callers rely on catching this for first-time setup).
    """
    logger.info(
        f"loading the configuration from file {ExtraMetricsConfiguration.DEFAULT_CFG_FILE_LOCATION}"
    )
    with open(ExtraMetricsConfiguration.DEFAULT_CFG_FILE_LOCATION, 'r') as f:
        cfg.read_configuration(f)
        return True
    # BUGFIX: the original ended with an unreachable `return False` after the
    # `return True` inside the with-block; open() raising is the only other
    # exit, so the dead statement has been removed.
def provision_dashboards_into_grafana(fw_server_dns_name):
    """Copy the packaged Grafana dashboard JSON files into FileWave's Grafana
    provisioning directory, substituting the external DNS name into each."""
    # if the expected dashboards DO NOT exist in the right path, moan about this and go ahead and copy them..
    deployment_dir = os.path.join(
        "/usr/local/etc/filewave/grafana/provisioning", "dashboards")
    if not os.path.exists(deployment_dir):
        logger.error(
            f"The Grafana dashboard deployment directory ({deployment_dir}) does not exist; is this version 14+ of FileWave?")
        return

    # check each file is there... overwrite regardless (helps on upgrade I suppose)
    for dashboard_file in pkg_resources.resource_listdir("extra_metrics", "dashboards"):
        if not dashboard_file.endswith(".json"):
            continue
        raw_json = pkg_resources.resource_string(
            "extra_metrics.dashboards", dashboard_file).decode('utf-8')
        provisioning_file = os.path.join(deployment_dir, dashboard_file)
        with open(provisioning_file, 'w+') as f:
            # load up the dashboard and replace the ${VAR_SERVER} with our config value of the external DNS
            f.write(raw_json.replace('${VAR_SERVER}', fw_server_dns_name))
            logger.info(f"wrote dashboard file: {provisioning_file}")
def event_callback(self, topic, payload):
    """Inspect one server event: debug-dump recognised topics and flag a
    data-collection re-run for the 'interesting' ones."""
    self.logger.info(f"event received: {topic}")

    interesting_topics = [
        "/server/update_model_finished",
        "/api/auditlog",
    ]
    debug_topics = interesting_topics + [
        "/inventory/inventory_query_changed",
        "/server/change_packets",
        "/client/"
    ]

    # dump the payload of any topic we recognise by prefix
    for prefix in debug_topics:
        if topic.startswith(prefix):
            logger.info(f"topic: {topic}")
            logger.info(f"payload: {json.dumps(payload, indent=4)}")

    if topic not in interesting_topics:
        return
    # audit log events only matter when a report was created
    if topic == "/api/auditlog" and "Report Created" not in payload[
            "message"]:
        return

    # set a flag in state indicating that the data collection should run imminently
    logger.info(f"topic {topic} fired; will re-queue data collection")
    self.rerun_data_collection = True
def collect_patch_data_per_device(self):
    """Publish the per-device critical/normal outstanding patch counts into
    the prometheus gauge, using the per-device state accumulated earlier."""
    j = self.fw_query.get_client_info_j()
    if j is None:
        logger.warning(
            "No info returned from the get_client_info_j query - thats not good"
        )
        return
    if "values" not in j or len(j["values"]) == 0:
        logger.info(
            "no results for software update patch status per device received from FileWave server"
        )
        return None
    if "fields" not in j:
        logger.info(
            "no fields meta data for software update patch status per device received from FileWave server"
        )
        return None

    # use a list of devices, pick up the data from the software update / patching module and fill
    # in the metric.
    frame = pd.DataFrame(j["values"], columns=j["fields"])
    grouped = frame.groupby(
        ["Client_filewave_client_name", "Client_filewave_id"], as_index=False)
    for (client_name, raw_client_id), _group in grouped:
        client_id = int(raw_client_id)
        state = self.get_perdevice_state(client_id)
        state.client_name = client_name
        # critical vs non-critical patch counters for this device
        crit_counter = state.get_counter(True)
        normal_counter = state.get_counter(False)
        logger.info(
            f"patches, device: {client_name}/{client_id}, critical: {crit_counter.total()}, normal: {normal_counter.total()}"
        )
        software_updates_remaining_by_device.labels(
            client_name, client_id,
            True).set(crit_counter.total_assigned_and_unassigned())
        software_updates_remaining_by_device.labels(
            client_name, client_id,
            False).set(normal_counter.total_assigned_and_unassigned())
def collect_patch_data_status(self):
    """Fetch the software-update (patch) summary from the FileWave server and
    publish prometheus metrics: counts by platform/criticality, per-update
    "popularity", update age, and overall state totals. Also (re)builds the
    per-device patch state and finishes by publishing per-device counts.

    Returns the raw JSON payload from the server, or None when there is
    nothing to report.
    """
    j = self.fw_query.get_software_updates_web_ui_j()
    if j is None:
        return
    if "results" not in j or len(j["results"]) == 0:
        logger.info(
            "no results for software update patch status received from FileWave server"
        )
        return None

    values = []
    ''' source information:
    1. every update, with Id references to state of clients in sub-lists
    {
        "id": 174,
        "unique_hash": "2-d9094dcca7459cbd5f50bdca34c20646",
        "name": "macOS Catalina 10.15.5 Update",
        "update_id": "001-12343",
        "version": " ",
        "platform": "0",
        "size": 4755504063,
        "install_size": 4755504063,
        "critical": false,
        "reboot": true,
        "approved": false,
        "automatic": false,
        "creation_date": "2020-05-28T21:34:59+02:00",
        "metadata": {},
        "import_error": null,
        "import_status": 3,
        "count_requested": 1,
        "unassigned_devices": {
            "count": 0,
            "device_ids": []
        },
        "assigned_devices": {
            "assigned": {
                "count": 1,
                "device_ids": [
                    11
                ]
            },
            "warning": {
                "count": 0,
                "device_ids": []
            },
            "remaining": {
                "count": 0,
                "device_ids": []
            },
            "completed": {
                "count": 1,
                "device_ids": [
                    11
                ]
            },
            "error": {
                "count": 0,
                "device_ids": []
            }
        }
    }
    '''

    # column names for the DataFrame rows appended to `values` below
    columns = [
        "update_name", "update_id", "update_pk", "creation_date",
        "age_in_days", "critical", "platform", "requested", "unassigned",
        "assigned", "completed", "remaining", "warning", "error",
        "is_completed"
    ]

    res = j["results"]
    now = datetime.datetime.now(timezone.utc)
    self.reset_perdevice_state()

    ''' IMPORTANT: assigned_devices:
    - all states EXCEPT assigned under this are mutually exclusive, count_requested is the complete total.
    - is_completed = num_requested > 0 and num_unassigned == 0 and num_remaining == 0
    '''
    for item in res:
        update_id = item['update_id']
        update_pk = item['id']
        acc = item["assigned_devices"]
        update_name = item["name"]
        creation_date = item["creation_date"]
        platform = item["platform"]
        # macOS update names can repeat, so qualify them with the update_id
        if platform == "macOS" or platform == "0":
            update_name += f" ({update_id})"
        is_critical = item["critical"]
        num_requested = item["count_requested"]
        num_unassigned = item["unassigned_devices"]["count"]
        num_remaining = acc["remaining"]["count"]
        num_assigned = acc["assigned"]["count"]
        num_completed = acc["completed"]["count"]
        num_warning = acc["warning"]["count"]
        num_error = acc["error"]["count"]
        is_completed = num_requested > 0 and num_unassigned == 0 and num_remaining == 0

        # accumulate per-device critical/normal counters; these are consumed
        # later by collect_patch_data_per_device()
        self.apply_unassigned_counts_to_perdevice_state(
            item["unassigned_devices"], is_critical)
        self.apply_assigned_counts_to_perdevice_state(acc, is_critical)

        # age of the update in days; 99 when no creation date is present
        age_in_days = 99
        if creation_date is not None:
            # the server emits two timestamp flavours: with and without
            # fractional seconds
            try:
                date_value = datetime.datetime.strptime(
                    creation_date, '%Y-%m-%dT%H:%M:%S%z')
            except ValueError:
                date_value = datetime.datetime.strptime(
                    creation_date, '%Y-%m-%dT%H:%M:%S.%f%z')
            delta = now - date_value
            age_in_days = delta.days

        values.append([
            update_name, update_id, update_pk, creation_date, age_in_days,
            is_critical, platform, num_requested, num_unassigned,
            num_assigned, num_completed, num_remaining, num_warning,
            num_error, is_completed
        ])

    df = pd.DataFrame(values, columns=columns)
    # the server reports platform as a numeric string
    platform_mapping = {"0": "macOS", "1": "Microsoft"}

    # for platform/criticality
    df_crit = df.groupby(['platform', 'critical'])
    for key, item in df_crit:
        platform_str = key[0]
        if platform_str in platform_mapping:
            platform_str = platform_mapping[platform_str]
        is_crit = key[1]
        total_count = item['update_id'].count()
        software_updates_by_critical.labels(platform_str,
                                            is_crit).set(total_count)

    # calculate the outstanding updates, e.g. patches with highest number of
    # clients outstanding, which is:
    # unassigned + assigned + remaining
    # df_not_completed = df.loc[df['is_completed'] == False]
    per_update_totals = df.groupby(["update_name", "update_pk"],
                                   as_index=False)
    for key, item in per_update_totals:
        update_name = key[0]
        update_pk = str(key[1])
        num_not_started = item['unassigned'].sum()
        num_outstanding = item['remaining'].sum()
        num_completed = item['completed'].sum()
        num_with_error_or_warning = item['warning'].sum(
        ) + item['error'].sum()
        # print(f"update: {update_name} / {update_pk}, in progress: {num_outstanding}")
        software_updates_by_popularity.labels(
            update_name, update_pk, "Not Started").set(num_not_started)
        software_updates_by_popularity.labels(
            update_name, update_pk, "In Progress").set(num_outstanding)
        software_updates_by_popularity.labels(
            update_name, update_pk, "Completed").set(num_completed)
        software_updates_by_popularity.labels(
            update_name, update_pk,
            "Errors/Warnings").set(num_with_error_or_warning)
        # NOTE(review): item['creation_date'] / item['age_in_days'] are pandas
        # Series here; this relies on each (update_name, update_pk) group
        # containing a single row -- confirm, otherwise a non-scalar value
        # reaches labels()/set()
        software_updates_by_age.labels(update_name, update_pk,
                                       item['creation_date']).set(
                                           item['age_in_days'])

    # column-wise totals across every update row
    t = df.sum(0, numeric_only=True)
    # total number of devices requesting software...
    software_updates_by_state.labels('Requested').set(t['requested'])
    # total number not assigned to any device, even though its requested
    software_updates_by_state.labels('Unassigned').set(t['unassigned'])
    # breakdown of totals for patches that have been assigned...
    # assigned -> remaining (installing) -> completed
    #          -> error|warning
    software_updates_by_state.labels('Assigned').set(t['assigned'])
    software_updates_by_state.labels('Remaining').set(t['remaining'])
    software_updates_by_state.labels('Completed').set(t['completed'])
    software_updates_by_state.labels('Warning').set(t['warning'])
    software_updates_by_state.labels('Error').set(t['error'])

    self.collect_patch_data_per_device()

    return j
def my_check(yes_or_no, file_path):
    """Log the file path being checked and return *yes_or_no* unchanged.

    BUGFIX: the original called logger.info("checking file path:", file_path),
    which treats file_path as a %-format argument the message never consumes;
    the path was dropped and the logging module reported a formatting error.
    Use lazy %-style formatting so the path actually appears in the log.
    """
    logger.info("checking file path: %s", file_path)
    return yes_or_no
def collect_client_data(self, soft_patches):
    """Publish per-client metrics: client version, platform, tracking/locked
    state, model number, compliance state and check-in age buckets.

    Args:
        soft_patches: object exposing get_perdevice_state(client_fw_id); used
            to look up outstanding critical/normal patch counts per device.
    """
    # column indexes into each row of j["values"]; validated by the asserts below
    Client_device_name = 0
    Client_free_disk_space = 2
    Client_filewave_id = 10
    Client_last_check_in = 17
    DesktopClient_filewave_model_number = 18
    Client_total_disk_space = 24
    OperatingSystem_name = 13

    j = self.fw_query.get_client_info_j()
    try:
        # sanity-check that the server's field layout matches the indexes above
        assert j["fields"]
        assert j["fields"][
            Client_device_name] == "Client_device_name", f"field {Client_device_name} is expected to be the Client's name"
        assert j["fields"][
            Client_last_check_in] == "Client_last_check_in", f"field {Client_last_check_in} is expected to be the Client's last check in date/time"
        assert j["fields"][
            Client_filewave_id] == "Client_filewave_id", f"field {Client_filewave_id} is expected to be the Client's filewave_id"
        assert j["fields"][
            OperatingSystem_name] == "OperatingSystem_name", f"field {OperatingSystem_name} is supposed to be OperatingSystem_name"

        # check-in age buckets: <=1 day, <7 days, <30 days, >=30 days
        buckets = [0, 0, 0, 0]
        now = datetime.datetime.now()

        df = pd.DataFrame(j["values"], columns=j["fields"])

        # devices by client_version
        for item in self._rollup_by_single_column_count_client_filewave_id(
                df, "DesktopClient_filewave_client_version").to_numpy():
            version = item[0]
            total_count = item[1]
            if version is None:
                version = "Not Reported"
            device_client_version.labels(version).set(total_count)
            logger.info(f"device client version: {version}, {total_count}")

        # roll up devices per platform
        for item in self._rollup_by_single_column_count_client_filewave_id(
                df, "OperatingSystem_name").to_numpy():
            (a, b) = self._set_metric_pair(device_client_platform, item)
            logger.info(f"device platform: {a}, {b}")

        # roll up devices by 'tracking enabled' or not
        for item in self._rollup_by_single_column_count_client_filewave_id(
                df, "Client_is_tracking_enabled").to_numpy():
            (a, b) = self._set_metric_pair(device_client_tracked, item)
            logger.info(f"device by tracking: {a}, {b}")

        # and by locked state
        for item in self._rollup_by_single_column_count_client_filewave_id(
                df, "Client_filewave_client_locked").to_numpy():
            (a, b) = self._set_metric_pair(device_client_locked, item)
            logger.info(f"device by locked: {a}, {b}")

        # a bit of logic here, so rollup isn't via pandas...
        device_count_by_compliance = {
            ClientCompliance.STATE_OK: 0,
            ClientCompliance.STATE_ERROR: 0,
            ClientCompliance.STATE_WARNING: 0,
            ClientCompliance.STATE_UNKNOWN: 0
        }

        for v in j["values"]:
            # if there is no last check in date, we want to assume it's NEVER checked in
            checkin_days = 999
            if v[Client_last_check_in] is not None:
                checkin_date = datetime.datetime.strptime(
                    v[Client_last_check_in], '%Y-%m-%dT%H:%M:%S.%fZ')
                delta = now - checkin_date
                checkin_days = delta.days

            total_crit = 0
            total_normal = 0

            # for devices with a filewave_id
            client_fw_id = v[Client_filewave_id]
            if client_fw_id is None:
                logger.warning(
                    f"one of the device records doesn't have a client_fw_id; the json data is: {v}"
                )
            else:
                per_device_state = soft_patches.get_perdevice_state(
                    client_fw_id)
                if per_device_state is not None:
                    total_crit = per_device_state.get_counter(
                        True).total_not_completed()
                    total_normal = per_device_state.get_counter(
                        False).total_not_completed()

            # If we have a model number, store it in the metrics
            fw_model_number = 0
            if v[DesktopClient_filewave_model_number] is not None:
                fw_model_number = v[DesktopClient_filewave_model_number]
            device_client_modelnumber.labels(
                v[Client_device_name]).set(fw_model_number)

            comp_check = ClientCompliance(v[Client_last_check_in],
                                          v[Client_total_disk_space],
                                          v[Client_free_disk_space],
                                          checkin_days, total_crit,
                                          total_normal)
            state = comp_check.get_compliance_state()

            if v[OperatingSystem_name] == "Chrome OS":
                # NOTE(review): "disk compliance:" below logs
                # get_checkin_compliance() a second time -- this looks like a
                # copy-paste slip; presumably the disk-compliance accessor was
                # intended. Confirm before changing the log output.
                logger.debug(
                    f"state {ClientCompliance.get_compliance_state_str(state)} found for name: {v[Client_device_name]},\
last check in: {v[Client_last_check_in]},\
total disk: {v[Client_total_disk_space]},\
free disk: {v[Client_free_disk_space]},\
checkin days: {checkin_days},\
total crit/noral: {total_crit}/{total_normal},\
checkin compliance: {comp_check.get_checkin_compliance()}, disk compliance: {comp_check.get_checkin_compliance()}, patch compliance: {comp_check.get_patch_compliance()}"
                )
                logger.debug("\r\n")

            device_count_by_compliance[state] += 1

            if (checkin_days <= 1):
                buckets[0] += 1
            elif checkin_days < 7:
                buckets[1] += 1
            elif checkin_days < 30:
                buckets[2] += 1
            else:
                buckets[3] += 1

        for key, value in device_count_by_compliance.items():
            device_client_compliance.labels(
                ClientCompliance.get_compliance_state_str(key)).set(value)

        device_checkin_days.labels('Less than 1').set(buckets[0])
        device_checkin_days.labels('Less than 7').set(buckets[1])
        device_checkin_days.labels('Less than 30').set(buckets[2])
        device_checkin_days.labels('More than 30').set(buckets[3])
    except AssertionError as e1:
        logger.error("The validation/assertions failed: %s" % (e1, ))
def install_into_environment(config_path, api_key, external_dns_name, polling_interval, skip_provisioning, verify_tls):
    """Create/update the extra-metrics configuration file and (optionally)
    provision Grafana dashboards, the prometheus scrape configuration and the
    supervisord runtime on a FileWave server host.

    Args:
        config_path: full path of the configuration file to read/write.
        api_key: FileWave API key; required when no config file exists yet.
        external_dns_name: external DNS of the FileWave server; required when
            no config file exists yet.
        polling_interval: polling delay in seconds, or None to keep current.
        skip_provisioning: when True, only the configuration file is written.
        verify_tls: whether TLS certificates should be verified.

    Raises:
        NotRunningRoot: when provisioning is requested but not running as root.
    """
    init_logging()
    logger.setLevel("INFO")

    cfg = ExtraMetricsConfiguration()

    dirname = os.path.dirname(config_path)
    if not os.path.exists(dirname):
        logger.error(
            f"The directory for the configuration file does not exist: {dirname}")
        return

    # BUGFIX: the original guard was `not os.path.exists(config_path) and
    # os.path.isfile(config_path)`, which can never be true (a path cannot be
    # a file AND not exist), so the writability check never ran. When the
    # config file already exists it must be writable.
    if os.path.isfile(config_path):
        if not os.access(config_path, os.W_OK):
            logger.error(
                f"The configuration file cannot be written to {config_path} - does this user have access?")
            return

    try:
        read_config_helper(cfg)
    except FileNotFoundError:
        # first-time setup: the key/DNS pair is mandatory because there is no
        # existing file to take them from
        if api_key is None or external_dns_name is None:
            logger.error(
                "When there is no configuration file you must specify an API key and external DNS name, which will then be stored in the config file")
            return

    assert cfg.section is not None

    cfg.set_verify_tls(verify_tls)
    if api_key is not None:
        cfg.set_fw_api_key(api_key)
    if external_dns_name is not None:
        cfg.set_fw_api_server(external_dns_name)
    if polling_interval is not None:
        cfg.set_polling_delay_seconds(polling_interval)

    try:
        with open(config_path, 'w+') as f:
            cfg.write_configuration(f)
            logger.info(f"saved configuration to file: {config_path}")
    except Exception as e:
        logger.error(
            "Unable to write the configuration file - normally this command requires sudo/root privs, did you use sudo?")
        logger.error(e)
        return

    # I use a flag here, because I want the WARNING text to be the last thing a user sees here
    present_warning = False
    if not skip_provisioning:
        if running_on_a_fwxserver_host():
            # cheap probe: can we actually run a command as root?
            if run_root_command(["ls", "-l"]) is False:
                logger.info(
                    "provisioning is requested - but I've detected you are not running as root - aborting")
                raise NotRunningRoot(
                    "provisioning is requested - but I've detected you are not running as root - aborting")
            try:
                provision_dashboards_into_grafana(cfg.get_fw_api_server_hostname())
                provision_prometheus_scrape_configuration()
                provision_launch_of_extra_metrics_on_host()
                plugins = [
                    {"command": "update", "name": "grafana-piechart-panel"}
                    # {"command": "install", "name": "redis-datasource"}
                ]
                for plugin in plugins:
                    run_root_command([
                        "/usr/local/sbin/grafana-cli",
                        "--pluginsDir",
                        "/usr/local/filewave/instrumentation_data/grafana/plugins",
                        "plugins",
                        plugin["command"],
                        plugin["name"]])
            except Exception as e:
                logger.error(
                    "Error during provisioning of prometheus/grafana, are you using sudo?")
                logger.error(e)
                return
        else:
            present_warning = True

    q = FWRestQuery(cfg.get_fw_api_server_hostname(), cfg.get_fw_api_key(), cfg.get_verify_tls())
    major, minor, patch = validate_runtime_requirements(q)
    log_config_summary(cfg, major, minor, patch)

    if present_warning:
        logger.warning("provisioning of metrics dashboards, setting prometheus scrape config and supervisord runtime was skipped as I didn't detect a FileWave Server installation - you can ignore this warning if you are intentionally setting this up on a different host (or in a container). To avoid this warning entirely, run the configuration with --skip-provisioning")
def log_config_summary(cfg, major, minor, patch):
    """Emit a human-readable summary of the active configuration to the log."""
    poll_sec = cfg.get_polling_delay_seconds()
    summary_lines = [
        "",
        "Extra Metrics - Configuration Summary",
        "=====================================",
        f"External DNS : {cfg.get_fw_api_server_hostname()}",
        f"API Key : {cfg.get_fw_api_key()}",
        f"FileWave Server : {major}.{minor}.{patch}",
        f"Verify Certs : {cfg.get_verify_tls()}",
        f"Polling Interval : {poll_sec} sec / {poll_sec / 60.0:.1f} min",
    ]
    for line in summary_lines:
        logger.info(line)