def on_portscan_worker_reply(ch, method_frame, properties, portscan_results_str):
    """Handle a reply from a portscan worker and close the channel.

    The reply is logged, checked to be well-formed JSON, and the channel it
    arrived on is closed whether or not the payload parses.

    Args:
        ch: Object the reply arrived on; only its ``close()`` method is used
            (presumably a message-queue channel -- confirm against caller).
        method_frame: Unused; part of the callback signature.
        properties: Unused; part of the callback signature.
        portscan_results_str: JSON-encoded portscan results.

    Raises:
        Whatever ``json.loads`` raises for an invalid payload; the channel is
        closed before the exception propagates.
    """
    log_console(f"portscan: received reply: {portscan_results_str}")
    try:
        # The decoded value is not used here; decoding only validates the payload.
        json.loads(portscan_results_str)
    finally:
        # Always release the channel, even when the payload is malformed.
        ch.close()
def get_device_status(device):
    """Probe a device and return a snapshot of its monitoring status.

    Devices whose ``os`` is ios/iosxe/nxos-ssh and whose ``transport`` is
    napalm are probed via their "environment" info (which also yields CPU and
    memory); every other device is probed via live "facts".

    Args:
        device: Dict with at least "os", "transport" and "name".

    Returns:
        Dict with keys "availability" (bool), "response_time" (elapsed probe
        time in whole milliseconds, or None), "cpu", "memory" and
        "last_heard" (timestamp string, or None when unavailable).
    """
    device_status = {
        "availability": False,
        "response_time": None,
        "cpu": None,
        "memory": None,
        "last_heard": None,
    }

    wants_environment = (
        device["os"] in {"ios", "iosxe", "nxos-ssh"}
        and device["transport"] == "napalm"
    )
    if wants_environment:
        requested_info, extra_args = "environment", {}
    else:
        requested_info, extra_args = "facts", {"get_live_info": True}

    env = None
    response_time = None
    try:
        time_start = time.time()
        result, payload = get_device_info(device, requested_info, **extra_args)
        response_time = time.time() - time_start
    # NOTE(review): BaseException also traps KeyboardInterrupt/SystemExit.
    except BaseException as e:
        info = f"!!! Exception in monitoring device, get {requested_info}: {repr(e)}"
        log_console(info)
        log_event(
            str(datetime.now())[:-3], "device", device["name"], "SEVERE", info
        )
        result = "failed"
    else:
        if wants_environment:
            env = payload

    if result != "success":
        log_event(
            str(datetime.now())[:-3],
            "device monitor",
            device["name"],
            "SEVERE",
            f"Availability failed for device: {device['name']}",
        )
        return device_status

    device_status["availability"] = True
    # Compare against None: a measured 0.0 (coarse clock) is still a valid reading.
    if response_time is not None:
        device_status["response_time"] = int(response_time * 1000)
    device_status["last_heard"] = str(datetime.now())[:-3]

    # Only read the environment payload after a successful probe; on failure
    # the second element returned by get_device_info is not the info dict.
    if env:
        device_status["cpu"] = calculate_cpu(env["environment"]["cpu"])
        device_status["memory"] = calculate_memory(env["environment"]["memory"])

    return device_status
def portscan_store():
    """Accept a portscan result posted as JSON and store it.

    Returns:
        ``({}, 200)`` once the record has been handed to ``record_portscan``,
        or ``(message, 400)`` naming the first missing piece of the request.
    """
    portscan_info = request.get_json()
    if not portscan_info:
        return "Must provide portscan information in JSON body", 400

    # (field, verb) pairs, checked in this order; the verb only affects wording.
    required_fields = (
        ("source", "provide"),
        ("serial", "provide"),
        ("host_ip", "provide"),
        ("host_name", "provide"),
        ("timestamp", "provide"),
        ("scan_output", "include"),
    )
    for field, verb in required_fields:
        if field not in portscan_info:
            return f"Must {verb} '{field}' in portscan information", 400

    record_portscan(portscan_info)

    log_console(
        f"Received portscan store request from {portscan_info['source']} for host {portscan_info['host_name']}"
    )
    return {}, 200
def import_services(filename=None):
    """Replace all stored services with those defined in a YAML file.

    The file is read from ``quokka/data/<filename>`` before the database is
    touched, so a missing file leaves the existing services in place.
    Entries whose "id" duplicates an earlier entry are skipped and reported
    through ``log_event``.

    Args:
        filename: Name of the YAML file inside ``quokka/data/``.
    """
    try:
        with open("quokka/data/" + filename, "r") as import_file:
            services = yaml.safe_load(import_file.read())
    except FileNotFoundError as e:
        log_console(f"Could not import services file: {repr(e)}")
        # Nothing to import: bail out instead of iterating an unbound name.
        return

    db.session.query(Service).delete()

    # validate services: make sure no duplicate ids
    seen_ids = set()
    # safe_load returns None for an empty document; treat that as "no services".
    for service in services or []:
        if service["id"] in seen_ids:
            log_event(
                str(datetime.now())[:-3],
                "importing services",
                filename,
                "ERROR",
                f"Duplicate service id: {service['id']}",
            )
            continue

        seen_ids.add(service["id"])
        db.session.add(Service(**service))

    db.session.commit()
    return
def record_traceroute(traceroute_info):
    """Persist one traceroute result as a ``Traceroute`` row.

    The record is only written when ``traceroute_info`` contains every one of
    "source", "target", "token", "timestamp" and "traceroute_img"; otherwise
    the first missing key is logged and nothing is stored.

    Args:
        traceroute_info: Mapping holding the traceroute fields.
    """
    # Checked in this order; the message for the first missing key is logged.
    missing_key_messages = (
        ("source", "record_traceroute: missing 'source' in traceroute info"),
        ("target", "record_traceroute: missing 'target' in traceroute info"),
        ("token", "record_traceroute: missing 'token' in traceroute_info"),
        ("timestamp", "record_traceroute: missing 'timestamp' in traceroute info"),
        (
            "traceroute_img",
            "record_traceroute: missing 'traceroute_img' in traceroute info",
        ),
    )
    for key, message in missing_key_messages:
        if key not in traceroute_info:
            log_console(message)
            return

    # Copy only the known fields so extra keys never reach the model.
    traceroute = {key: traceroute_info[key] for key, _ in missing_key_messages}

    db.session.add(Traceroute(**traceroute))
    db.session.commit()
def worker_register():
    """Register a worker described by the JSON request body.

    The body must carry "serial", "name" and "worker_type". The worker is
    looked up by name and type, its serial is compared with the stored one,
    and on a match it is marked available with a fresh "last_heard".

    Returns:
        ``({}, 200)`` on success, or ``(message, 400)`` when the body is
        missing or incomplete, the worker is unknown, or the serial differs.
    """
    registration_info = request.get_json()
    if not registration_info:
        return "Must provide registration information in JSON body", 400

    # "worker_type" is used for the lookup below, so it has to be validated
    # too; otherwise a missing key surfaces as an unhandled KeyError.
    for field in ("serial", "name", "worker_type"):
        if field not in registration_info:
            return f"Must provide '{field}' in registration information", 400

    worker = get_worker(
        host=registration_info["name"],
        worker_type=registration_info["worker_type"],
    )
    if worker is None:
        return "Unknown worker name in registration information", 400
    if registration_info["serial"] != worker["serial"]:
        return "Serial number in registration information does not match worker serial", 400

    log_console(
        f"Received registration request from {registration_info['name']}, serial no: {registration_info['serial']}"
    )

    worker["availability"] = True
    worker["last_heard"] = str(datetime.now())[:-3]
    set_worker(worker)

    return {}, 200
def monitor(self, interval):
    """Mark workers unavailable once they have been silent for too long.

    Runs until ``self.terminate`` is set. On each pass, every worker with a
    "last_heard" value older than ``MAX_NOT_HEARD_SECONDS`` is flagged as
    unavailable, recorded and saved.

    Args:
        interval: Pause between passes in seconds; slept in 10-second slices
            (``int(interval / 10)`` of them) so termination is noticed.
    """
    while not self.terminate:
        workers = get_all_workers()
        log_console(
            f"monitor:worker Beginning monitoring for {len(workers)} workers"
        )

        for worker in workers:
            if self.terminate:
                break

            # Workers that never reported in have nothing to compare against.
            if not worker["last_heard"]:
                continue

            last_heard_time = datetime.strptime(
                worker["last_heard"], "%Y-%m-%d %H:%M:%S.%f"
            )
            print(f"now: {datetime.now()}, last_heard: {last_heard_time}")

            silent_for = datetime.now() - last_heard_time
            if silent_for > timedelta(seconds=MAX_NOT_HEARD_SECONDS):
                worker["availability"] = False
                record_worker_status(worker)
                set_worker(worker)

        for _ in range(int(interval / 10)):
            time.sleep(10)
            if self.terminate:
                break

    log_console("...gracefully exiting monitor:worker")
def get_summaries(self, items, item_type, get_hour_data_function):
    """Build hourly availability/response-time summaries and flag SLA misses.

    Args:
        items: Iterable of dicts; each needs "id" and "name" and may carry
            "sla_response_time" and "sla_availability" thresholds.
        item_type: Label used in log messages and events.
        get_hour_data_function: Callable taking ``(item id, hour)`` and
            returning that hour's status records, each indexable by
            "availability" and "response_time".

    Returns:
        Dict mapping item id to a summary dict with keys "id", "hour",
        "availability" (percentage of records that were available) and
        "response_time" (mean over available records that carry a value).
    """
    log_console(f"Calculating {item_type} summaries for {self.current_hour}")

    hourly_summaries = dict()
    for item in items:
        status_records = get_hour_data_function(item["id"], self.current_hour)

        availability_total = 0
        response_time_total = 0
        num_availability_records = 0
        num_response_time_records = 0
        for record in status_records:
            num_availability_records += 1
            if not record["availability"]:
                continue
            availability_total += 100
            # An available record may still lack a response time; skip it in
            # the average rather than failing on ``number + None``.
            response_time = record["response_time"]
            if response_time is None:
                continue
            response_time_total += response_time
            num_response_time_records += 1

        hourly_summary = {
            "id": item["id"],
            "hour": str(datetime.fromisoformat(self.current_hour)),
            "availability": (
                availability_total / num_availability_records
                if num_availability_records > 0
                else 0
            ),
            "response_time": (
                response_time_total / num_response_time_records
                if num_response_time_records > 0
                else 0
            ),
        }

        log_console(
            f"Summary: {item_type} hourly summary for {item['name']}: {hourly_summary}"
        )
        hourly_summaries[item["id"]] = hourly_summary

        # NOTE(review): presumably response times are stored in milliseconds
        # and the SLA threshold is in seconds -- confirm.
        rsp_time_in_seconds = hourly_summary["response_time"] / 1000
        if (
            "sla_response_time" in item
            and rsp_time_in_seconds > item["sla_response_time"]
        ):
            info = f"SLA response time violation, {rsp_time_in_seconds:.2f} > {item['sla_response_time']}"
            log_event(
                str(datetime.now())[:-3], item_type, item["name"], "WARNING", info
            )

        if (
            "sla_availability" in item
            and hourly_summary["availability"] < item["sla_availability"]
        ):
            info = f"SLA availability violation, {hourly_summary['availability']:.2f} < {item['sla_availability']}"
            log_event(
                str(datetime.now())[:-3], item_type, item["name"], "WARNING", info
            )

    return hourly_summaries
def monitor(self, interval):
    """Periodically fetch each device's live configuration and store it.

    Runs until ``self.terminate`` is set. Devices that cannot be read from
    the DB, or whose configuration cannot be retrieved, are logged and
    skipped for that pass.

    Args:
        interval: Pause between passes in seconds; slept in 10-second slices
            (``int(interval / 10)`` of them) so termination is noticed.
    """
    while not self.terminate:
        device_ids = get_all_device_ids()
        log_console(
            f"Monitor: Beginning Configuration monitoring for {len(device_ids)} devices"
        )

        for device_id in device_ids:
            if self.terminate:
                break

            # re-retrieve device as it may have been changed
            result, device = get_device(device_id=device_id)
            if result != "success":
                log_console(
                    f"Configuration Monitor: Error retrieving device from DB. id: {device_id}, error: {device}"
                )
                continue

            try:
                result, config = get_device_info(
                    device, "config", get_live_info=True
                )
                if result != "success":
                    log_console(
                        f"!!! Unable to get device info (config) for {device['name']}"
                    )
                    continue
            # NOTE(review): BaseException also traps KeyboardInterrupt/SystemExit.
            except BaseException as e:
                log_console(
                    f"!!! Exception getting device info in configuration monitoring for {device['name']}: {repr(e)}"
                )
                continue

            # Reaching this point means the configuration was retrieved.
            record_device_config(device_id, config["config"]["running"])
            log_event(
                str(datetime.now())[:-3],
                "configuration",
                device['name'],
                "INFO",
                f"Stored configuration for: {device['name']}",
            )

        for _ in range(int(interval / 10)):
            sleep(10)
            if self.terminate:
                break

    log_console("...gracefully exiting monitor:configuration")
def stop_db_maintenance_thread():
    """Ask the db maintenance task to terminate, wait for it, drop references.

    The task is signalled and its thread joined only when both are set; the
    two ``ThreadManager`` references are cleared either way.
    """
    log_console("--- ---> Shutting down dbmaintenance thread")

    task = ThreadManager.db_maintenance_task
    thread = ThreadManager.db_maintenance_thread
    if task and thread:
        task.set_terminate()
        thread.join()

    ThreadManager.db_maintenance_task = ThreadManager.db_maintenance_thread = None
def stop_summaries_thread():
    """Ask the summaries task to terminate, wait for it, drop references.

    The task is signalled and its thread joined only when both are set; the
    two ``ThreadManager`` references are cleared either way.
    """
    log_console("--- ---> Shutting down summaries thread")

    task = ThreadManager.summaries_task
    thread = ThreadManager.summaries_thread
    if task and thread:
        task.set_terminate()
        thread.join()

    ThreadManager.summaries_task = ThreadManager.summaries_thread = None
def stop_host_thread():
    """Ask the host monitor task to terminate, wait for it, drop references.

    The task is signalled and its thread joined only when both are set; the
    two ``ThreadManager`` references are cleared either way.
    """
    log_console("--- ---> Shutting down host monitoring thread")

    task = ThreadManager.host_monitor_task
    thread = ThreadManager.host_monitor_thread
    if task and thread:
        task.set_terminate()
        thread.join()

    ThreadManager.host_monitor_task = ThreadManager.host_monitor_thread = None
def stop_discovery_thread():
    """Ask the discovery task to terminate, wait for it, drop references.

    The task is signalled and its thread joined only when both are set; the
    two ``ThreadManager`` references are cleared either way.
    """
    log_console("--- ---> Shutting down discovery thread")

    task = ThreadManager.discovery_task
    thread = ThreadManager.discovery_thread
    if task and thread:
        task.set_terminate()
        thread.join()

    ThreadManager.discovery_task = ThreadManager.discovery_thread = None
def initiate_capture(ip, protocol, port, count):
    """Ask the worker responsible for ``ip`` to start a packet capture.

    The request is published on "capture_queue" when the worker's connection
    type is "rabbitmq", or stored as a pending "start-capture" command when
    it is "http". Any other connection type results in no action.

    Args:
        ip: Address to capture traffic for; also used to find the monitor.
        protocol: Protocol filter; when truthy it is translated together
            with ``port`` (e.g. 'http' must become 'tcp', '80').
        port: Port filter.
        count: Value passed through to the worker as "count".
    """
    monitor = CaptureManager.find_monitor(ip)
    worker = get_worker(host=monitor, worker_type=CaptureManager.worker_type)
    if worker is None:
        log_console(
            f"Capture Manager: could not find worker, host={monitor}, worker_type={CaptureManager.worker_type} in DB"
        )
        return

    # Translate port and protocol if necessary, e.g. 'http' must become 'tcp', '80'
    if protocol:
        protocol, port = CaptureManager.translate_protocol_and_port(protocol, port)

    # NOTE(review): `interface` is not defined in this function; it must come
    # from an enclosing/module scope -- confirm it exists.
    capture_info_json = json.dumps(
        {
            "quokka": get_this_ip(),
            "interface": interface,
            "ip": ip,
            "protocol": protocol,
            "port": port,
            "count": count,
        }
    )

    connection_type = worker["connection_type"]
    if connection_type == "rabbitmq":
        channel = CaptureManager.get_channel(monitor)
        channel.basic_publish(
            exchange="", routing_key="capture_queue", body=capture_info_json
        )
        log_console(
            f"Capture Manager: starting capture: ip:{ip} protocol:{protocol} port:{port} count:{count}"
        )
    elif connection_type == "http":
        set_command(
            {
                "host": worker["host"],
                "serial": worker["serial"],
                "worker_type": CaptureManager.worker_type,
                "command": "start-capture",
                "command_info": capture_info_json,
                "delivered": False,
            }
        )
def start(self, interval):
    """Purge aged rows from the database once per clock hour.

    Loops until ``self.terminate`` is set, polling every 60 seconds for a
    change of hour. When the hour changes, status tables are trimmed to the
    last 2 hours and capture, portscan, traceroute and command tables to the
    last 24 hours. A failed purge is logged and rolled back; the hour is
    still marked as processed.

    Args:
        interval: Unused; the task is driven by the clock hour.
    """
    while not self.terminate:
        this_hour = str(datetime.now())[:-13]
        if this_hour == self.current_hour:
            time.sleep(60)
            continue

        now = datetime.now()
        now_minus_24_hours = now - timedelta(hours=24)
        now_minus_2_hours = now - timedelta(hours=2)

        try:
            # Clean up time-series status data, which is kept for 2 hours
            for table in [DeviceStatus, HostStatus, ServiceStatus, WorkerStatus]:
                count = table.query.filter(
                    table.timestamp < str(now_minus_2_hours)
                ).delete()
                log_console(
                    f"DbMaintenanceTask: deleted {count} records from {table}"
                )

            # Clean up capture, portscan and traceroute data, kept for 24 hours
            for table in [Capture, Portscan, Traceroute]:
                count = table.query.filter(
                    table.timestamp < str(now_minus_24_hours)
                ).delete()
                log_console(
                    f"DbMaintenanceTask: deleted {count} records from {table}"
                )

            # Clean up commands greater than 24 hours old
            count = Command.query.filter(
                Command.timestamp < str(now_minus_24_hours)
            ).delete()
            log_console(f"DbMaintenanceTask: deleted {count} records from Command")

            db.session.commit()

        # NOTE(review): BaseException also traps KeyboardInterrupt/SystemExit.
        except BaseException as e:
            log_console(f"!!! uh-oh, exception in DbMaintenance thread: {e}")
            # Discard the partial transaction so the session is usable again
            # on the next pass; never let the cleanup itself kill the task.
            try:
                db.session.rollback()
            except Exception as rollback_error:
                log_console(
                    f"!!! DbMaintenance thread: rollback failed: {rollback_error}"
                )

        self.current_hour = this_hour

    log_console("...gracefully exiting db maintenance task")
def shutdown():
    """Signal every background thread to terminate, then stop each in turn."""
    log_console("\n\n\n---> Entering shutdown sequence")

    # Flag all threads first so they can wind down while we join them one by one.
    ThreadManager.initiate_terminate_all_threads()

    stop_sequence = (
        ThreadManager.stop_discovery_thread,
        ThreadManager.stop_host_thread,
        ThreadManager.stop_service_thread,
        ThreadManager.stop_summaries_thread,
        ThreadManager.stop_worker_thread,
        ThreadManager.stop_device_threads,
        ThreadManager.stop_db_maintenance_thread,
    )
    for stop_thread in stop_sequence:
        stop_thread()

    log_console("\n---> all threads shut down, terminating.")
def import_compliance(filename=None):
    """Replace all stored compliance standards with those in a YAML file.

    The file is read from ``quokka/data/<filename>`` before the database is
    touched, so a missing file leaves the existing standards in place.

    Args:
        filename: Name of the YAML file inside ``quokka/data/``.
    """
    try:
        with open("quokka/data/" + filename, "r") as import_file:
            standards = yaml.safe_load(import_file.read())
    except FileNotFoundError as e:
        log_console(f"Could not import compliance file: {repr(e)}")
        # Nothing to import: bail out instead of iterating an unbound name.
        return

    db.session.query(Compliance).delete()

    # safe_load returns None for an empty document; treat that as "no standards".
    for standard in standards or []:
        db.session.add(Compliance(**standard))

    db.session.commit()
    return
def device_heartbeat():
    """Process a device heartbeat posted as JSON.

    The device is looked up by name and its serial compared with the stored
    one. On a match it is marked available, "last_heard" is refreshed, any
    optional attributes present in the heartbeat are copied over, and the
    device is recorded and saved.

    Returns:
        ``({}, 200)`` on success, or ``(message, 400)`` when the body is
        missing or incomplete, the device is unknown, or the serial differs.
    """
    heartbeat_info = request.get_json()
    if not heartbeat_info:
        return "Must provide heartbeat information in JSON body", 400
    for field in ("serial", "name"):
        if field not in heartbeat_info:
            return f"Must provide '{field}' in heartbeat information", 400

    result, device = get_device(device_name=heartbeat_info["name"])
    if result != "success":
        return "Unknown device name in heartbeat information", 400
    if heartbeat_info["serial"] != device["serial"]:
        return "Serial number in heartbeat information does not match device serial", 400

    device["availability"] = True
    device["last_heard"] = str(datetime.now())[:-3]

    # Optional attributes: copied only when the heartbeat supplies them.
    optional_fields = (
        "vendor",
        "model",
        "os",
        "version",
        "response_time",
        "cpu",
        "memory",
        "uptime",
    )
    for field in optional_fields:
        if field in heartbeat_info:
            device[field] = heartbeat_info[field]

    record_device_status(device)
    set_device(device)

    log_console(
        f"Received heartbeat from {heartbeat_info['name']}, info={heartbeat_info}"
    )
    return {}, 200
def initiate_traceroute(target, token):
    """Ask the worker responsible for ``target`` to run a traceroute.

    The request is published on "traceroute_queue" when the worker's
    connection type is "rabbitmq", or stored as a pending command when it is
    "http". Any other connection type results in no action.

    Args:
        target: Host name/address, or an http(s) URL from which the host is
            extracted.
        token: Value passed through to the worker as "token".
    """
    # Target could be a URL; if so, extract just the host. `hostname` drops
    # any port or user info that `netloc` would keep ("host:8080" is not a
    # valid traceroute target); fall back to netloc if no host was parsed.
    if target.startswith(("http://", "https://")):
        parsed_target = urlparse(target)
        target = parsed_target.hostname or parsed_target.netloc

    monitor = TracerouteManager.find_monitor(target)
    worker = get_worker(host=monitor, worker_type=TracerouteManager.worker_type)
    if worker is None:
        log_console(
            f"Traceroute Manager: could not find worker, host={monitor}, worker_type={TracerouteManager.worker_type} in DB"
        )
        return

    traceroute_info = {
        "quokka": get_this_ip(),
        "target": target,
        "token": token,
    }
    traceroute_info_json = json.dumps(traceroute_info)

    if worker["connection_type"] == "rabbitmq":
        channel = TracerouteManager.get_channel(monitor)
        channel.basic_publish(
            exchange="", routing_key="traceroute_queue", body=traceroute_info_json
        )
        log_console(f"Traceroute Manager: starting traceroute: target : {target}")

    elif worker["connection_type"] == "http":
        # NOTE(review): the command name is "start-capture" although this is
        # a traceroute request -- confirm what the worker expects.
        command = {
            "host": worker["host"],
            "serial": worker["serial"],
            "worker_type": TracerouteManager.worker_type,
            "command": "start-capture",
            "command_info": traceroute_info_json,
            "delivered": False,
        }
        set_command(command)
def initiate_portscan(host_ip, host_name, token):
    """Ask the worker responsible for ``host_ip`` to run a port scan.

    The request is published on "portscan_queue" when the worker's
    connection type is "rabbitmq", or stored as a pending command when it is
    "http". Any other connection type results in no action.

    Args:
        host_ip: Address of the host to scan; also used to find the monitor.
        host_name: Name of the host, passed through to the worker.
        token: Value passed through to the worker as "token".
    """
    monitor = PortscanManager.find_monitor(host_ip)
    worker = get_worker(host=monitor, worker_type=PortscanManager.worker_type)
    if worker is None:
        log_console(
            f"Portscan Manager: could not find worker, host={monitor}, worker_type={PortscanManager.worker_type} in DB"
        )
        return

    portscan_info = {
        "quokka": get_this_ip(),
        "host_ip": host_ip,
        "host_name": host_name,
        "token": token,
    }
    portscan_info_json = json.dumps(portscan_info)

    if worker["connection_type"] == "rabbitmq":
        channel = PortscanManager.get_channel(monitor)
        channel.basic_publish(
            exchange="", routing_key="portscan_queue", body=portscan_info_json
        )
        # One message with an explicit separator; two adjacent f-strings
        # would be glued together into a single garbled line.
        log_console(
            f"Portscan Manager: starting portscan: host_ip : {host_ip}, host_name : {host_name}"
        )

    elif worker["connection_type"] == "http":
        # NOTE(review): the command name is "start-capture" although this is
        # a portscan request -- confirm what the worker expects.
        command = {
            "host": worker["host"],
            "serial": worker["serial"],
            "worker_type": PortscanManager.worker_type,
            "command": "start-capture",
            "command_info": portscan_info_json,
            "delivered": False,
        }
        set_command(command)
def config_diff(device, config_to_diff):
    """Diff a device's configuration against a configuration file.

    Only devices whose "transport" is "napalm" are supported. The file is
    loaded as a merge candidate, compared, and the candidate is discarded so
    nothing is left staged on the device; the session is always closed once
    it has been opened.

    Args:
        device: Device dict; "transport" selects the access method.
        config_to_diff: Path of the configuration file to compare against.

    Returns:
        ``("success", diff)`` with the output of ``compare_config()``, or
        ``("failure", reason)`` when the device is not napalm-managed or any
        step raises.
    """
    if device["transport"] != "napalm":
        log_console("!!! Unable to compare configurations, on live config to compare")
        return "failure", "Unable to compare configurations"

    napalm_device = get_napalm_device(device)
    try:
        napalm_device.open()
        try:
            napalm_device.load_merge_candidate(filename=config_to_diff)
            diff = napalm_device.compare_config()
            # This is a read-only comparison: drop the staged candidate.
            napalm_device.discard_config()
        finally:
            # Release the device session whether or not the diff succeeded.
            napalm_device.close()
        return "success", diff
    # NOTE(review): BaseException also traps KeyboardInterrupt/SystemExit.
    except BaseException as e:
        log_console(f"Exception in doing load_merge_candidate: {repr(e)}")
        return "failure", repr(e)
def traceroute_register():
    """Validate a registration request against the stored device record.

    The JSON body must carry "serial" and "name"; the named device must
    exist and its stored serial must match.

    Returns:
        ``({}, 200)`` when the request checks out, otherwise
        ``(message, 400)`` describing the first problem found.
    """
    registration_info = request.get_json()
    if not registration_info:
        return "Must provide registration information in JSON body", 400
    for field in ("serial", "name"):
        if field not in registration_info:
            return f"Must provide '{field}' in registration information", 400

    result, device = get_device(device_name=registration_info["name"])
    if result != "success":
        return "Unknown device name in registration information", 400
    if registration_info["serial"] != device["serial"]:
        return "Serial number in registration information does not match device serial", 400

    log_console(
        f"Received registration request from {registration_info['name']}, serial no: {registration_info['serial']}"
    )
    return {}, 200
def check_os_compliance(device):
    """Check a device's live OS version against its compliance standard.

    The standard is the ``Compliance`` row matching the device's "vendor"
    and "os". The device's live facts are fetched and their "os_version" is
    handed to ``check_version`` together with the standard version.

    Args:
        device: Device dict with at least "vendor", "os" and "name".

    Returns:
        The result of ``check_version``; False when no standard exists or
        the OS version cannot be retrieved.
    """
    standard = Compliance.query.filter_by(
        vendor=device["vendor"], os=device["os"]
    ).one_or_none()
    if standard is None:
        log_console(
            f"!!! Error retrieving compliance record for this device {device['name']}"
        )
        return False

    facts = None
    try:
        result, facts = get_device_info(device, "facts", get_live_info=True)
    # NOTE(review): BaseException also traps KeyboardInterrupt/SystemExit.
    except BaseException as e:
        log_console(
            f"!!! Exception getting device info in compliance monitoring for {device['name']}: {repr(e)}"
        )
        result = "failed"

    # Treat anything other than "success" as a failure: on error the second
    # return value is a message string, not the facts dict, and must not be
    # indexed as one.
    device_facts = facts.get("facts") if isinstance(facts, dict) else None
    if result != "success" or not device_facts or "os_version" not in device_facts:
        log_console(
            f"!!! Error retrieving version info for this device {device['name']}"
        )
        return False

    return check_version(
        device,
        standard=standard.standard_version,
        actual=device_facts["os_version"],
    )
def start(self, interval):
    """Compute hourly summaries once per clock hour until terminated.

    Polls every 60 seconds for a change of hour. When the hour changes,
    summaries are computed for services, hosts and devices; the service and
    host summaries are recorded.

    Args:
        interval: Unused; the task is driven by the clock hour.
    """
    while not self.terminate:
        hour_now = str(datetime.now())[:-13]

        if hour_now != self.current_hour:
            record_service_hourly_summaries(
                self.get_summaries(
                    get_all_services(), "services", get_service_status_data_for_hour
                )
            )
            record_host_hourly_summaries(
                self.get_summaries(
                    get_all_hosts(), "hosts", get_host_status_data_for_hour
                )
            )
            # Device summaries are computed (and SLA checks run) but the
            # returned summaries are not recorded here.
            self.get_summaries(
                get_all_devices(), "devices", get_device_status_data_for_hour
            )
            self.current_hour = hour_now
        else:
            time.sleep(60)

    log_console("...gracefully exiting summaries task")
def stop_device_threads():
    """Stop the device, compliance and configuration monitor threads.

    Each task is signalled and its thread joined only when both are set;
    afterwards all six ``ThreadManager`` references are cleared.
    """
    log_console(
        "--- ---> Shutting down device monitoring threads (device, configuration and compliance)"
    )

    def _terminate_and_join(task, thread):
        # Nothing to do unless both the task and its thread exist.
        if task and thread:
            task.set_terminate()
            thread.join()

    _terminate_and_join(
        ThreadManager.device_monitor_task, ThreadManager.device_monitor_thread
    )
    _terminate_and_join(
        ThreadManager.compliance_monitor_task,
        ThreadManager.compliance_monitor_thread,
    )
    _terminate_and_join(
        ThreadManager.configuration_monitor_task,
        ThreadManager.configuration_monitor_thread,
    )

    ThreadManager.device_monitor_task = ThreadManager.device_monitor_thread = None
    ThreadManager.compliance_monitor_task = None
    ThreadManager.compliance_monitor_thread = None
    ThreadManager.configuration_monitor_task = None
    ThreadManager.configuration_monitor_thread = None
def capture_store():
    """Accept captured packets posted as JSON and store them.

    Returns:
        ``({}, 200)`` once the capture has been handed to ``record_capture``,
        or ``(message, 400)`` naming the first missing piece of the request.
    """
    capture_info = request.get_json()
    if not capture_info:
        return "Must provide capture information in JSON body", 400

    # (field, verb) pairs, checked in this order; the verb only affects wording.
    required_fields = (
        ("serial", "provide"),
        ("source", "provide"),
        ("timestamp", "provide"),
        ("packets", "include"),
    )
    for field, verb in required_fields:
        if field not in capture_info:
            return f"Must {verb} '{field}' in capture information", 400

    record_capture(
        capture_info["timestamp"], capture_info["source"], capture_info["packets"]
    )

    log_console(
        f"Received capture store request from {capture_info['source']}, pkts={len(capture_info['packets'])}"
    )
    return {}, 200
def get_device_info_napalm(device, requested_info, get_live_info=False):
    """Retrieve one category of information from a device via NAPALM.

    Args:
        device: Device dict; "name" is used for the cached-facts lookup.
        requested_info: One of "facts", "environment", "interfaces", "arp",
            "mac", "config" or "counters".
        get_live_info: When False, "facts" are served from the DB if
            available; other categories always query the device.

    Returns:
        ``("success", {requested_info: data})`` on success, or
        ``("failure", reason)`` for an unknown category or when the device
        query raises.
    """
    # Try to get the info from the DB first
    if requested_info == "facts" and not get_live_info:
        result, facts = get_facts(device["name"])
        if result == "success":
            return "success", {"facts": facts}

    # Maps each supported category to the NAPALM getter that provides it.
    getter_names = {
        "facts": "get_facts",
        "environment": "get_environment",
        "interfaces": "get_interfaces",
        "arp": "get_arp_table",
        "mac": "get_mac_address_table",
        "config": "get_config",
        "counters": "get_interfaces_counters",
    }
    getter_name = getter_names.get(requested_info)
    if getter_name is None:
        # Reject before connecting: no point opening a session for nothing.
        return "failure", "Unknown requested info"

    napalm_device = get_napalm_device(device)
    try:
        napalm_device.open()
        try:
            info = getattr(napalm_device, getter_name)()
        finally:
            # Release the device session whether or not the getter succeeded.
            napalm_device.close()

        if requested_info == "facts":
            # Cache and return the same fetch rather than querying twice.
            set_facts(device, {"facts": info})
        return "success", {requested_info: info}

    # NOTE(review): BaseException also traps KeyboardInterrupt/SystemExit.
    except BaseException as e:
        log_console(f"!!! Exception in get device info: {repr(e)}")
        return "failure", repr(e)
def traceroute_store():
    """Accept a traceroute result posted as JSON and store it.

    Every field that ``record_traceroute`` needs is validated here --
    including "token" -- so a request that would be silently dropped when
    stored is rejected with a 400 instead of being answered with a 200.

    Returns:
        ``({}, 200)`` once the record has been handed to
        ``record_traceroute``, or ``(message, 400)`` naming the first missing
        piece of the request.
    """
    traceroute_info = request.get_json()
    if not traceroute_info:
        return "Must provide traceroute information in JSON body", 400

    # (field, verb) pairs, checked in this order; the verb only affects wording.
    required_fields = (
        ("source", "provide"),
        ("serial", "provide"),
        ("target", "provide"),
        ("token", "provide"),
        ("timestamp", "provide"),
        ("traceroute_img", "include"),
    )
    for field, verb in required_fields:
        if field not in traceroute_info:
            return f"Must {verb} '{field}' in traceroute information", 400

    record_traceroute(traceroute_info)

    log_console(
        f"Received traceroute store request from {traceroute_info['source']} for target {traceroute_info['target']}"
    )
    return {}, 200
def device_register():
    """Register a device described by the JSON request body.

    The body must carry "serial" and "name"; the named device must exist and
    its stored serial must match. On success the device is marked available,
    "last_heard" is refreshed, and the device is saved.

    Returns:
        ``({}, 200)`` on success, otherwise ``(message, 400)`` describing the
        first problem found.
    """
    registration_info = request.get_json()
    if not registration_info:
        return "Must provide registration information in JSON body", 400
    for field in ("serial", "name"):
        if field not in registration_info:
            return f"Must provide '{field}' in registration information", 400

    result, device = get_device(device_name=registration_info["name"])
    if result != "success":
        return "Unknown device name in registration information", 400
    if registration_info["serial"] != device["serial"]:
        return "Serial number in registration information does not match device serial", 400

    log_console(
        f"Received registration request from {registration_info['name']}, serial no: {registration_info['serial']}"
    )

    device.update(availability=True, last_heard=str(datetime.now())[:-3])
    set_device(device)

    return {}, 200
def check_config_compliance(device):
    """Check whether a device's configuration matches its standard config.

    The standard is the ``Compliance`` row matching the device's "vendor"
    and "os"; its config file under ``quokka/data/`` is diffed against the
    device. A non-empty diff is written next to the standard file as
    ``<standard file>.diff.<device name>``.

    Args:
        device: Device dict with at least "vendor", "os" and "name".

    Returns:
        True when the diff is empty; False when there is no standard, the
        diff could not be produced, or differences were found.
    """
    standard = Compliance.query.filter_by(
        vendor=device["vendor"], os=device["os"]
    ).one_or_none()
    if standard is None:
        log_console(
            f"!!! Error retrieving compliance record for this device {device['name']}"
        )
        return False

    standard_filename = "quokka/data/" + standard.standard_config_file
    result, diff = config_diff(device, standard_filename)
    if result != "success":
        return False

    if len(diff) == 0:
        return True

    # Keep the differences on disk for later inspection.
    diff_path = standard_filename + ".diff." + device["name"]
    with open(diff_path, "w") as config_out:
        config_out.write(diff)
    return False