def events(self) -> Generator[dict, None, None]:
    """Creates a Splunk search job and yields each result as a dict."""
    from splunklib.client import Job

    job: Job = self.create_search(
        self.spl,
        query_kwargs={
            "exec_mode": "normal",
            "earliest_time": self.earliest,
            "latest_time": self.latest,
        },
    )

    self.sid = job.sid

    logger.info(f"Created splunk search with sid={self.sid}, waiting for job to finish")

    # Poll until the search job completes.
    while not job.is_done():
        logger.debug("Job not done, sleeping")
        time.sleep(5)

    logger.info("Job is done, getting results")

    count = 0
    # Deliberately huge count so we pull back every result.
    for result in self.get_results(job, count=100_000_000):
        count += 1
        yield result

    logger.info(f"Processed {count} splunk results")
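# Usage sketch: stream results out of the datasource. The `SplunkSPLSearch`
# name and its constructor arguments are assumptions inferred from the
# attributes used above (self.spl, self.earliest, self.latest), not confirmed
# by this excerpt.
datasource = SplunkSPLSearch(
    spl="search index=main | head 100",
    earliest="-24h@h",
    latest="now",
)
for event in datasource.events():
    print(event)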
def _merge_batch(nodes: List[Node]) -> List[Node]:
    """Merge a single batch of nodes."""
    output: Dict[int, Node] = {}

    logger.debug(f"Merging batch of size {len(nodes)}")

    for node in nodes:
        node_key = hash(node)

        # First time seeing this node.
        if node_key not in output:
            output[node_key] = node
            continue

        # Otherwise, merge the duplicate into the node we already have.
        current = output[node_key]
        current.merge_with(node)
        output[node_key] = current

    logger.debug(f"Merged down to size {len(output)}")

    return list(output.values())
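# A minimal sketch of the contract `_merge_batch` relies on: nodes that hash
# equal are collapsed, with `merge_with` folding the duplicate's data into the
# survivor. `ToyNode` is a hypothetical stand-in for the project's Node class,
# and this assumes `_merge_batch` is reachable as a plain function, as its
# signature (no `self`) suggests.
class ToyNode:
    def __init__(self, key: str, edges: set) -> None:
        self.key = key
        self.edges = edges

    def __hash__(self) -> int:
        # Nodes with the same key are considered the same node.
        return hash(self.key)

    def merge_with(self, other: "ToyNode") -> None:
        # Union the duplicate node's edges into this one.
        self.edges |= other.edges


# The two "a" nodes collapse into one carrying edges {1, 2}; "b" passes through.
merged = _merge_batch([ToyNode("a", {1}), ToyNode("a", {2}), ToyNode("b", {3})])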
def __init__(self, behaviour_report_file: str, hash_metadata_file: Optional[str] = None) -> None:
    with open(behaviour_report_file, "r") as f:
        behaviour_report = json.load(f)

    if "attributes" not in behaviour_report:
        raise AttributeError("Supplied behaviour report does not contain any data")

    hash_metadata = None
    if hash_metadata_file:
        with open(hash_metadata_file, "r") as f:
            hash_metadata = json.load(f)

    self.hash_metadata = hash_metadata
    self.behaviour_report = behaviour_report["attributes"]

    logger.info("Finished setting up GenericVTSandbox")

    # Surface any report keys we don't have a handler for.
    for key in self.behaviour_report.keys():
        if key not in self.KNOWN_ATTRIBUTES:
            logger.debug(f"Unknown key {key} found in VirusTotal sandbox report")
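# Usage sketch (file paths are hypothetical): point the datasource at a saved
# VirusTotal behaviour report, optionally alongside the file's hash metadata.
sandbox = GenericVTSandbox(
    behaviour_report_file="reports/behaviour.json",
    hash_metadata_file="reports/hash_metadata.json",
)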
def __init__(self, triage: str) -> None:
    """A FireEye HX Triage DataSource.

    Parameters
    ----------
    triage : str
        The path to the HX .mans file.

    Examples
    --------
    >>> triage = HXTriage(triage="/path/to/triage.mans")
    """
    self.file_path = triage
    self.alert_files = {"hits.json": False, "threats.json": False}
    self.identified_files: Dict[str, str] = {}

    logger.info(f"Setting up HXTriage for {self.file_path}")

    self.tempdir = tempfile.TemporaryDirectory(suffix="_beagle")
    logger.debug(f"Generated temporary directory {self.tempdir.name}")

    # .mans files are simply zips.
    with zipfile.ZipFile(self.file_path) as mans:
        for audit_file in mans.namelist():

            # Save the alert files.
            if audit_file in self._ALERT_FILES:
                mans.extract(audit_file, self.tempdir.name)
                self.alert_files[audit_file] = True
                logger.debug(f"Found alert file {audit_file}")

            # Skip files with '.' in them.
            if "." in audit_file:
                continue

            # Read the first 500 bytes to identify the audit type.
            with mans.open(audit_file) as f:
                header = f.read(500).decode("utf-8")

            match = self._GENERATOR_REX.search(header)
            if match:
                version = match.groups()[0]
            else:
                continue

            # Skip audit types we don't support.
            if version not in self._SUPPORTED_AUDITS:
                continue

            mans.extract(audit_file, self.tempdir.name)
            self.identified_files[audit_file] = version
            logger.debug(f"Mapped {audit_file} to {version}")
def parse_alert_files(self, temp_dir: str) -> Generator[dict, None, None]:
    """Parses out the alert events from the hits.json and threats.json files.

    Parameters
    ----------
    temp_dir : str
        Folder which contains the expanded triage.

    Yields
    ------
    dict
        The next alert event found in the Triage.
    """
    threats = None

    # We will always have 'hits.json'.
    try:
        with open(f"{temp_dir}/hits.json", "r") as f:
            hits = json.load(f)
    except Exception as e:
        logger.warning("Could not load JSON from hits.json, skipping alerts!")
        logger.debug(e)
        return

    if self.alert_files["threats.json"]:
        try:
            with open(f"{temp_dir}/threats.json", "r") as f:
                threats = json.load(f)
        except Exception as e:
            logger.warning("Could not load JSON from threats.json, alert names may be UUIDs")
            logger.debug(e)
    else:
        logger.info("Could not find threats.json, alert names may be UUIDs")

    for alert in hits:
        # Introduce an alerting event type for HX.
        alert["event_type"] = "alertEvent"

        # If we have the threats file, convert the "threat_id" portion of the
        # alert to understandable values.
        if threats:
            threat = next(
                filter(lambda threat_entry: threat_entry["_id"] == alert["threat_id"], threats),
                None,
            )
            if threat:
                logger.info(
                    f"Matched up {alert['threat_id']} to "
                    f"{threat.get('display_name', threat['uri_name'])}"
                )
                alert["_threat_data"] = threat

        # Add the time the alert happened.
        alert["event_time"] = self._hx_time_to_epoch(alert["matched_at"])

        alerting_event_type = alert["data"]["key"]["event_type"]

        # Strip the event type prefix from the alerting event's keys.
        alerting_event = {
            k.replace(f"{alerting_event_type}/", ""): v
            for k, v in alert["data"]["values"].items()
        }
        alerting_event["event_time"] = self._hx_time_to_epoch(alerting_event["timestamp"])
        alert["data"]["values"] = self._fix_missing_fields(alerting_event)

        yield alert
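# Illustration of the key-stripping comprehension above, with hypothetical HX
# field values: every value key arrives prefixed with the alerting event type,
# and stripping that prefix leaves a normal-looking audit record.
values = {"fileWriteEvent/fileName": "evil.exe", "fileWriteEvent/username": "corp\\user"}
stripped = {k.replace("fileWriteEvent/", ""): v for k, v in values.items()}
# stripped == {"fileName": "evil.exe", "username": "corp\\user"}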
def global_network_events(self) -> Generator[dict, None, None]:
    """Yields network events from the report, attributed to the root process."""
    root_proc_name = self.report.get("target", {}).get("file", {"name": ""})["name"]

    root_proc = None
    if root_proc_name:
        process_entries = list(self.processes.values())

        # Get the submitted sample to match to the network events.
        for proc in process_entries:
            if proc[FieldNames.PROCESS_IMAGE] == root_proc_name:
                root_proc = proc
                break

    # Fall back to the first process seen if the sample name didn't match.
    if not root_proc_name or not root_proc:
        root_proc = list(self.processes.values())[0]

    logger.debug(f"Found root process as {root_proc}")

    network_connections = self.report.get("network", {})

    # UDP connections. Example entry:
    # {
    #     "src": "192.168.168.201",
    #     "dst": "192.168.168.229",
    #     "offset": 299,
    #     "time": 11.827166080474854,
    #     "dport": 55494,
    #     "sport": 5355,
    # }
    for udp_conn in network_connections.get("udp", []):
        yield {
            FieldNames.IP_ADDRESS: udp_conn["dst"],
            FieldNames.PORT: udp_conn["dport"],
            FieldNames.EVENT_TYPE: EventTypes.CONNECTION,
            FieldNames.PROTOCOL: Protocols.UDP,
            **root_proc,
        }

    # TCP connections.
    for tcp_conn in network_connections.get("tcp", []):
        yield {
            FieldNames.IP_ADDRESS: tcp_conn["dst"],
            FieldNames.PORT: tcp_conn["dport"],
            FieldNames.EVENT_TYPE: EventTypes.CONNECTION,
            FieldNames.PROTOCOL: Protocols.TCP,
            **root_proc,
        }

    # ICMP connections.
    for icmp_conn in network_connections.get("icmp", []):
        yield {
            FieldNames.IP_ADDRESS: icmp_conn["dst"],
            FieldNames.EVENT_TYPE: EventTypes.CONNECTION,
            FieldNames.PROTOCOL: Protocols.ICMP,
            **root_proc,
        }

    # DNS requests.
    for dns_request in network_connections.get("dns", []):
        # If there are answers, the generic transformer will use the IP
        # address to make the "resolved to" edge.
        if "answers" in dns_request and dns_request["answers"]:
            for answer in dns_request["answers"]:
                yield {
                    FieldNames.HTTP_HOST: dns_request["request"],
                    FieldNames.EVENT_TYPE: EventTypes.DNS_LOOKUP,
                    FieldNames.IP_ADDRESS: answer["data"],
                    **root_proc,
                }
        else:
            # Otherwise, only add the DNS request.
            yield {
                FieldNames.HTTP_HOST: dns_request["request"],
                FieldNames.EVENT_TYPE: EventTypes.DNS_LOOKUP,
                **root_proc,
            }

    # HTTP requests.
    for http_request in network_connections.get("http_ex", []):
        yield {
            FieldNames.EVENT_TYPE: EventTypes.HTTP_REQUEST,
            FieldNames.HTTP_METHOD: http_request["method"],
            FieldNames.HTTP_HOST: http_request["host"],
            FieldNames.IP_ADDRESS: http_request["dst"],
            FieldNames.URI: http_request["uri"],
            **root_proc,
        }