def send_analysis(self, sample, outdir, metadata, quality):
    """Publish a finished analysis as a new karton task.

    Builds headers/payload for the outgoing task, attaches the sample
    (plus the testcase payload on test runs and VM profiles when
    configured), uploads every artifact found in *outdir*, and sends
    the task.
    """
    payload = {"analysis_uid": self.analysis_uid, **metadata}

    # Test runs use a dedicated header set so results are routed to
    # the regression consumers instead of production ones.
    headers = dict(self.test_headers if self.test_run else self.headers)
    headers["quality"] = quality

    task = Task(headers, payload=payload)
    task.add_payload('sample', sample)
    if self.test_run:
        task.add_payload('testcase', self.current_task.payload['testcase'])

    if self.config.config.getboolean("drakrun", "attach_profiles", fallback=False):
        self.log.info("Uploading profiles...")
        task.add_payload("profiles", self.build_profile_payload())

    self.log.info("Uploading artifacts...")
    for resource in self.upload_artifacts(self.analysis_uid, outdir):
        task.add_payload(resource.name, resource)
    self.send_task(task)
def _single_file_test(
    self, name, content, tag, kind=None, platform=None, extension=None
):
    """Run the classifier over a single in-memory file and verify output.

    When *kind* is None the sample is expected to be unclassifiable and
    no tasks may be produced; otherwise a single "recognized" task with
    the given kind/platform/extension headers and *tag* is expected.
    """
    sample = TestResource(name, content)
    results = self.run_task(
        Task(
            {"type": "sample", "kind": "raw"},
            payload={"sample": sample, "extraction_level": 999},
        )
    )

    if kind is None:
        self.assertTasksEqual(results, [])
        return

    headers = {
        "origin": "karton.classifier",
        "type": "sample",
        "stage": "recognized",
        "quality": "high",
        "kind": kind,
    }
    # Optional headers are only present when the classifier emits them.
    if platform:
        headers["platform"] = platform
    if extension:
        headers["extension"] = extension

    expected = Task(
        headers,
        payload={"sample": sample, "extraction_level": 999, "tags": [tag]},
    )
    self.assertTasksEqual(results, [expected])
def mock_task(resource: Resource) -> Task:
    """Wrap *resource* in a raw-sample task, as the classifier receives it."""
    headers = {"type": "sample", "kind": "raw"}
    task = Task(headers)
    task.add_payload("sample", resource)
    return task
def process(self, task: Task):
    """Re-rip a sample's memory dumps and publish a PASS/FAIL verdict.

    Extracts the dumps archive, runs the ripper, compares the detected
    family against the testcase expectation and sends an
    "analysis-test-result" task with a JSON summary.
    """
    dumps = task.get_resource("dumps.zip")
    sample = task.get_resource("sample")
    with dumps.extract_temporary() as temp:
        family = self.analyze_dumps(sample, temp)

    testcase = TestCase.from_json(task.payload["testcase"])
    expected_family = testcase.ripped

    if family is not None and family == expected_family:
        self.log.info(f"Ripping {sample.sha256} OK: {family}")
        result = 'OK'
    else:
        self.log.error(
            f"Failed to rip {sample.sha256}. Expected {expected_family}, ripped {family}"
        )
        result = 'FAIL'

    report = json.dumps({
        "sample": sample.sha256,
        "family": {
            "expected": expected_family,
            "ripped": family
        },
        "result": result
    })

    out_task = Task({"type": "analysis-test-result", "kind": "drakrun"})
    res = LocalResource(
        name=self.current_task.root_uid, bucket='draktestd', content=report
    )
    # Pin the resource UID to its name so the result object is stored
    # under the root task UID in the bucket.
    res._uid = res.name
    out_task.add_payload("result", res)
    self.send_task(out_task)
def process(self, task: Task) -> None:  # type: ignore
    """Dispatch on task type: scan a raw sample or post-process dumps.

    "sample" tasks are analyzed directly; "analysis" tasks carry a
    dumps.zip archive plus per-dump metadata, which is extracted and
    analyzed after a path-traversal safety check on each entry.
    """
    sample = task.get_resource("sample")
    task_type = task.headers["type"]

    if task_type == "sample":
        self.log.info("Analyzing original binary")
        self.analyze_sample(sample)
    elif task_type == "analysis":
        digest = hashlib.sha256(sample.content or b"").hexdigest()
        self.log.info(f"Processing analysis, sample: {digest}")
        dumps = task.get_resource("dumps.zip")
        dumps_metadata = task.get_payload("dumps_metadata")
        with dumps.extract_temporary() as tmpdir:  # type: ignore
            dump_infos = []
            for meta in dumps_metadata:
                dump_path = os.path.join(tmpdir, meta["filename"])
                # Reject entries that escape the extraction directory.
                if not self._is_safe_path(tmpdir, dump_path):
                    self.log.warning(
                        f"Path traversal attempt: {dump_path}")
                    continue
                # base_address is a hex string, e.g. "0x400000".
                base = int(meta["base_address"], 16)
                dump_infos.append(DumpInfo(path=dump_path, base=base))
            self.analyze_dumps(sample, dump_infos)

    self.log.debug("Printing gc stats")
    self.log.debug(gc.get_stats())
def process_joesandbox(self, task: Task) -> List[str]:
    """Scan all files from a JoeSandbox dumps archive with yara.

    Downloads the password-protected "dumps.zip" resource attached to
    *task*, extracts it (password: "infected") into a temporary
    directory and runs the yara rules against every extracted file.

    :param task: incoming analysis task carrying a "dumps.zip" resource
    :return: list of matched yara rule names (may contain duplicates)
    """
    log.info("Processing joesandbox analysis")
    yara_matches: List[str] = []
    with tempfile.TemporaryDirectory() as tmpdir:
        dumpsf = os.path.join(tmpdir, "dumps.zip")
        task.get_resource("dumps.zip").download_to_file(
            dumpsf)  # type: ignore

        # Fix: close the archive handle deterministically instead of
        # leaking it until garbage collection.
        with zipfile.ZipFile(dumpsf) as zipf:
            zipf.extractall(tmpdir, pwd=b"infected")

        for rootdir, _dirs, files in os.walk(tmpdir):
            for filename in files:
                # Fix: open the file actually being iterated; the loop
                # previously opened a literal "(unknown)" path, so the
                # loop variable `filename` was never used.
                with open(os.path.join(rootdir, filename), "rb") as dumpf:
                    content = dumpf.read()
                yara_matches += self.scan_sample(content)
    return yara_matches
def test_process_document_docx(self):
    """DOCX samples should be classified as win32 documents."""
    magic = "Microsoft Word 2007+..."
    mime = (
        "application/vnd.openxmlformats-officedocument."
        "wordprocessingml.document"
    )
    self.karton = mock_classifier(magic, mime)
    resource = mock_resource("file.docx")

    results = self.run_task(mock_task(resource))

    expected_headers = {
        "type": "sample",
        "stage": "recognized",
        "origin": "karton.classifier",
        "quality": "high",
        "kind": "document",
        "mime": mime,
        "extension": "docx",
        "platform": "win32",
    }
    expected_payload = {
        "sample": resource,
        "tags": ["document:win32:docx"],
        "magic": magic,
    }
    self.assertTasksEqual(
        results, [Task(headers=expected_headers, payload=expected_payload)]
    )
def test_process_document_xls(self):
    """XLS samples should be classified as win32 documents."""
    magic = "Composite Document File V2 Document..."
    mime = "application/vnd.ms-excel"
    self.karton = mock_classifier(magic, mime)
    resource = mock_resource("file.xls")

    results = self.run_task(mock_task(resource))

    expected_headers = {
        "type": "sample",
        "stage": "recognized",
        "origin": "karton.classifier",
        "quality": "high",
        "kind": "document",
        "mime": mime,
        "extension": "xls",
        "platform": "win32",
    }
    expected_payload = {
        "sample": resource,
        "tags": ["document:win32:xls"],
        "magic": magic,
    }
    self.assertTasksEqual(
        results, [Task(headers=expected_headers, payload=expected_payload)]
    )
def main(self) -> list:
    """Unpack the in-memory sample and emit a task with the child binary.

    Walks the unpacking chain (section selection, decode, decrypt,
    deobfuscate, second-stage decryption) and returns a single task
    carrying both the packed parent and the unpacked child.
    """
    # Perform Operations on self.data to unpack the sample
    pe = pefile.PE(data=self.data)
    payload_section, decryption_section, marker_value = self.selectingSections(
        pe)

    decrementationCounter = marker_value // 512  # that's how it is calculated

    decoded = self.payloadDecode(payload_section)
    obfuscated = self.payloadDecrypt(decoded, decrementationCounter)
    deobfuscated = self.runObfuscationCode(obfuscated)
    unpacked = self.decryptSecondStage(deobfuscated, decryption_section)

    headers = {
        'type': 'sample',
        'kind': 'runnable',
        'stage': 'recognized'
    }
    payload = {
        # Set Parent Data (Packed Sample)
        'parent': Resource(name='sample', content=self.data),
        # Set Child Data (Unpacked Sample)
        'sample': Resource(name='unpacked', content=unpacked),
    }

    # A list of tasks must be returned, as there can be more than one
    # unpacked child
    return [Task(headers=headers, payload=payload)]
def test_process_archive_cab(self):
    """CAB samples should be classified as archives (no platform)."""
    magic = "Microsoft Cabinet archive data..."
    mime = "application/vnd.ms-cab-compressed"
    self.karton = mock_classifier(magic, mime)
    resource = mock_resource("file.cab")

    results = self.run_task(mock_task(resource))

    expected_headers = {
        "type": "sample",
        "stage": "recognized",
        "origin": "karton.classifier",
        "quality": "high",
        "kind": "archive",
        "mime": mime,
        "extension": "cab",
    }
    expected_payload = {
        "sample": resource,
        "tags": ["archive:cab"],
        "magic": magic,
    }
    self.assertTasksEqual(
        results, [Task(headers=expected_headers, payload=expected_payload)]
    )
def test_process_runnable_win32_exe(self):
    """PE32 samples (no filename extension) should classify as win32 exe."""
    magic = "PE32 executable (GUI) Intel 80386 Mono/.Net assembly..."
    mime = "application/x-dosexec"
    self.karton = mock_classifier(magic, mime)
    resource = mock_resource("file")

    results = self.run_task(mock_task(resource))

    expected_headers = {
        "type": "sample",
        "stage": "recognized",
        "origin": "karton.classifier",
        "quality": "high",
        "kind": "runnable",
        "mime": mime,
        "extension": "exe",
        "platform": "win32",
    }
    expected_payload = {
        "sample": resource,
        "tags": ["runnable:win32:exe"],
        "magic": magic,
    }
    self.assertTasksEqual(
        results, [Task(headers=expected_headers, payload=expected_payload)]
    )
def test_process_runnable_win32_swf(self):
    """Flash samples should be classified as win32 runnables."""
    magic = "Macromedia Flash data (compressed)..."
    mime = "application/x-shockwave-flash"
    self.karton = mock_classifier(magic, mime)
    resource = mock_resource("file.swf")

    results = self.run_task(mock_task(resource))

    expected_headers = {
        "type": "sample",
        "stage": "recognized",
        "origin": "karton.classifier",
        "quality": "high",
        "kind": "runnable",
        "mime": mime,
        "extension": "swf",
        "platform": "win32",
    }
    expected_payload = {
        "sample": resource,
        "tags": ["runnable:win32:swf"],
        "magic": magic,
    }
    self.assertTasksEqual(
        results, [Task(headers=expected_headers, payload=expected_payload)]
    )
def test_process_runnable_win32_msi(self):
    """MSI installers should be classified as win32 runnables."""
    magic = "Composite Document File V2 Document, MSI Installer..."
    mime = "application/x-msi"
    self.karton = mock_classifier(magic, mime)
    resource = mock_resource("file.msi")

    results = self.run_task(mock_task(resource))

    expected_headers = {
        "type": "sample",
        "stage": "recognized",
        "origin": "karton.classifier",
        "quality": "high",
        "kind": "runnable",
        "mime": mime,
        "extension": "msi",
        "platform": "win32",
    }
    expected_payload = {
        "sample": resource,
        "tags": ["runnable:win32:msi"],
        "magic": magic,
    }
    self.assertTasksEqual(
        results, [Task(headers=expected_headers, payload=expected_payload)]
    )
def test_process_archive_iso(self):
    """ISO images should be classified as archives (no platform)."""
    magic = "ISO 9660 CD-ROM filesystem data..."
    mime = "application/x-iso9660-image"
    self.karton = mock_classifier(magic, mime)
    resource = mock_resource("file.iso")

    results = self.run_task(mock_task(resource))

    expected_headers = {
        "type": "sample",
        "stage": "recognized",
        "origin": "karton.classifier",
        "quality": "high",
        "kind": "archive",
        "mime": mime,
        "extension": "iso",
    }
    expected_payload = {
        "sample": resource,
        "tags": ["archive:iso"],
        "magic": magic,
    }
    self.assertTasksEqual(
        results, [Task(headers=expected_headers, payload=expected_payload)]
    )
def test_ascii_magic(self):
    """Run every on-disk testdata case through the ascii-magic decoder.

    Walks testdata/<group>/<case> files (extension-less files are the
    inputs; "<case>.decoded" / "<case>.exe.decoded" siblings hold the
    expected output) and builds the expected task for each case.

    NOTE(review): this block appears truncated — `expected` is built but
    never asserted, and the ".exe.decoded" branch ends after reading the
    file. Verify against the original test module.
    """
    test_groups = os.path.join(os.path.dirname(__file__), "testdata")
    for group in os.listdir(test_groups):
        test_group_dir = os.path.join(test_groups, group)
        for case in os.listdir(test_group_dir):
            # Files with a "." are expectation siblings, not inputs.
            if "." in os.path.basename(case):
                continue
            with self.subTest(group + "/" + case):
                case_path = os.path.join(test_group_dir, case)
                with open(case_path, "rb") as f:
                    content = f.read()
                sample = TestResource(case, content)
                if os.path.isfile(case_path + ".decoded"):
                    # Plain decoded payload: expect a raw re-emitted sample.
                    with open(
                            os.path.join(test_group_dir, case) + ".decoded",
                            "rb") as f:
                        decoded = f.read()
                    expected = Task(
                        {
                            "type": "sample",
                            "kind": "raw",
                            "origin": "karton.asciimagic",
                        },
                        payload={
                            "parent": sample,
                            "sample": TestResource(case, decoded),
                        },
                    )
                elif os.path.isfile(case_path + ".exe.decoded"):
                    # Decoded executable payload variant.
                    with open(
                            os.path.join(test_group_dir, case) +
                            ".exe.decoded", "rb") as f:
                        decoded = f.read()
def process(self, task: Task) -> None:  # type: ignore
    """Convert a ripped malware config into a MISP event and upload it.

    Skips configs that yield no actionable IOCs. The event UUID is
    derived deterministically from the config dhash, so re-processing
    the same config maps to the same MISP event.
    """
    config = task.get_payload("config")
    family = task.headers["family"]
    dhash = config_dhash(config)

    # Parse the config using iocextract library
    iocs = parse(family, config)
    if not iocs:
        # Nothing actionable found - skip the config
        return

    # Upload structured data to MISP
    event = MISPEvent()
    event.uuid = str(uuid5(self.CONFIG_NAMESPACE, dhash))
    event.add_tag(f"mwdb:family:{family}")
    event.info = f"Malware configuration ({family})"

    # Link back to the mwdb config page when a frontend URL is set.
    if self.mwdb_url is not None:
        event.add_attribute("link", f"{self.mwdb_url}/config/{dhash}")

    for misp_object in iocs.to_misp():
        event.add_object(misp_object)

    misp = ExpandedPyMISP(self.misp_url, self.misp_key, self.misp_verifycert)
    misp.add_event(event)
def process_config(self, task: Task, mwdb: MWDB) -> MWDBConfig:
    """Process a Config task and upload its object chain to MWDB.

    Clarification:
        sample -> parent -> config
    where "sample" is the original sample, "parent" is the direct
    parent of the config, and "config" is the config itself.

    :param mwdb: MWDB instance
    :return: MWDBConfig object
    """
    config_data = task.get_payload("config")
    # Family resolution order: task header, then config fields.
    family = (
        task.headers["family"]
        or config_data.get("family")
        or config_data.get("type", "unknown")
    )

    sample = None
    if task.has_payload("sample"):
        sample = self._upload_file(task, mwdb, task.get_payload("sample"))
        if sample:
            self.log.info("[sample %s] Adding tag ripped:%s", sample.id,
                          family)
            sample.add_tag("ripped:" + family)
        else:
            self.log.warning("Couldn't upload original sample")

    parent = None
    if task.has_payload("parent"):
        parent = self._upload_file(
            task, mwdb, task.get_payload("parent"), parent=sample)
        if parent:
            self.log.info("[sample %s] Adding tag %s", parent.id, family)
            parent.add_tag(family)
        else:
            self.log.warning("Couldn't upload parent sample")

    return self._upload_config(task, mwdb, family, config_data, parent=parent)
def process(self):
    """Run all postprocess plugins over the analysis and emit the result.

    Each plugin receives the current resource cache and may register new
    resources; plugin failures are logged but do not abort the run.
    Finally an "analysis" task is sent with all resources (minus the
    internal metadata.json) plus parsed metadata payloads.
    """
    # downloaded resource cache
    task_resources = dict(self.current_task.iterate_resources())

    for plugin in self.plugins:
        handler = plugin.handler
        # Skip plugins whose required resources are not all present.
        if any(req not in task_resources for req in plugin.required):
            self.log.info("Skipping %s, missing resources", handler.__name__)
            continue
        try:
            self.log.debug("Running postprocess - %s", handler.__name__)
            outputs = handler(
                self.current_task, task_resources, self.backend.minio
            )
            for out in outputs or []:
                self.log.debug(
                    f"Step {handler.__name__} outputted new resource: {out}"
                )
                res_name = os.path.join(
                    self.current_task.payload["analysis_uid"], out
                )
                task_resources[out] = RemoteResource(
                    res_name,
                    uid=res_name,
                    bucket="drakrun",
                    backend=self.backend,
                )
        except Exception:
            self.log.error("Postprocess failed", exc_info=True)

    task = Task(
        {
            "type": "analysis",
            "kind": "drakrun",
        }
    )
    # Add metadata information about task analysis.
    task.add_payload(
        "metadata", json.loads(task_resources["metadata.json"].content)
    )
    # Add metadata information about dumps within dumps.zip
    task.add_payload(
        "dumps_metadata", self.current_task.get_payload("dumps_metadata")
    )
    # metadata.json is internal, don't leak it to other services
    del task_resources["metadata.json"]
    for name, resource in task_resources.items():
        task.add_payload(name, resource)
    self.send_task(task)
def process(self, task: Task) -> None:  # type: ignore
    """Classify an incoming sample and forward a derived, tagged task.

    Unrecognized samples are forwarded with stage "unrecognized";
    recognized ones are derived with the classification headers, the
    classification tag prepended to any existing tags, and a sha256
    digest added to the sample metadata when missing.
    """
    sample = task.get_resource("sample")
    sample_class = self._classify(task)
    file_name = sample.name or "sample"

    if sample_class is None:
        self.log.info(
            "Sample {!r} not recognized (unsupported type)".format(
                file_name.encode("utf8")))
        self.send_task(
            task.derive_task({
                "type": "sample",
                "stage": "unrecognized",
                "kind": "unknown",
                "quality": task.headers.get("quality", "high"),
            }))
        return

    classification_tag = get_tag(sample_class)
    self.log.info("Classified {!r} as {} and tag {}".format(
        file_name.encode("utf8"), repr(sample_class), classification_tag))

    derived_task = task.derive_task(sample_class)

    # pass the original tags to the next task
    tags = [classification_tag]
    if derived_task.has_payload("tags"):
        tags += derived_task.get_payload("tags")
        derived_task.remove_payload("tags")
    derived_task.add_payload("tags", tags)

    # add a sha256 digest in the outgoing task if there
    # isn't one in the incoming task
    sample_meta = derived_task.payload["sample"].metadata
    if "sha256" not in sample_meta:
        sample_meta["sha256"] = sha256(
            cast(bytes, sample.content)).hexdigest()

    self.send_task(derived_task)
def process_drakrun(self, task: Task) -> List[str]:
    """Scan memory dumps from a drakrun analysis with yara.

    Downloads and extracts the "dumps.zip" resource, then scans every
    file whose name matches the dump naming scheme
    (<hex base>_<hex id>).

    :param task: incoming analysis task carrying a "dumps.zip" resource
    :return: list of matched yara rule names (may contain duplicates)
    """
    log.info('Processing drakrun analysis')
    yara_matches: List[str] = []
    with tempfile.TemporaryDirectory() as tmpdir:
        dumpsf = os.path.join(tmpdir, 'dumps.zip')
        task.get_resource('dumps.zip').download_to_file(
            dumpsf)  # type: ignore

        # Fix: close the archive handle deterministically instead of
        # leaking it until garbage collection.
        with zipfile.ZipFile(dumpsf) as zipf:
            zipf.extractall(tmpdir)

        for rootdir, _dirs, files in os.walk(tmpdir):
            for filename in files:
                # skip non-dump files
                if not re.match(r"^[a-f0-9]{4,16}_[a-f0-9]{16}$", filename):
                    continue
                # Fix: open the dump file actually being iterated; the
                # loop previously opened a literal "(unknown)" path.
                with open(os.path.join(rootdir, filename), "rb") as dumpf:
                    content = dumpf.read()
                yara_matches += self.scan_sample(content)
    return yara_matches
def process(self):
    """Run postprocess plugins and emit a "drakrun-processed" task.

    Each plugin receives the current resource cache and may register new
    resources; plugin failures are logged but do not abort the run. All
    resulting resources are attached to the outgoing task.
    """
    # downloaded resource cache
    task_resources = dict(self.current_task.iterate_resources())

    for plugin in self.plugins:
        handler = plugin.handler
        # Skip plugins whose required resources are not all present.
        if any(req not in task_resources for req in plugin.required):
            self.log.info("Skipping %s, missing resources", handler.__name__)
            continue
        try:
            self.log.debug("Running postprocess - %s", handler.__name__)
            outputs = handler(self.current_task, task_resources,
                              self.backend.minio)
            for out in outputs or []:
                self.log.debug(
                    f"Step {handler.__name__} outputted new resource: {out}"
                )
                res_name = os.path.join(
                    self.current_task.payload["analysis_uid"], out)
                task_resources[out] = RemoteResource(
                    res_name,
                    uid=res_name,
                    bucket='drakrun',
                    backend=self.backend,
                )
        except Exception:
            self.log.error("Postprocess failed", exc_info=True)

    task = Task({
        "type": "analysis",
        "kind": "drakrun-processed",
    })
    for name, resource in task_resources.items():
        task.add_payload(name, resource)
    self.send_task(task)
def report_config(self, config, sample, parent=None):
    """Convert a ripped config to the legacy format and send it onward.

    Renames "family" to the legacy "type" key, spawns any embedded
    "store-in-karton" tasks, and — unless the config carries nothing but
    its type — emits a static "config" task referencing the sample and
    its parent.
    """
    legacy_config = dict(config)
    legacy_config["type"] = config["family"]
    del legacy_config["family"]

    # This allows us to spawn karton tasks for special config handling
    if "store-in-karton" in legacy_config:
        self.log.info("Karton tasks found in config, sending")
        for karton_task in legacy_config["store-in-karton"]:
            payload_data = karton_task["payload"]
            payload_data["parent"] = parent or sample
            forwarded = Task(headers=karton_task["task"],
                             payload=payload_data)
            self.send_task(forwarded)
            self.log.info("Sending ripped task %s", forwarded.uid)
        del legacy_config["store-in-karton"]

    # Only the "type" key left: nothing of substance to report.
    if len(legacy_config) == 1:
        self.log.info(
            "Final config is empty, not sending it to the reporter")
        return

    self.send_task(
        Task(
            {
                "type": "config",
                "kind": "static",
                "family": config["family"],
                "quality": self.current_task.headers.get("quality", "high"),
            },
            payload={
                "config": legacy_config,
                "sample": sample,
                "parent": parent or sample,
                "tags": self.result_tags,
                "attributes": self.result_attributes,
            },
        ))
def process_sample(self, task: Task, mwdb: MWDB) -> Optional[MWDBFile]:
    """Upload a Sample task's file (and optional parent) to MWDB.

    :param mwdb: MWDB instance
    :return: MWDBFile object or None
    """
    parent = (
        self._upload_file(task, mwdb, task.get_payload("parent"))
        if task.has_payload("parent")
        else None
    )
    if not task.has_payload("sample"):
        return None
    return self._upload_file(
        task, mwdb, task.get_payload("sample"), parent=parent
    )
def test_pass(self) -> None:
    """A sample matching no yara rules should produce no tasks."""
    sample = Resource("sample", b"z")
    headers = {"type": "sample", "stage": "recognized", "kind": "runnable"}
    produced = self.run_task(Task(headers, payload={"sample": sample}))
    self.assertTasksEqual(produced, [])
def process(self, task: Task) -> None:  # type: ignore
    """Collect yara matches for a sample/analysis task and emit tags.

    Raw samples are scanned directly; analysis tasks are dispatched to
    the sandbox-specific processors. Distinct match names are sent as
    tags on an "analyzed" sample task; no task is sent without matches.
    """
    headers = task.headers
    sample = task.get_resource("sample")

    yara_matches: List[str] = []
    if headers["type"] == "sample":
        self.log.info(f"Processing sample {sample.metadata['sha256']}")
        if sample.content is not None:
            yara_matches = self.scan_sample(sample.content)
    elif headers["type"] == "analysis":
        kind = headers["kind"]
        if kind == "cuckoo1":
            yara_matches += self.process_cuckoo(task)
        elif kind == "drakrun":
            yara_matches += self.process_drakrun(task)
        elif kind == "joesandbox":
            yara_matches += self.process_joesandbox(task)

    if not yara_matches:
        self.log.info("Couldn't match any yara rules")
        return None

    unique_matches = sorted(set(yara_matches))
    self.log.info(
        "Got %d yara hits in total with %s distinct names",
        len(yara_matches),
        len(unique_matches),
    )
    self.send_task(
        Task(
            {"type": "sample", "stage": "analyzed"},
            payload={"sample": sample, "tags": unique_matches},
        ))
def test_match_2(self) -> None:
    """Both 'a' and 'b' rules should fire on b"ab", yielding two tags."""
    sample = Resource("sample", b"ab")
    incoming = Task(
        {"type": "sample", "stage": "recognized", "kind": "runnable"},
        payload={"sample": sample},
    )
    expected = Task(
        {"type": "sample", "origin": "karton.yaramatcher",
         "stage": "analyzed"},
        payload={"sample": sample, "tags": ["yara:a", "yara:b"]},
    )
    self.assertTasksEqual(self.run_task(incoming), [expected])
def process(self, task: Task) -> None:
    """Extract printable strings from the sample and forward them.

    Runs the `strings` binary over a temporary copy of the incoming
    sample and sends the output as a new "analyzed" sample task with
    the original sample as parent.
    """
    # Get the incoming sample
    sample = task.get_resource("sample")

    # Log with self.log
    self.log.info(f"Hi {sample.name}, let me analyse you!")

    # Download the resource to a temporary file and run `strings` on it
    with sample.download_temporary_file() as sample_file:
        strings = subprocess.check_output(["strings", sample_file.name])

    # Send our results for further processing or reporting
    result = Task(
        {"type": "sample", "stage": "analyzed"},
        payload={"parent": sample, "sample": Resource("string", strings)},
    )
    self.send_task(result)
def process(self, task: Task) -> None:  # type: ignore
    """Try to decode an ASCII-armored sample and re-emit the decoded data.

    Classifies the sample's textual encoding, attempts to decode it and,
    on success, sends a new task: recognized as a win32 EXE when the
    decoded data starts with the "MZ" magic, a raw sample otherwise.
    Decode failures are logged and the task is dropped.
    """
    sample = task.get_resource("sample")
    ascii_content = sample.content

    classifier = AsciiClassifier(ascii_content)
    classifier.classify()
    decoder = Decoder(ascii_content, classifier.verdict)

    try:
        decoder.decode()
    except binascii.Error:
        # Fix: corrected garbled message ("Error why trying...") and
        # log through the karton service logger for consistency with
        # the rest of the service instead of the root logger.
        self.log.warning("Error while trying to decode base64.")
        return

    if not decoder.decoded:
        return

    self.log.info("Decoded possible executable")
    # "MZ" magic marks a DOS/PE executable.
    if decoder.decoded[:2] == b"MZ":
        task_params = {
            "type": "sample",
            "kind": "runnable",
            "stage": "recognized",
            "platform": "win32",
            "extension": "exe",
        }
    else:
        task_params = {"type": "sample", "kind": "raw"}

    new_sample = Resource(sample.name, decoder.decoded)
    self.send_task(
        Task(task_params, payload={"sample": new_sample, "parent": sample})
    )
def submit_main(cls):
    """Submit all regression test cases and print results as they finish.

    Reads test cases from the JSON file given on the command line,
    submits one "sample-test" task per case, then polls the backend
    until every submitted root task has produced a result in the
    "draktestd" bucket, printing each result as it arrives and the full
    list at the end.
    """
    parser = cls.args_parser()
    args = parser.parse_args()

    conf_path = os.path.join(ETC_DIR, "config.ini")
    config = patch_config(Config(conf_path))

    with open(args.tests) as tests:
        testcases = [TestCase(**case) for case in json.load(tests)]

    root_uids = []
    for test in testcases:
        sample = test.get_sample()
        sys.stderr.write(f"Submitting {test.sha256}\n")

        t = Task(headers=dict(type="sample-test", platform="win64"))
        t.add_payload("sample", Resource("malwar", sample))
        t.add_payload("testcase", test.to_json())
        if args.timeout:
            t.add_payload("timeout", args.timeout)

        p = Producer(config)
        p.send_task(t)
        # Remember the root UID so we can match results to submissions.
        root_uids.append(t.root_uid)

    consumer = RegressionTester(config)
    results = {}

    # Poll once per second until every submitted task has a result.
    with tqdm(total=len(root_uids)) as pbar:
        while len(results) != len(root_uids):
            for root_uid in cls.get_finished_tasks(consumer.backend,
                                                   root_uids):
                if root_uid not in results:
                    # Result objects are stored under the root task UID.
                    res = json.load(
                        consumer.backend.minio.get_object(
                            "draktestd", root_uid))
                    results[root_uid] = res
                    print(json.dumps(results[root_uid]))
                    pbar.update(1)
            time.sleep(1)

    print(json.dumps(list(results.values())))
def process_cuckoo(self, task: Task) -> List[str]:
    """Scan memory dumps from a cuckoo analysis with yara.

    Extracts the "analysis" payload resource and scans every file in
    its dumps/ directory, skipping .txt/.metadata companion files.

    :param task: incoming analysis task carrying an "analysis" payload
    :return: list of matched yara rule names (may contain duplicates)
    """
    yara_matches: List[str] = []
    analysis = task.get_payload("analysis")
    log.info(f"Processing cuckoo analysis {analysis.name}")
    with analysis.extract_temporary() as analysis_dir:
        dump_dir = f"{analysis_dir}/dumps"
        for rootdir, _dirs, files in os.walk(dump_dir):
            for filename in files:
                # Only raw dumps are scanned, not metadata companions.
                if filename.endswith(".txt") or filename.endswith(
                        ".metadata"):
                    continue
                # Fix: log and open the file actually being iterated;
                # both previously used a literal "(unknown)" placeholder
                # instead of the filename.
                log.debug(f"Checking {filename}")
                with open(os.path.join(rootdir, filename), "rb") as dumpf:
                    content = dumpf.read()
                yara_matches += self.scan_sample(content)
    return yara_matches