Exemple #1
0
    def process(self, task: Task) -> None:  # type: ignore
        """Route an incoming task to sample analysis or dump analysis.

        A task whose ``type`` header is ``"sample"`` is passed to
        ``analyze_sample``. A task of type ``"analysis"`` has its
        ``dumps.zip`` resource extracted to a temporary directory; every
        entry of the ``dumps_metadata`` payload whose path passes
        ``_is_safe_path`` is collected and handed to ``analyze_dumps``.
        Any other type is ignored. Garbage collector statistics are logged
        at debug level before returning.
        """
        sample = task.get_resource("sample")
        task_type = task.headers["type"]

        if task_type == "sample":
            self.log.info("Analyzing original binary")
            self.analyze_sample(sample)
        elif task_type == "analysis":
            sample_hash = hashlib.sha256(sample.content or b"").hexdigest()
            self.log.info(f"Processing analysis, sample: {sample_hash}")
            archive = task.get_resource("dumps.zip")
            metadata_entries = task.get_payload("dumps_metadata")
            with archive.extract_temporary() as tmpdir:  # type: ignore
                collected = []
                for entry in metadata_entries:
                    dump_path = os.path.join(tmpdir, entry["filename"])
                    if self._is_safe_path(tmpdir, dump_path):
                        # base_address is parsed as a hexadecimal string
                        collected.append(
                            DumpInfo(path=dump_path,
                                     base=int(entry["base_address"], 16)))
                    else:
                        self.log.warning(
                            f"Path traversal attempt: {dump_path}")
                self.analyze_dumps(sample, collected)

        self.log.debug("Printing gc stats")
        self.log.debug(gc.get_stats())
    def process(self, task: Task):
        """Compare the ripped family with the test case and report the result.

        The ``dumps.zip`` resource is extracted and analyzed with
        ``analyze_dumps``; the returned family is compared with the
        ``ripped`` value of the test case stored in the ``testcase``
        payload. A JSON summary is wrapped in a ``LocalResource`` and sent
        in a new ``analysis-test-result`` task.
        """
        dumps = task.get_resource("dumps.zip")
        sample = task.get_resource("sample")
        with dumps.extract_temporary() as temp:
            family = self.analyze_dumps(sample, temp)
            expected_family = TestCase.from_json(
                task.payload["testcase"]).ripped

            mismatch = family is None or expected_family != family
            if mismatch:
                self.log.error(
                    f"Failed to rip {sample.sha256}. Expected {expected_family}, ripped {family}"
                )
            else:
                self.log.info(f"Ripping {sample.sha256} OK: {family}")
            result = 'FAIL' if mismatch else 'OK'

            summary = {
                "sample": sample.sha256,
                "family": {
                    "expected": expected_family,
                    "ripped": family
                },
                "result": result
            }
            out_res = json.dumps(summary)

            result_task = Task({
                "type": "analysis-test-result",
                "kind": "drakrun"
            })
            res = LocalResource(name=self.current_task.root_uid,
                                bucket='draktestd',
                                content=out_res)
            # The resource uid is forced to equal its name
            res._uid = res.name
            result_task.add_payload("result", res)
            self.send_task(result_task)
 def process_joesandbox(self, task: Task) -> List[str]:
     """Scan every file unpacked from a joesandbox ``dumps.zip`` resource.

     The archive is downloaded into a temporary directory, extracted there
     with the password ``infected`` and each file found under that
     directory is read and passed to ``scan_sample``.

     :param task: task carrying a ``dumps.zip`` resource
     :return: concatenated results of ``scan_sample`` for all files
     """
     log.info("Processing joesandbox analysis")
     yara_matches: List[str] = []
     with tempfile.TemporaryDirectory() as tmpdir:
         dumpsf = os.path.join(tmpdir, "dumps.zip")
         task.get_resource("dumps.zip").download_to_file(
             dumpsf)  # type: ignore
         # Use the archive as a context manager so its file handle is
         # released as soon as extraction is finished.
         with zipfile.ZipFile(dumpsf) as zipf:
             zipf.extractall(tmpdir, pwd=b"infected")
         # NOTE(review): the downloaded dumps.zip lives in tmpdir as well,
         # so the walk below also reads and scans the archive itself.
         for rootdir, _dirs, files in os.walk(tmpdir):
             for filename in files:
                 with open(os.path.join(rootdir, filename), "rb") as dumpf:
                     content = dumpf.read()
                 yara_matches += self.scan_sample(content)
     return yara_matches
    def process_drakrun(self, task: Task) -> List[str]:
        """Scan the memory dumps contained in a drakrun ``dumps.zip`` resource.

        The archive is downloaded into a temporary directory and extracted
        there. Only files whose name matches the dump naming pattern
        (4-16 lowercase hex digits, an underscore, 16 lowercase hex digits)
        are read and passed to ``scan_sample``.

        :param task: task carrying a ``dumps.zip`` resource
        :return: concatenated results of ``scan_sample`` for all dumps
        """
        log.info('Processing drakrun analysis')
        yara_matches: List[str] = []
        with tempfile.TemporaryDirectory() as tmpdir:
            dumpsf = os.path.join(tmpdir, 'dumps.zip')
            task.get_resource('dumps.zip').download_to_file(
                dumpsf)  # type: ignore
            # Use the archive as a context manager so its file handle is
            # released as soon as extraction is finished.
            with zipfile.ZipFile(dumpsf) as zipf:
                zipf.extractall(tmpdir)
            for rootdir, _dirs, files in os.walk(tmpdir):
                for filename in files:
                    # skip non-dump files
                    if not re.match(r"^[a-f0-9]{4,16}_[a-f0-9]{16}$",
                                    filename):
                        continue

                    with open(os.path.join(rootdir, filename),
                              "rb") as dumpf:
                        content = dumpf.read()
                    yara_matches += self.scan_sample(content)
        return yara_matches
    def process(self, task: Task) -> None:  # type: ignore
        """Classify the incoming sample and forward a derived task.

        When ``_classify`` returns ``None`` a derived task with stage
        ``unrecognized`` and kind ``unknown`` is sent and processing stops.
        Otherwise the classification becomes the headers of the derived
        task, the classification tag is put in front of any ``tags``
        payload already present, and a ``sha256`` entry is added to the
        sample metadata when it is missing.
        """
        sample = task.get_resource("sample")
        sample_class = self._classify(task)

        file_name = sample.name or "sample"
        encoded_name = file_name.encode("utf8")

        if sample_class is None:
            self.log.info(
                "Sample {!r} not recognized (unsupported type)".format(
                    encoded_name))
            unrecognized_headers = {
                "type": "sample",
                "stage": "unrecognized",
                "kind": "unknown",
                "quality": task.headers.get("quality", "high"),
            }
            self.send_task(task.derive_task(unrecognized_headers))
            return

        classification_tag = get_tag(sample_class)
        self.log.info("Classified {!r} as {} and tag {}".format(
            encoded_name, repr(sample_class), classification_tag))

        derived_task = task.derive_task(sample_class)

        # Keep the tags that came with the task, after the new one
        tags = [classification_tag]
        if derived_task.has_payload("tags"):
            tags.extend(derived_task.get_payload("tags"))
            derived_task.remove_payload("tags")
        derived_task.add_payload("tags", tags)

        # Provide a sha256 digest when the incoming sample carries none
        sample_metadata = derived_task.payload["sample"].metadata
        if "sha256" not in sample_metadata:
            digest = sha256(cast(bytes, sample.content)).hexdigest()
            sample_metadata["sha256"] = digest

        self.send_task(derived_task)
    def process(self, task: Task) -> None:  # type: ignore
        """Collect yara matches for a sample or an analysis and tag the sample.

        ``sample`` tasks are scanned directly (when content is present);
        ``analysis`` tasks are delegated to the handler for their ``kind``
        header (``cuckoo1``, ``drakrun`` or ``joesandbox``). When at least
        one rule matched, a new ``analyzed`` task is sent with the sorted,
        de-duplicated match names as its ``tags`` payload.
        """
        headers = task.headers
        sample = task.get_resource("sample")
        yara_matches: List[str] = []

        task_type = headers["type"]
        if task_type == "sample":
            self.log.info(f"Processing sample {sample.metadata['sha256']}")
            if sample.content is not None:
                yara_matches = self.scan_sample(sample.content)
        elif task_type == "analysis":
            kind = headers["kind"]
            if kind == "cuckoo1":
                yara_matches += self.process_cuckoo(task)
            elif kind == "drakrun":
                yara_matches += self.process_drakrun(task)
            elif kind == "joesandbox":
                yara_matches += self.process_joesandbox(task)

        if not yara_matches:
            self.log.info("Couldn't match any yara rules")
            return

        unique_matches = sorted(set(yara_matches))
        self.log.info(
            "Got %d yara hits in total with %s distinct names",
            len(yara_matches),
            len(unique_matches),
        )

        tag_headers = {"type": "sample", "stage": "analyzed"}
        tag_payload = {"sample": sample, "tags": unique_matches}
        self.send_task(Task(tag_headers, payload=tag_payload))
    def process(self, task: Task) -> None:
        """Run ``strings`` on the incoming sample and send its output onward.

        The sample is downloaded to a temporary file, the ``strings``
        program is executed on it, and the captured output is sent as the
        ``sample`` resource of a new ``analyzed`` task whose ``parent`` is
        the original sample.
        """
        sample_resource = task.get_resource("sample")
        self.log.info(f"Hi {sample_resource.name}, let me analyse you!")

        # The temporary file only needs to exist while `strings` runs
        with sample_resource.download_temporary_file() as sample_file:
            strings_output = subprocess.check_output(
                ["strings", sample_file.name])

        result_headers = {"type": "sample", "stage": "analyzed"}
        result_payload = {
            "parent": sample_resource,
            "sample": Resource("string", strings_output),
        }
        self.send_task(Task(result_headers, payload=result_payload))
Exemple #8
0
    def process(self, task: Task) -> None:  # type: ignore
        """Decode an ASCII-encoded sample and submit the decoded payload.

        The sample content is classified with ``AsciiClassifier`` and
        decoded with ``Decoder`` using the classifier verdict. When
        decoding yields data, it is sent as a new sample whose parent is
        the original one: as a recognized win32 ``exe`` when the data
        starts with ``MZ``, otherwise as a ``raw`` sample.
        """
        sample = task.get_resource("sample")
        ascii_content = sample.content

        classifier = AsciiClassifier(ascii_content)
        classifier.classify()
        decoder = Decoder(ascii_content, classifier.verdict)
        try:
            decoder.decode()
        except binascii.Error:
            # Report through the service logger, like the rest of the method
            self.log.warning("Error while trying to decode base64.")
            return

        if not decoder.decoded:
            return

        self.log.info("Decoded possible executable")
        if decoder.decoded[:2] == b"MZ":
            task_params = {
                "type": "sample",
                "kind": "runnable",
                "stage": "recognized",
                "platform": "win32",
                "extension": "exe",
            }
        else:
            task_params = {"type": "sample", "kind": "raw"}

        new_sample = Resource(sample.name, decoder.decoded)
        decoded_task = Task(task_params,
                            payload={
                                "sample": new_sample,
                                "parent": sample
                            })
        self.send_task(decoded_task)
Exemple #9
0
    def process(self, task: Task) -> None:
        """Unpack an archive sample and submit every child as a raw sample.

        The password is taken from the ``password`` payload or, failing
        that, from the first ``password`` entry of the ``attributes``
        payload. The sample is written into a temporary directory and
        passed to ``unpack``. Each child that has contents and does not
        exceed ``self.max_size`` is sent as a new ``raw`` sample with the
        archive as its parent and ``extraction_level`` increased by one.
        Nothing is unpacked once ``extraction_level`` exceeds
        ``self.max_depth``.
        """
        sample = task.get_resource("sample")
        task_password = task.get_payload("password", default=None)

        attributes = task.get_payload("attributes", default={})
        if not task_password and attributes.get("password"):
            self.log.info("Accepting password from attributes")
            task_password = attributes.get("password")[0]

        # Bound up front so a sample without a name does not leave the
        # variable undefined for the code below.
        fname = None
        try:
            if sample.name:
                fname = sample.name.encode("utf8")

                # Append the classifier extension only when the header is
                # actually present; a missing header must not discard the
                # file name.
                classifier_extension = task.headers.get("extension")
                if classifier_extension:
                    suffix = ("." + classifier_extension).encode("utf-8")
                    if not fname.endswith(suffix):
                        fname += suffix

        except Exception as e:
            self.log.warning("Exception during extraction: %r", e)
            fname = None

        extraction_level = task.get_payload("extraction_level", 0)

        if extraction_level > self.max_depth:
            self.log.warning(
                "Maximum extraction depth exceeded. Can't extract this archive."
            )
            return

        with tempfile.TemporaryDirectory() as dir_name:
            # NOTE(review): fname is bytes (or None) here, so the f-string
            # embeds its repr in the on-disk name; a name containing a path
            # separator makes the open() below fail.
            filepath = f"{dir_name}/{fname}"
            with open(filepath, "wb") as f:
                f.write(sample.content)

            archive_password = None
            if task_password is not None:
                archive_password = task_password.encode()

            unpacked = unpack(
                filename=fname,
                filepath=filepath.encode("utf-8"),
                password=archive_password,
            )

        try:
            fname = (unpacked.filename
                     and unpacked.filename.decode("utf8")) or unpacked.sha256
        except Exception as e:
            self.log.warning("Exception during extraction: %r", e)
            fname = "(unknown)"

        self.log.info("Got archive {}".format(fname))

        if not unpacked.children:
            self.log.warning("Don't know how to unpack this archive")
            return

        # Read once from the incoming task; the loop builds new Task objects
        # and must not take the value from one of them.
        quality = task.headers.get("quality", "high")

        for child in unpacked.children:
            try:
                child_name = (child.filename
                              and child.filename.decode("utf8")) or child.sha256
            except UnicodeDecodeError as e:
                # One undecodable name must not abort the remaining children
                self.log.warning("Exception during extraction: %r", e)
                child_name = child.sha256

            self.log.info("Unpacked child {}".format(child_name))

            if not child.contents:
                self.log.warning(
                    "Child has no contents or protected by unknown password")
                continue

            if len(child.contents) > self.max_size:
                self.log.warning("Child is too big for further processing")
                continue

            child_task = Task(
                headers={
                    "type": "sample",
                    "kind": "raw",
                    "quality": quality,
                },
                payload={
                    "sample": Resource(child_name, child.contents),
                    "parent": sample,
                    "extraction_level": extraction_level + 1,
                },
            )
            self.send_task(child_task)
Exemple #10
0
    def _classify(self, task: Task) -> Optional[Dict[str, Optional[str]]]:
        """Work out kind, platform and extension of the task's sample.

        Checks run in a fixed priority order and the first match wins:
        magic output and file extension first, then raw-content heuristics
        (PE markers, script keywords), and finally verdicts that depend on
        the magic output alone (text encodings, PGP, pcap).

        :param task: task carrying a ``sample`` resource; its ``magic`` and
            ``mime`` payloads are used when ``self._magic`` fails
        :return: dict with ``magic``, ``mime``, ``kind``, ``platform`` and
            ``extension`` keys, or ``None`` when nothing matched
        """
        sample = task.get_resource("sample")
        content = cast(bytes, sample.content)

        # Payload values only survive when self._magic raises
        magic = task.get_payload("magic") or ""
        magic_mime = task.get_payload("mime") or ""
        try:
            magic = self._magic(content, mime=False)
            magic_mime = self._magic(content, mime=True)
        except Exception as ex:
            self.log.warning(f"unable to get magic: {ex}")

        extension = self._get_extension(sample.name or "sample")
        sample_class = {
            "magic": magic if magic else None,
            "mime": magic_mime if magic_mime else None,
            "kind": None,
            "platform": None,
            "extension": None,
        }

        def classified(**fields: Optional[str]) -> Dict[str, Optional[str]]:
            # Merge a verdict into the result dict and hand the dict back
            sample_class.update(fields)
            return sample_class

        # Is PE file?
        if magic.startswith(("PE32", "MS-DOS executable PE32")):
            return classified(
                kind="runnable",
                # "PE32+" marks a 64-bit only executable
                platform="win64" if magic.startswith("PE32+") else "win32",
                extension="dll" if "(DLL)" in magic else "exe",
            )

        # ZIP-contained files?
        def zip_has_file(path: str) -> bool:
            try:
                ZipFile(BytesIO(content)).getinfo(path)
                return True
            except Exception:
                return False

        if magic.startswith(("Zip archive data", "Java archive data (JAR)")):
            if extension == "apk" or zip_has_file("AndroidManifest.xml"):
                return classified(kind="runnable",
                                  platform="android",
                                  extension="apk")

            if extension == "jar" or zip_has_file("META-INF/MANIFEST.MF"):
                # Default platform should be Windows
                return classified(kind="runnable",
                                  platform="win32",
                                  extension="jar")

        # Dalvik Android files?
        if magic.startswith("Dalvik dex file") or extension == "dex":
            return classified(kind="runnable",
                              platform="android",
                              extension="dex")

        # Shockwave Flash?
        if magic.startswith("Macromedia Flash") or extension == "swf":
            return classified(kind="runnable",
                              platform="win32",
                              extension="swf")

        # Windows LNK?
        if magic.startswith("MS Windows shortcut") or extension == "lnk":
            return classified(kind="runnable",
                              platform="win32",
                              extension="lnk")

        # Is ELF file?
        if magic.startswith("ELF"):
            return classified(kind="runnable", platform="linux")

        # Is PKG file?
        if magic.startswith("xar archive") or extension == "pkg":
            return classified(kind="runnable",
                              platform="macos",
                              extension="pkg")

        # Is DMG file?
        def has_dmg_trailer() -> bool:
            # Looks at the last 512 bytes for the "koly" marker
            trailer = content[-512:]
            return (len(content) > 512 and trailer[:4] == b"koly"
                    and trailer[8:12] == b"\x00\x00\x02\x00")

        if extension == "dmg" or has_dmg_trailer():
            return classified(kind="runnable",
                              platform="macos",
                              extension="dmg")

        # Is mach-o file?
        if magic.startswith("Mach-O"):
            return classified(kind="runnable", platform="macos")

        def zip_has_mac_app() -> bool:
            try:
                archive = ZipFile(BytesIO(content))
                return any(
                    x.filename.lower().endswith(".app/contents/info.plist")
                    for x in archive.filelist)
            except Exception:
                return False

        # macos app within zip
        if magic.startswith("Zip archive data") and zip_has_mac_app():
            return classified(kind="runnable",
                              platform="macos",
                              extension="app")

        # Windows scripts (per extension)
        script_extensions = (
            "vbs",
            "vbe",
            "js",
            "jse",
            "wsh",
            "wsf",
            "hta",
            "cmd",
            "bat",
            "ps1",
        )
        if extension in script_extensions:
            return classified(kind="script",
                              platform="win32",
                              extension=extension)

        # Office documents
        office_extensions = {
            "doc": "Microsoft Word",
            "xls": "Microsoft Excel",
            "ppt": "Microsoft PowerPoint",
        }
        # Check RTF by libmagic
        if magic.startswith("Rich Text Format"):
            return classified(kind="document",
                              platform="win32",
                              extension="rtf")

        # Check Composite Document (doc/xls/ppt) by libmagic and extension
        if magic.startswith("Composite Document File"):
            # MSI installers are also CDFs
            if "MSI Installer" in magic:
                return classified(kind="runnable",
                                  platform="win32",
                                  extension="msi")

            # If not MSI, treat it like Office document
            for ext, typepart in office_extensions.items():
                if f"Name of Creating Application: {typepart}" in magic:
                    return classified(kind="document",
                                      platform="win32",
                                      extension=ext)

            # Creating application unknown: trust an office-like file
            # extension, otherwise fall back to "doc"
            document_ext = (extension
                            if extension[:3] in office_extensions else "doc")
            return classified(kind="document",
                              platform="win32",
                              extension=document_ext)

        # Check docx/xlsx/pptx by libmagic
        for ext, typepart in office_extensions.items():
            if magic.startswith(typepart):
                return classified(kind="document",
                                  platform="win32",
                                  extension=ext + "x")

        # Check RTF by extension
        if extension == "rtf":
            return classified(kind="document",
                              platform="win32",
                              extension="rtf")

        # Finally check document type only by extension
        if extension[:3] in office_extensions:
            return classified(kind="document",
                              platform="win32",
                              extension=extension)

        # Unclassified Open XML documents
        if magic.startswith("Microsoft OOXML"):
            try:
                extn = classify_openxml(content)
            except Exception:
                self.log.exception("Error while trying to classify OOXML")
            else:
                if extn:
                    return classified(kind="document",
                                      platform="win32",
                                      extension=extn)

        # PDF files
        if magic.startswith("PDF document") or extension == "pdf":
            return classified(kind="document",
                              platform="win32",
                              extension="pdf")

        # Archives; the first entry whose magic prefix matches wins
        archive_assoc = {
            "7z": ["7-zip archive data"],
            "ace": ["ACE archive data"],
            "bz2": ["bzip2 compressed data"],
            "cab": ["Microsoft Cabinet archive data"],
            "gz": ["gzip compressed"],
            "iso": ["ISO 9660 CD-ROM"],
            "lz": ["lzip compressed data"],
            "tar": ["tar archive", "POSIX tar archive"],
            "rar": ["RAR archive data"],
            "udf": ["UDF filesystem data"],
            "xz": ["XZ compressed data"],
            "zip": ["Zip archive data"],
            "zlib": ["zlib compressed data"],
        }
        archive_extensions = (
            "ace",
            "zip",
            "rar",
            "tar",
            "cab",
            "gz",
            "7z",
            "bz2",
            "arj",
            "iso",
            "xz",
            "lz",
            "udf",
            "zlib",
        )

        def apply_archive_headers(extension):
            headers = {"kind": "archive", "extension": extension}
            if extension == "xz":
                # libmagic >= 5.40 generates correct MIME type for XZ archives
                headers["mime"] = "application/x-xz"
            return classified(**headers)

        for archive_extension, assocs in archive_assoc.items():
            if magic.startswith(tuple(assocs)):
                return apply_archive_headers(archive_extension)

        if extension in archive_extensions:
            return apply_archive_headers(extension)

        # E-mail
        email_assoc = {
            "msg": ["Microsoft Outlook Message"],
            "eml": ["multipart/mixed", "RFC 822 mail", "SMTP mail"],
        }
        for ext, patterns in email_assoc.items():
            if any(pattern in magic for pattern in patterns):
                return classified(kind="archive", extension=ext)

        if extension in email_assoc:
            return classified(kind="archive", extension=extension)

        # HTML
        if magic.startswith("HTML document"):
            return classified(kind="html")

        # Linux scripts
        if ("script" in magic and "executable" in magic) or extension == "sh":
            return classified(kind="script",
                              platform="linux",
                              extension=extension)

        # Content heuristics work on the first and last 2048 bytes
        partial = content[:2048] + content[-2048:]

        # Dumped PE file heuristics (PE not recognized by libmagic)
        if b".text" in partial and b"This program cannot be run" in partial:
            return classified(kind="dump", platform="win32", extension="exe")

        if len(partial) > 0x40:
            # The little-endian 16-bit value at 0x3C is used as the offset
            # at which the "PE" signature is expected
            pe_offs = struct.unpack("<H", partial[0x3C:0x3E])[0]
            if partial[pe_offs:pe_offs + 2] == b"PE":
                return classified(kind="dump",
                                  platform="win32",
                                  extension="exe")

        if partial.startswith(b"MZ"):
            return classified(kind="dump", platform="win32", extension="exe")

        # Heuristics for scripts
        vbs_keywords = (
            "end function",
            "end if",
            "array(",
            "sub ",
            "on error ",
            "createobject",
            "execute",
        )
        js_keywords = (
            "function ",
            "function(",
            "this.",
            "this[",
            "new ",
            "createobject",
            "activexobject",
            "var ",
            "catch",
        )
        html_keywords = ("<!doctype", "<html", "<script")
        ps_keywords = (
            "powershell",
            "-nop",
            "bypass",
            "new-object",
            "invoke-expression",
            "frombase64string(",
            "| iex",
            "|iex",
        )
        try:
            try:
                partial_str = partial.decode(
                    chardet.detect(partial)["encoding"]).lower()
            except Exception:
                self.log.warning("Heuristics disabled - unknown encoding")
            else:

                def hits(keywords) -> int:
                    # Number of distinct keywords present in the text
                    return sum(keyword in partial_str for keyword in keywords)

                if hits(html_keywords) >= 2:
                    return classified(kind="html")

                if hits(vbs_keywords) >= 2:
                    return classified(kind="script",
                                      platform="win32",
                                      extension="vbs")
                # Powershell heuristics: a single keyword is enough
                if any(keyword.lower() in partial_str
                       for keyword in ps_keywords):
                    return classified(kind="script",
                                      platform="win32",
                                      extension="ps1")
                # JS heuristics
                if hits(js_keywords) >= 2:
                    return classified(kind="script",
                                      platform="win32",
                                      extension="js")
                # JSE heuristics
                if re.match(r"#@~\^[a-zA-Z0-9+/]{6}==", partial_str):
                    # jse is more possible than vbe
                    return classified(kind="script",
                                      platform="win32",
                                      extension="jse")
        except Exception as e:
            self.log.exception(e)

        # Verdicts that depend on the magic output alone. They are checked
        # outside the text heuristics so that binary formats such as pcap
        # are still recognized when the content cannot be decoded as text.
        magic_kinds = (
            ("ASCII", "ascii"),
            ("ISO-8859", "iso-8859-1"),
            ("UTF-8", "utf-8"),
            ("PGP", "pgp"),
            ("pcap capture file", "pcap"),
        )
        for prefix, kind in magic_kinds:
            if magic.startswith(prefix):
                return classified(kind=kind)
        if magic.startswith("pcap") and "ng capture file" in magic:
            return classified(kind="pcapng")

        # If not recognized then unsupported
        return None
Exemple #11
0
    def process(self, task: Task):
        """Run a sandbox analysis of the task's sample and send the results.

        Steps, in order: validate the requested timeout against a hard
        limit of 20 minutes, derive the extension and file name under
        which the sample is run, store the sample in the work directory,
        pick a start command, write ``hooks.txt``, call ``analyze_sample``
        (up to three attempts), post-process dumps and IPT traces, write
        ``metadata.json`` and call ``send_analysis``. The method returns
        early, after logging an error, when the timeout is too large, the
        file name is invalid, no start command is available or every
        analysis attempt failed.
        """
        # Gather basic facts
        sample = task.get_resource("sample")
        magic_output = magic.from_buffer(sample.content)
        sha256sum = hashlib.sha256(sample.content).hexdigest()

        self.log.info(f"Running on: {socket.gethostname()}")
        self.log.info(f"Sample SHA256: {sha256sum}")
        self.log.info(f"Analysis UID: {self.analysis_uid}")

        # Timeout sanity check
        timeout = task.payload.get('timeout') or self.default_timeout
        hard_time_limit = 60 * 20
        if timeout > hard_time_limit:
            self.log.error(
                "Tried to run the analysis for more than hard limit of %d seconds",
                hard_time_limit)
            return

        self.update_vnc_info()

        # Get sample extension. If none set, fall back to exe/dll
        extension = task.headers.get("extension", "exe").lower()
        if '(DLL)' in magic_output:
            extension = 'dll'
        self.log.info("Running file as %s", extension)

        # Prepare sample file name
        file_name = task.payload.get("file_name", "malwar") + f".{extension}"
        # Alphanumeric, dot, underscore, dash. fullmatch is used on purpose:
        # with re.match() a trailing "$" would also accept a name that ends
        # with a newline character.
        if not re.fullmatch(r"[a-zA-Z0-9._\-]+", file_name):
            self.log.error("Filename contains invalid characters")
            return
        self.log.info("Using file name %s", file_name)

        # workdir - configs, sample, etc.
        # outdir - analysis artifacts
        workdir, outdir = self._prepare_workdir()

        sample_path = os.path.join(workdir, file_name)
        sample.download_to_file(sample_path)

        # Try to come up with a start command for this file
        # or use the one provided by the sender
        cmd = self._get_start_command(extension, sample, sample_path)

        start_command = task.payload.get("start_command", cmd)
        if not start_command:
            self.log.error(
                "Unable to run malware sample. Could not generate any suitable"
                " command to run it.")
            return
        self.log.info("Start command: %s", start_command)

        # If task contains 'custom_hooks' override local defaults
        with open(os.path.join(workdir, "hooks.txt"), "wb") as hooks:
            if task.has_payload("custom_hooks"):
                custom_hooks = task.get_resource("custom_hooks")
                assert custom_hooks.content is not None
                hooks.write(custom_hooks.content)
            else:
                with open(os.path.join(ETC_DIR, "hooks.txt"),
                          "rb") as default_hooks:
                    hooks.write(default_hooks.read())

        metadata = {
            "sample_sha256": sha256sum,
            "magic_output": magic_output,
            "time_started": int(time.time())
        }

        max_attempts = 3
        for i in range(max_attempts):
            try:
                self.log.info(
                    f"Trying to analyze sample (attempt {i + 1}/{max_attempts})"
                )
                info = self.analyze_sample(sample_path, workdir, outdir,
                                           start_command, timeout)
                metadata.update(info)
                break
            except Exception:
                self.log.exception("Analysis attempt failed. Retrying...")
        else:
            # Reached only when no attempt hit the `break` above
            self.log.error(f"Giving up after {max_attempts} failures...")
            return

        self.log.info("Analysis done. Collecting artifacts...")

        # Make sure dumps have a reasonable size
        self.crop_dumps(os.path.join(outdir, 'dumps'),
                        os.path.join(outdir, 'dumps.zip'))

        # Compress IPT traces, they're quite large however they compress well
        self.compress_ipt(os.path.join(outdir, 'ipt'),
                          os.path.join(outdir, 'ipt.zip'))

        metadata['time_finished'] = int(time.time())

        with open(os.path.join(outdir, 'metadata.json'), 'w') as f:
            f.write(json.dumps(metadata))

        quality = task.headers.get("quality", "high")
        self.send_analysis(sample, outdir, metadata, quality)
Exemple #12
0
    def process(self, task: Task) -> None:
        """
        Pull embedded resources out of an AutoIt binary and report them.

        The sample is matched against the yara rules; depending on which
        rule hits, resources are extracted as EA05 or EA06. Extracted
        .dll/.exe entries and script.au3 are sent as child sample tasks,
        and a binary found inside the script is sent as a further task.

        :param task: task carrying the "sample" resource
        """
        sample = task.get_resource("sample")
        matches = self.yara.match(data=sample.content)

        if "autoit_v3_00" in matches:
            self.log.info("Found a possible autoit v3.00 binary")
            resources = extract(data=sample.content, version=AutoItVersion.EA05)
        elif "autoit_v3_26" in matches:
            self.log.info("Found a possible autoit v3.26+ binary")
            resources = extract(data=sample.content, version=AutoItVersion.EA06)
        else:
            resources = None

        # Nothing matched, or nothing could be extracted
        if not resources:
            return

        self.log.info("Found embedded data, reporting!")

        for res_name, res_data in resources:
            # Only executables and the main script are reported
            if res_name.endswith((".dll", ".exe")):
                headers = {"type": "sample", "kind": "raw"}
            elif res_name == "script.au3":
                headers = {
                    "type": "sample",
                    "kind": "script",
                    "stage": "analyzed",
                    "extension": "au3",
                }
            else:
                continue

            self.log.info("Sending a task with %s", res_name)
            script = Resource(res_name, res_data)
            child = Task(headers, payload={"sample": script, "parent": sample})
            self.send_task(child)

            if res_name != "script.au3":
                continue

            # The script itself may carry another binary
            self.log.info("Looking for a binary embedded in the script")
            drop = extract_binary(res_data.decode())
            if not drop:
                continue

            self.log.info("Found an embedded binary")
            dropped = Resource(name="autoit_drop.exe", content=drop)
            self.send_task(
                Task(
                    {"type": "sample", "kind": "raw"},
                    payload={"sample": dropped, "parent": script},
                )
            )
Exemple #13
0
 def process(self, task: Task) -> None:
     """
     Run unpacker_module_worker on the sample once per entry in
     self.modules and send every task each call returns.

     :param task: task carrying the "sample" resource
     """
     sample = task.get_resource("sample")
     for module in self.modules:
         derived_tasks = unpacker_module_worker(sample, self.user_config, module)
         for derived in derived_tasks:
             self.send_task(derived)
Exemple #14
0
    def _classify(self, task: Task) -> Optional[Dict[str, str]]:
        """
        Derive the classification headers for the sample carried by a task.

        The checks run in a fixed order and the first match wins: the magic
        description and the file extension come first, then heuristics over
        the first and last 2048 bytes of the content.

        :param task: task with a "sample" resource. A truthy "magic" payload
            is used instead of calling pymagic.from_buffer; the "quality"
            header (default "high") is copied into the result.
        :return: dict with "type", "stage" and "quality" plus whichever of
            "kind", "platform" and "extension" the matching check sets, or
            None when nothing matched.
        """
        sample = task.get_resource("sample")
        content = cast(bytes, sample.content)
        magic = task.get_payload("magic") or pymagic.from_buffer(content)
        extension = self._get_extension(sample.name or "sample")
        result = {
            "type": "sample",
            "stage": "recognized",
            "quality": task.headers.get("quality", "high"),
        }

        def classified(**fields: str) -> Dict[str, str]:
            # Merge the recognized fields into the base headers and return them
            result.update(fields)
            return result

        def document(ext: str) -> Dict[str, str]:
            # Shorthand for the many win32 document outcomes
            return classified(kind="document", platform="win32", extension=ext)

        def pe_dump() -> Dict[str, str]:
            # Shorthand for content that looks like a dumped PE
            return classified(kind="dump", platform="win32", extension="exe")

        def zip_has_file(path: str) -> bool:
            # True when the content opens as a ZIP that has an entry `path`
            try:
                ZipFile(BytesIO(content)).getinfo(path)
            except Exception:
                return False
            return True

        # PE files
        if magic.startswith(("PE32", "MS-DOS executable PE32")):
            return classified(
                kind="runnable",
                # "PE32+" means a 64-bit only executable
                platform="win64" if magic.startswith("PE32+") else "win32",
                extension="dll" if "(DLL)" in magic else "exe",
            )

        # ZIP containers that are really Android or Java packages
        if magic.startswith(("Zip archive data", "Java archive data (JAR)")):
            if extension == "apk" or zip_has_file("AndroidManifest.xml"):
                return classified(
                    kind="runnable", platform="android", extension="apk"
                )
            if extension == "jar" or zip_has_file("META-INF/MANIFEST.MF"):
                # Windows is the default platform for JARs
                return classified(kind="runnable", platform="win32", extension="jar")

        # Dalvik, Shockwave Flash and Windows LNK: magic prefix or extension
        runnable_formats = (
            ("Dalvik dex file", "android", "dex"),
            ("Macromedia Flash", "win32", "swf"),
            ("MS Windows shortcut", "win32", "lnk"),
        )
        for prefix, platform, ext in runnable_formats:
            if magic.startswith(prefix) or extension == ext:
                return classified(kind="runnable", platform=platform, extension=ext)

        # ELF files
        if magic.startswith("ELF"):
            return classified(kind="runnable", platform="linux")

        # Windows scripts, recognized by extension only
        script_extensions = (
            "vbs",
            "vbe",
            "js",
            "jse",
            "wsh",
            "wsf",
            "hta",
            "cmd",
            "bat",
            "ps1",
        )
        if extension in script_extensions:
            return classified(kind="script", platform="win32", extension=extension)

        # Office documents
        office_extensions = {
            "doc": "Microsoft Word",
            "xls": "Microsoft Excel",
            "ppt": "Microsoft PowerPoint",
        }

        # RTF by magic
        if magic.startswith("Rich Text Format"):
            return document("rtf")

        # Composite Document Files: MSI installers or doc/xls/ppt documents
        if magic.startswith("Composite Document File"):
            if "MSI Installer" in magic:
                return classified(kind="runnable", platform="win32", extension="msi")
            # Keep a doc/xls/ppt-like extension, otherwise fall back to "doc"
            return document(extension if extension[:3] in office_extensions else "doc")

        # docx/xlsx/pptx by magic
        for ext, typepart in office_extensions.items():
            if magic.startswith(typepart):
                return document(ext + "x")

        # RTF by extension
        if extension == "rtf":
            return document("rtf")

        # Office document by extension only
        if extension[:3] in office_extensions:
            return document(extension)

        # Open XML documents the magic string did not tell apart
        if magic.startswith("Microsoft OOXML"):
            try:
                extn = classify_openxml(content)
                if extn:
                    return document(extn)
            except Exception:
                self.log.exception("Error while trying to classify OOXML")

        # PDF files
        if magic.startswith("PDF document") or extension == "pdf":
            return document("pdf")

        # Archives: magic prefixes per extension, tried in archive_extensions order
        archive_assoc = {
            "7z": ("7-zip archive data",),
            "ace": ("ACE archive data",),
            "bz2": ("bzip2 compressed data",),
            "cab": ("Microsoft Cabinet archive data",),
            "gz": ("gzip compressed",),
            "iso": ("ISO 9660 CD-ROM",),
            "lz": ("lzip compressed data",),
            "tar": ("tar archive", "POSIX tar archive"),
            "rar": ("RAR archive data",),
            "udf": ("UDF filesystem data",),
            "xz": ("XZ compressed data",),
            "zip": ("Zip archive data",),
            "zlib": ("zlib compressed data",),
        }
        archive_extensions = [
            "ace",
            "zip",
            "rar",
            "tar",
            "cab",
            "gz",
            "7z",
            "bz2",
            "arj",
            "iso",
            "xz",
            "lz",
            "udf",
            "cab",
            "zlib",
        ]
        for ext in archive_extensions:
            # Extensions without known prefixes get an empty tuple: no match
            if magic.startswith(archive_assoc.get(ext, ())):
                return classified(kind="archive", extension=ext)
        if extension in archive_extensions:
            return classified(kind="archive", extension=extension)

        # E-mail, reported with kind "archive"
        email_assoc = {"msg": "Microsoft Outlook Message", "eml": "multipart/mixed"}
        for ext, marker in email_assoc.items():
            if marker in magic:
                return classified(kind="archive", extension=ext)
        if extension in email_assoc:
            return classified(kind="archive", extension=extension)

        # HTML
        if magic.startswith("HTML document"):
            return classified(kind="html")

        # Linux scripts
        if ("script" in magic and "executable" in magic) or extension == "sh":
            return classified(kind="script", platform="linux", extension=extension)

        # Content heuristics over the head and tail of the sample
        partial = content[:2048] + content[-2048:]

        # Dumped PE file heuristics (PE not recognized by magic)
        if b".text" in partial and b"This program cannot be run" in partial:
            return pe_dump()

        if len(partial) > 0x40:
            # 16-bit little-endian value at 0x3C is used as the "PE" offset
            (pe_offs,) = struct.unpack("<H", partial[0x3C:0x3E])
            if partial[pe_offs : pe_offs + 2] == b"PE":
                return pe_dump()

        if partial.startswith(b"MZ"):
            return pe_dump()

        # Heuristics for scripts and plain text
        try:
            try:
                detected = chardet.detect(partial)["encoding"]
                partial_str = partial.decode(detected).lower()
            except Exception:
                self.log.warning("Heuristics disabled - unknown encoding")
                return None

            def hits(keywords) -> int:
                # Number of keywords present in the decoded, lowercased text
                return sum(1 for keyword in keywords if keyword in partial_str)

            html_keywords = ("<!doctype", "<html", "<script")
            vbs_keywords = (
                "end function",
                "end if",
                "array(",
                "sub ",
                "on error ",
                "createobject",
                "execute",
            )
            js_keywords = (
                "function ",
                "function(",
                "this.",
                "this[",
                "new ",
                "createobject",
                "activexobject",
                "var ",
                "catch",
            )
            ps_keywords = (
                "powershell",
                "-nop",
                "bypass",
                "new-object",
                "invoke-expression",
                "frombase64string(",
                "| iex",
                "|iex",
            )

            # HTML, VBS and JS each need at least two keyword hits
            if hits(html_keywords) >= 2:
                return classified(kind="html")
            if hits(vbs_keywords) >= 2:
                return classified(kind="script", platform="win32", extension="vbs")
            if hits(js_keywords) >= 2:
                return classified(kind="script", platform="win32", extension="js")

            # JSE heuristics (jse is more possible than vbe)
            if re.match("#@~\\^[a-zA-Z0-9+/]{6}==", partial_str):
                return classified(kind="script", platform="win32", extension="jse")

            # Powershell heuristics: a single keyword is enough
            if any(keyword in partial_str for keyword in ps_keywords):
                return classified(kind="script", platform="win32", extension="ps1")

            # Remaining kinds taken straight from the magic prefix
            magic_kinds = (
                ("ASCII", "ascii"),
                ("ISO-8859", "iso-8859-1"),
                ("UTF-8", "utf-8"),
                ("PGP", "pgp"),
            )
            for prefix, kind in magic_kinds:
                if magic.startswith(prefix):
                    return classified(kind=kind)
        except Exception as e:
            self.log.exception(e)

        # If not recognized then unsupported
        return None