Example #1
def test_mongo_connect_store_file():
    set_cwd(tempfile.mkdtemp())
    cuckoo_create(cfg={
        "reporting": {
            "mongodb": {
                "enabled": True,
                "db": "cuckootest",
            },
        },
    })

    mongo.init()
    assert mongo.database == "cuckootest"

    fd, filepath = tempfile.mkstemp()
    os.write(fd, "hello world")
    os.close(fd)
    f = File(filepath)

    r = MongoDB()
    r.init_once()
    id1 = r.store_file(f, "foobar.txt")
    id2 = r.store_file(f, "foobar.txt")
    assert id1 == id2

    assert mongo.db.fs.files.find_one({
        "sha256": f.get_sha256(),
    })["_id"] == id1

    assert mongo.grid.get(id1).read() == "hello world"
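
The test above relies on store_file() returning the same ObjectId when the same content is stored twice. A minimal sketch of such sha256-based deduplication against GridFS (an illustrative assumption, not Cuckoo's actual implementation; the helper name store_file_dedup is hypothetical):

import gridfs

def store_file_dedup(db, f, filename):
    # Reuse the existing ObjectId if a file with this sha256 is already stored.
    existing = db.fs.files.find_one({"sha256": f.get_sha256()})
    if existing:
        return existing["_id"]
    # Extra keyword arguments to put() are stored as fields on fs.files.
    return gridfs.GridFS(db).put(
        f.get_data(), filename=filename, sha256=f.get_sha256()
    )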
Example #2
File: dropped.py Project: LetMeR00t/cuckoo
    def run(self):
        """Run analysis.
        @return: list of dropped files with related information.
        """
        self.key = "dropped"
        dropped_files, meta = [], {}

        if os.path.exists(self.dropped_meta_path):
            for line in open(self.dropped_meta_path, "rb"):
                entry = json.loads(line)
                filepath = os.path.join(self.analysis_path, entry["path"])
                meta[filepath] = {
                    "pids": entry["pids"],
                    "filepath": entry["filepath"],
                }

        for dir_name, dir_names, file_names in os.walk(self.dropped_path):
            for file_name in file_names:
                file_path = os.path.join(dir_name, file_name)
                file_info = File(file_path=file_path).get_all()
                file_info.update(meta.get(file_info["path"], {}))
                dropped_files.append(file_info)

        for dir_name, dir_names, file_names in os.walk(self.package_files):
            for file_name in file_names:
                file_path = os.path.join(dir_name, file_name)
                file_info = File(file_path=file_path).get_all()
                dropped_files.append(file_info)

        return dropped_files
Example #3
File: apkinfo.py Project: LetMeR00t/cuckoo
    def run(self):
        """Run androguard to extract static android information
                @return: list of static features
        """
        self.key = "apkinfo"
        apkinfo = {}

        if "file" not in self.task["category"]:
            return

        from androguard.core.bytecodes.apk import APK
        from androguard.core.bytecodes.dvm import DalvikVMFormat
        from androguard.core.analysis.analysis import uVMAnalysis
        from androguard.core.analysis import analysis

        f = File(self.task["target"])
        if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path)

            try:
                a = APK(self.file_path)
                if a.is_valid_APK():
                    manifest = {}
                    apkinfo["files"] = self._apk_files(a)
                    manifest["package"] = a.get_package()
                    # manifest["permissions"]=a.get_details_permissions_new()
                    manifest["main_activity"] = a.get_main_activity()
                    manifest["activities"] = a.get_activities()
                    manifest["services"] = a.get_services()
                    manifest["receivers"] = a.get_receivers()
                    # manifest["receivers_actions"]=a.get__extended_receivers()
                    manifest["providers"] = a.get_providers()
                    manifest["libraries"] = a.get_libraries()
                    apkinfo["manifest"] = manifest
                    # apkinfo["certificate"] = a.get_certificate()
                    static_calls = {}
                    if self.check_size(apkinfo["files"]):
                        vm = DalvikVMFormat(a.get_dex())
                        vmx = uVMAnalysis(vm)

                        static_calls["all_methods"] = self.get_methods(vmx)
                        static_calls["is_native_code"] = analysis.is_native_code(vmx)
                        static_calls["is_dynamic_code"] = analysis.is_dyn_code(vmx)
                        static_calls["is_reflection_code"] = analysis.is_reflection_code(vmx)

                        # static_calls["dynamic_method_calls"]= analysis.get_show_DynCode(vmx)
                        # static_calls["reflection_method_calls"]= analysis.get_show_ReflectionCode(vmx)
                        # static_calls["permissions_method_calls"]= analysis.get_show_Permissions(vmx)
                        # static_calls["crypto_method_calls"]= analysis.get_show_CryptoCode(vmx)
                        # static_calls["native_method_calls"]= analysis.get_show_NativeMethods(vmx)
                    else:
                        log.warning("Dex size bigger than: %s",
                                    self.options.decompilation_threshold)
                    apkinfo["static_method_calls"] = static_calls
            except (IOError, OSError, zipfile.BadZipfile) as e:
                raise CuckooProcessingError("Error opening file: %s" % e)

        return apkinfo
Example #4
    def run(self):
        """Run Google play unofficial python api the get the google play information
        @return: list of google play features
        """
        self.key = "googleplay"
        googleplay = {}

        if not HAVE_GOOGLEPLAY:
            log.error("Unable to import the GooglePlay library, has it been "
                      "installed properly?")
            return

        if "file" not in self.task["category"]:
            return

        f = File(self.task["target"])
        if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path)

            android_id = self.options.get("android_id")
            google_login = self.options.get("google_login")
            google_password = self.options.get("google_password")
            # auth_token = self.options.get("auth_token", None)

            if not android_id and not google_login and not google_password:
                raise CuckooProcessingError("Google Play Credentials not configured, skip")

            try:
                a = APK(self.file_path)
                if a.is_valid_APK():
                    package = a.get_package()
                    # Connect
                    api = GooglePlayAPI(android_id)
                    api.login(google_login, google_password, None)

                    # Get the version code and the offer type from the app details
                    app_data = api.details(package)
                    app_detail = app_data.docV2.details.appDetails

                    if not app_detail.installationSize:
                        return googleplay

                    googleplay["title"] = app_detail.title
                    googleplay["app_category"] = app_detail.appCategory._values
                    googleplay["version_code"] = app_detail.versionCode
                    googleplay["app_type"] = app_detail.appType
                    googleplay["content_rating"] = app_detail.contentRating
                    googleplay["developer_email"] = app_detail.developerEmail
                    googleplay["developer_name"] = app_detail.developerName
                    googleplay["developer_website"] = app_detail.developerWebsite
                    googleplay["installation_size"] = app_detail.installationSize
                    googleplay["num_downloads"] = app_detail.numDownloads
                    googleplay["upload_date"] = app_detail.uploadDate
                    googleplay["permissions"] = app_detail.permission._values
            except (IOError, OSError, zipfile.BadZipfile) as e:
                raise CuckooProcessingError("Error opening file: %s" % e)

        return googleplay
Example #5
File: static.py Project: jgajek/cuckoo
    def run(self):
        """Run analysis.
        @return: results dict.
        """
        self.key = "static"
        static = {}

        if self.task["category"] == "file":
            if not os.path.exists(self.file_path):
                return

            f = File(self.file_path)
            filename = os.path.basename(self.task["target"])
        elif self.task["category"] == "archive":
            if not os.path.exists(self.file_path):
                return

            f = Archive(self.file_path).get_file(
                self.task["options"]["filename"]
            )
            filename = os.path.basename(self.task["options"]["filename"])
        else:
            return

        if filename:
            ext = filename.split(os.path.extsep)[-1].lower()
        else:
            ext = None

        package = self.task.get("package")

        if package == "exe" or ext == "exe" or "PE32" in f.get_type():
            static.update(PortableExecutable(f.file_path).run())
            static["keys"] = f.get_keys()

        if package == "wsf" or ext == "wsf":
            static["wsf"] = WindowsScriptFile(f.file_path).run()

        if package in ("doc", "ppt", "xls") or ext in self.office_ext:
            static["office"] = OfficeDocument(f.file_path).run()

        if package == "pdf" or ext == "pdf":
            static["pdf"] = dispatch(
                _pdf_worker, (f.file_path,),
                timeout=self.options.pdf_timeout
            )

        if package == "lnk" or ext == "lnk":
            static["lnk"] = LnkShortcut(f.file_path).run()

        return static
Example #6
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                if not dmp.endswith(".dmp"):
                    continue

                dump_path = os.path.join(self.pmemory_path, dmp)
                dump_file = File(dump_path)

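                # The dump filename is assumed to embed exactly two integers,
                # e.g. "<pid>-<num>.dmp"; findall() extracts them in order.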
                pid, num = map(int, re.findall("(\\d+)", dmp))

                regions = []
                for region in roach.procmem(dump_path).regions:
                    regions.append(region.to_json())

                proc = dict(
                    file=dump_path, pid=pid, num=num,
                    yara=dump_file.get_yara("memory"),
                    urls=list(dump_file.get_urls()),
                    regions=regions,
                )

                ExtractManager.for_task(self.task["id"]).peek_procmem(proc)

                if self.options.get("idapro"):
                    self.create_idapy(proc)

                if self.options.get("extract_img"):
                    proc["extracted"] = list(self.dump_images(
                        proc, self.options.get("extract_dll")
                    ))

                if self.options.get("dump_delete"):
                    try:
                        os.remove(dump_path)
                    except OSError:
                        log.error(
                            "Unable to delete memory dump file at path \"%s\"",
                            dump_path
                        )

                results.append(proc)

        results.sort(key=lambda x: (x["pid"], x["num"]))
        return results
Example #7
File: api.py Project: leixyou/cuckoo
    def store_screenshots(request, task_id, body):
        if not body or not isinstance(body, list):
            return json_error_response("screenshots missing")

        report = ControlApi.get_report(int(task_id))

        if not report:
            return json_error_response("report missing")

        for scr in body:
            sid = scr.get("id", None)
            data = scr.get("data", None)

            try:
                if sid is None or not data:
                    raise ValueError

                ftype, b64 = data.split(",")
                if ftype != "data:image/png;base64":
                    raise ValueError

                f = base64.b64decode(b64)
                if f[:4] != "\x89PNG":
                    raise ValueError
            except ValueError:
                return json_error_response("invalid format")

            shot_path = cwd(
                "shots", "remotecontrol_%d.png" % int(sid),
                analysis=int(task_id)
            )
            open(shot_path, "wb").write(f)

            shot_blob = {}
            shot = File(shot_path)
            if shot.valid():
                shot_id = mdb.store_file(shot)
                shot_blob["original"] = shot_id

            if shot_blob:
                report["shots"].append(shot_blob)

        mdb.db.analysis.save(report)
        return JsonResponse({
            "status": "success",
        })
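
The handler above only accepts PNG screenshots wrapped in a base64 data URI. A minimal sketch of a request body that would pass its checks (the path "shot.png" is hypothetical):

import base64

png_bytes = open("shot.png", "rb").read()  # must start with "\x89PNG"
body = [{
    "id": 0,
    "data": "data:image/png;base64," + base64.b64encode(png_bytes),
}]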
Example #8
    def push_script(self, process, command):
        filepath = self.write_extracted(command.ext,
                                        command.get_script().encode("utf8"))
        if not filepath:
            return

        process = process or {}

        yara_matches = File(filepath).get_yara("scripts")
        self.items.append({
            "category": "script",
            "program": command.program,
            "pid": process.get("pid"),
            "first_seen": process.get("first_seen"),
            "raw": filepath,
            "yara": yara_matches,
            "info": {},
        })
        for match in yara_matches:
            match = YaraMatch(match, "script")
            self.handle_yara(filepath, match)
Example #9
    def test_magic1(self):
        f = File("tests/files/foo.txt")
        assert "ASCII text" in f.get_type()
        assert f.get_content_type() == "text/plain"
Example #10
    def setup(self):
        # File() will invoke cwd(), so any CWD is required.
        set_cwd(tempfile.mkdtemp())

        self.path = tempfile.mkstemp()[1]
        self.file = File(self.path)
Example #11
class TestFile(object):
    def setup(self):
        # File() will invoke cwd(), so any CWD is required.
        set_cwd(tempfile.mkdtemp())

        self.path = tempfile.mkstemp()[1]
        self.file = File(self.path)

    def test_get_name(self):
        assert self.path.split(os.sep)[-1] == self.file.get_name()

    def test_get_data(self):
        assert "" == self.file.get_data()

    def test_get_size(self):
        assert 0 == self.file.get_size()

    def test_get_crc32(self):
        assert "00000000" == self.file.get_crc32()

    def test_get_md5(self):
        assert "d41d8cd98f00b204e9800998ecf8427e" == self.file.get_md5()

    def test_get_sha1(self):
        assert "da39a3ee5e6b4b0d3255bfef95601890afd80709" == self.file.get_sha1()

    def test_get_sha256(self):
        assert "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" == self.file.get_sha256()

    def test_get_sha512(self):
        assert "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e" == self.file.get_sha512()

    def test_get_ssdeep(self):
        try:
            import pydeep
            assert self.file.get_ssdeep() is not None
            pydeep  # Fake usage.
        except ImportError:
            assert self.file.get_ssdeep() is None

    def test_get_type(self):
        assert "empty" in self.file.get_type()

    def test_get_content_type(self):
        assert self.file.get_content_type() in ["inode/x-empty", "application/x-empty"]

    def test_get_all_type(self):
        assert isinstance(self.file.get_all(), dict)

    def test_get_all_keys(self):
        for key in ["name", "size", "crc32", "md5", "sha1", "sha256", "sha512", "ssdeep", "type"]:
            assert key in self.file.get_all()
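
The digests asserted above are the well-known hash values of empty input, which can be double-checked with hashlib alone:

import hashlib

# mkstemp() creates an empty file, so File() hashes the empty string.
assert hashlib.md5("").hexdigest() == "d41d8cd98f00b204e9800998ecf8427e"
assert hashlib.sha1("").hexdigest() == "da39a3ee5e6b4b0d3255bfef95601890afd80709"
assert hashlib.sha256("").hexdigest() == (
    "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
)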
Example #12
File: static.py Project: muhzii/cuckoo
    def run(self):
        """Run analysis.
        @return: results dict.
        """
        self.key = "static"
        static = {}

        if self.task["category"] == "file":
            if not os.path.exists(self.file_path):
                return

            f = File(self.file_path)
            filename = os.path.basename(self.task["target"])
        elif self.task["category"] == "archive":
            if not os.path.exists(self.file_path):
                return

            f = Archive(self.file_path).get_file(
                self.task["options"]["filename"])
            filename = os.path.basename(self.task["options"]["filename"])
        else:
            return

        if filename:
            ext = filename.split(os.path.extsep)[-1].lower()
        else:
            ext = None

        package = self.task.get("package")

        if package == "generic" and (ext == "elf" or "ELF" in f.get_type()):
            static["elf"] = ELF(f.file_path).run()
            static["keys"] = f.get_keys()

        if package == "exe" or ext == "exe" or "PE32" in f.get_type():
            static.update(PortableExecutable(f.file_path).run())
            static["keys"] = f.get_keys()

        if package == "wsf" or ext == "wsf":
            static["wsf"] = WindowsScriptFile(f.file_path).run()

        if package in ("doc", "ppt", "xls") or ext in self.office_ext:
            static["office"] = OfficeDocument(f.file_path,
                                              self.task["id"]).run()

        if package == "pdf" or ext == "pdf":
            if f.get_content_type() == "application/pdf":
                static["pdf"] = dispatch(
                    _pdf_worker, (f.file_path, ),
                    timeout=self.options.pdf_timeout) or []
            else:
                static["pdf"] = []

        if package == "generic" or ext == "lnk":
            static["lnk"] = LnkShortcut(f.file_path).run()

        if package == "apk" or ext == "apk" or any(t in f.get_type()
                                                   for t in ("JAR", "Zip")):
            static["apkinfo"] = AndroidPackage(f.file_path).run()

        return static
Example #13
File: scheduler.py Project: cccs-jp/cuckoo
    def init(self):
        """Initialize the analysis."""
        self.storage = cwd(analysis=self.task.id)

        # If the analysis storage folder already exists, we need to abort the
        # analysis or previous results will be overwritten and lost.
        if os.path.exists(self.storage):
            log.error(
                "Analysis results folder already exists at path \"%s\", "
                "analysis aborted", self.storage)
            return False

        # If we're not able to create the analysis storage folder, we have to
        # abort the analysis.
        # Also create all directories that the ResultServer can use for file
        # uploads.
        try:
            Folders.create(self.storage, RESULT_DIRECTORIES)
        except CuckooOperationalError:
            log.error("Unable to create analysis folder %s", self.storage)
            return False

        self.store_task_info()

        if self.task.category == "file" or self.task.category == "archive":
            # Check if we have permissions to access the file.
            # And fail this analysis if we don't have access to the file.
            if not os.access(self.task.target, os.R_OK):
                log.error(
                    "Unable to access target file, please check if we have "
                    "permissions to access the file: \"%s\"", self.task.target)
                return False

            # Check whether the file has been changed for some unknown reason.
            # And fail this analysis if it has been modified.
            # TODO Absorb the file upon submission.
            sample = self.db.view_sample(self.task.sample_id)
            sha256 = File(self.task.target).get_sha256()
            if sha256 != sample.sha256:
                log.error(
                    "Target file has been modified after submission: \"%s\"",
                    self.task.target)
                return False

            # Store a copy of the original file if it does not exist already.
            # TODO This should be done at submission time.
            self.binary = cwd("storage", "binaries", sha256)
            if not os.path.exists(self.binary):
                try:
                    shutil.copy(self.task.target, self.binary)
                except (IOError, shutil.Error):
                    log.error(
                        "Unable to store file from \"%s\" to \"%s\", "
                        "analysis aborted", self.task.target, self.binary)
                    return False

            # Each analysis directory contains a symlink/copy of the binary.
            try:
                self.storage_binary = os.path.join(self.storage, "binary")

                if hasattr(os, "symlink"):
                    os.symlink(self.binary, self.storage_binary)
                else:
                    shutil.copy(self.binary, self.storage_binary)
            except (AttributeError, OSError) as e:
                log.error(
                    "Unable to create symlink/copy from \"%s\" to "
                    "\"%s\": %s", self.binary, self.storage, e)
                return False

        # Initiates per-task logging.
        task_log_start(self.task.id)
        return True
Example #15
    def test_no_keys(self):
        assert File("tests/files/pdf0.pdf").get_keys() == []
Example #16
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)
        if "network" not in report:
            report["network"] = {}

        # This will likely hardcode the cuckoo.log to this point, but that
        # should be fine.
        if report.get("debug"):
            report["debug"]["cuckoo"] = list(report["debug"]["cuckoo"])

        # Store the path of the analysis folder.
        report["info"]["analysis_path"] = self.analysis_path

        # Store the sample in GridFS.
        if results.get("info", {}).get("category") == "file" and "target" in results:
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"]["pcap_id"] = pcap_id

        sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
        spcap = File(sorted_pcap_path)
        if spcap.valid():
            spcap_id = self.store_file(spcap)
            report["network"]["sorted_pcap_id"] = spcap_id

        mitmproxy_path = os.path.join(self.analysis_path, "dump.mitm")
        mitmpr = File(mitmproxy_path)
        if mitmpr.valid():
            mitmpr_id = self.store_file(mitmpr)
            report["network"]["mitmproxy_id"] = mitmpr_id

        # Store the process memory dump file and extracted files in GridFS and
        # reference it back in the report.
        if "procmemory" in report and self.options.get("store_memdump", False):
            for idx, procmem in enumerate(report["procmemory"]):
                procmem_path = os.path.join(
                    self.analysis_path, "memory", "%s.dmp" % procmem["pid"]
                )
                procmem_file = File(procmem_path)
                if procmem_file.valid():
                    procmem_id = self.store_file(procmem_file)
                    procmem["procmem_id"] = procmem_id

                for extracted in procmem.get("extracted", []):
                    f = File(extracted["path"])
                    if f.valid():
                        extracted["extracted_id"] = self.store_file(f)

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        if "dropped" in report:
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop, filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        new_extracted = []
        if "extracted" in report:
            for extracted in report["extracted"]:
                new_extr = dict(extracted)
                extr = File(extracted[extracted["category"]])
                if extr.valid():
                    extr_id = self.store_file(extr)
                    new_extr["object_id"] = extr_id

                new_extracted.append(new_extr)

        report["extracted"] = new_extracted

        # Add screenshots.
        report["shots"] = []
        if os.path.exists(self.shots_path):
            # Walk through the files and select the JPGs.
            for shot_file in sorted(os.listdir(self.shots_path)):
                if not shot_file.endswith(".jpg") or "_" in shot_file:
                    continue

                shot_path = os.path.join(self.shots_path, shot_file)
                shot_path_dir = os.path.dirname(shot_path)
                shot_file_name, shot_file_ext = os.path.splitext(shot_file)
                shot_path_resized = os.path.join(shot_path_dir, "%s_small%s" % (shot_file_name, shot_file_ext))

                shot_blob = {}

                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if os.path.isfile(shot_path):
                    shot = File(shot_path)
                    if shot.valid():
                        shot_id = self.store_file(shot)
                        shot_blob["original"] = shot_id

                # Try to get the alternative (small) size for this image,
                # store it and reference it back in the report.
                if os.path.isfile(shot_path_resized):
                    shot_small = File(shot_path_resized)
                    if shot_small.valid():
                        shot_id = self.store_file(shot_small)
                        shot_blob["small"] = shot_id

                if shot_blob:
                    report["shots"].append(shot_blob)

        paginate = self.options.get("paginate", 100)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for call in process["calls"]:
                    # If the chunk size is paginate or if the loop is
                    # completed then store the chunk in MongoDB.
                    if len(chunk) == paginate:
                        to_insert = {"pid": process["pid"], "calls": chunk}
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["pid"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        if report.get("procmon"):
            procmon, chunk = [], []

            for entry in report["procmon"]:
                if len(chunk) == paginate:
                    procmon.append(self.db.procmon.insert(chunk))
                    chunk = []

                chunk.append(entry)

            if chunk:
                procmon.append(self.db.procmon.insert(chunk))

            report["procmon"] = procmon

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
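
When reading a report back, each entry in a process's "calls" list is an ObjectId referencing a chunk document. A sketch of resolving the chunks into a flat call list, assuming the same pymongo handle as above:

calls = []
for chunk_id in new_process["calls"]:
    chunk = self.db.calls.find_one({"_id": chunk_id})
    calls.extend(chunk["calls"])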
Example #17
File: procmemory.py Project: muhzii/cuckoo
    def dump_images(self, process, drop_dlls=False):
        """Dump executable images from this process memory dump."""
        buf = open(process["file"], "rb").read()

        images, capture, regions, end, pe = [], False, [], None, None
        for r in process["regions"]:
            off, size = r["offset"], r["size"]

            if capture:
                if int(r["end"], 16) > end:
                    images.append((pe, regions))
                    capture = False
                else:
                    regions.append(r)
                continue

            # We're going to take a couple of assumptions for granted here.
            # Namely, the PE header is fully intact, has not been tampered
            # with, and the DOS header, the NT header, and the Optional header
            # all remain in the first page/chunk of this PE file.
            if buf[off:off + 2] != "MZ":
                continue

            try:
                pe = pefile.PE(data=buf[off:off + size], fast_load=True)
            except pefile.PEFormatError:
                continue

            # Enable the capture of memory regions.
            capture, regions = True, [r]
            end = int(r["addr"], 16) + pe.OPTIONAL_HEADER.SizeOfImage

        # If present, also process the last loaded executable.
        if capture and regions:
            images.append((pe, regions))

        for pe, regions in images:
            img = []

            # Skip DLLs if requested to do so (the default).
            if pe.is_dll() and not drop_dlls:
                continue

            hdrsz = self._fixup_pe_header(pe)
            if not hdrsz:
                continue

            img.append(str(pe.write())[:hdrsz])
            for idx, r in enumerate(regions):
                offset = r["offset"]
                if not idx:
                    offset += hdrsz
                img.append(buf[offset:r["offset"] + r["size"]])

            sha1 = hashlib.sha1("".join(img)).hexdigest()

            if pe.is_dll():
                filename = "%s-%s.dll_" % (process["pid"], sha1[:16])
            elif pe.is_exe():
                filename = "%s-%s.exe_" % (process["pid"], sha1[:16])
            else:
                log.warning("Unknown injected executable for pid=%s",
                            process["pid"])
                continue

            filepath = os.path.join(self.pmemory_path, filename)
            open(filepath, "wb").write("".join(img))

            yield File(filepath).get_all()
Example #18
    def run(self):
        """Runs TIE processing
        @return: TIE results
        """
        log.info("Processing TIE reputation analysis.")

        self.key = "tie"
        timeout = int(self.options.get("timeout", 60))
        scan = int(self.options.get("scan", 0))

        # Evaluate the original sample against TIE reputation
        if self.task["category"] == "file":
            # Create the client
            with DxlClient(config) as client:
                # Connect to the fabric
                client.connect()

                tie_client = TieClient(client)

                # Generate the relevant hash information.
                md5_hex = File(self.file_path).get_md5()
                sha1_hex = File(self.file_path).get_sha1()
                sha256_hex = File(self.file_path).get_sha256()

                # Request the raw JSON reputation results.
                reputations_dict = tie_client.get_file_reputation({
                    HashType.MD5: md5_hex,
                    HashType.SHA1: sha1_hex,
                    HashType.SHA256: sha256_hex,
                })

                # Debug: log the raw reputation results.
                log.info("Raw TIE results: %s",
                         json.dumps(reputations_dict, sort_keys=True,
                                    indent=4, separators=(',', ': ')))

                # Initialize the result dict and the tiekey counter.
                proc_result = {}
                tiekey = 0
                strtiekey = str(tiekey)
                # Display the Global Threat Intelligence (GTI) information.
                if FileProvider.GTI in reputations_dict:
                    gti_rep = reputations_dict[FileProvider.GTI]
                    proc_result[strtiekey] = {}
                    proc_result[strtiekey]['title'] = "Global Threat Intelligence (GTI) Test Date:"
                    proc_result[strtiekey]['value'] = EpochMixin.to_localtime_string(
                        gti_rep[ReputationProp.CREATE_DATE])
                    tiekey += 1
                    strtiekey = str(tiekey)

                    # Set the GTI trust level.
                    proc_result[strtiekey] = {}
                    proc_result[strtiekey]['title'] = "Global Threat Intelligence (GTI) trust level:"
                    trustValue = gti_rep[ReputationProp.TRUST_LEVEL]
                    proc_result[strtiekey]['value'] = self.trustLevel(trustValue)
                    tiekey += 1
                    strtiekey = str(tiekey)

                # Display the Enterprise reputation information
                if FileProvider.ENTERPRISE in reputations_dict:
                    ent_rep = reputations_dict[FileProvider.ENTERPRISE]

                    # Retrieve the enterprise reputation attributes
                    ent_rep_attribs = ent_rep[ReputationProp.ATTRIBUTES]

                    # Display prevalence (if it exists)
                    if FileEnterpriseAttrib.PREVALENCE in ent_rep_attribs:
                        proc_result[strtiekey] = {}
                        proc_result[strtiekey]['title'] = "Enterprise prevalence:"
                        proc_result[strtiekey]['value'] = ent_rep_attribs[
                            FileEnterpriseAttrib.PREVALENCE]
                        tiekey += 1
                        strtiekey = str(tiekey)

                    # Display first contact date (if it exists)
                    if FileEnterpriseAttrib.FIRST_CONTACT in ent_rep_attribs:
                        proc_result[strtiekey] = {}
                        proc_result[strtiekey]['title'] = "First contact: "
                        proc_result[strtiekey]['value'] = FileEnterpriseAttrib.to_localtime_string(
                            ent_rep_attribs[FileEnterpriseAttrib.FIRST_CONTACT])
                        tiekey += 1
                        strtiekey = str(tiekey)

                # Lookup conversions for the ATD trust_score.
                valueDict = {
                    '-2': "Not Set",
                    '-1': "Known Trusted",
                    '0': "Most Likely Trusted",
                    '1': "Might Be Trusted",
                    '2': "Unknown",
                    '3': "Might Be Malicious",
                    '4': "Most Likely Malicious",
                    '5': "Known Malicious",
                }

                # Display the ATD reputation information
                if FileProvider.ATD in reputations_dict:
                    atd_rep = reputations_dict[FileProvider.ATD]

                    # Retrieve the ATD reputation attributes
                    atd_rep_attribs = atd_rep[ReputationProp.ATTRIBUTES]

                    proc_result[strtiekey] = {}
                    proc_result[strtiekey]['title'] = "ATD Test Date: "
                    proc_result[strtiekey]['value'] = EpochMixin.to_localtime_string(
                        atd_rep[ReputationProp.CREATE_DATE])
                    tiekey += 1
                    strtiekey = str(tiekey)

                    # Display GAM Score (if it exists)
                    if AtdAttrib.GAM_SCORE in atd_rep_attribs:
                        proc_result[strtiekey] = {}
                        proc_result[strtiekey]['title'] = "ATD Gateway AntiMalware Score: "
                        proc_result[strtiekey]['value'] = valueDict[
                            atd_rep_attribs[AtdAttrib.GAM_SCORE]]
                        tiekey += 1
                        strtiekey = str(tiekey)

                    # Display AV Engine Score (if it exists)
                    if AtdAttrib.AV_ENGINE_SCORE in atd_rep_attribs:
                        proc_result[strtiekey] = {}
                        proc_result[strtiekey]['title'] = "ATD AV Engine Score: "
                        proc_result[strtiekey]['value'] = valueDict[
                            atd_rep_attribs[AtdAttrib.AV_ENGINE_SCORE]]
                        tiekey += 1
                        strtiekey = str(tiekey)

                    # Display Sandbox Score (if it exists)
                    if AtdAttrib.SANDBOX_SCORE in atd_rep_attribs:
                        proc_result[strtiekey] = {}
                        proc_result[strtiekey]['title'] = "ATD Sandbox Score: "
                        proc_result[strtiekey]['value'] = valueDict[
                            atd_rep_attribs[AtdAttrib.SANDBOX_SCORE]]
                        tiekey += 1
                        strtiekey = str(tiekey)

                    # Display Verdict (if it exists)
                    if AtdAttrib.VERDICT in atd_rep_attribs:
                        proc_result[strtiekey] = {}
                        proc_result[strtiekey]['title'] = "ATD Verdict: "
                        proc_result[strtiekey]['value'] = valueDict[
                            atd_rep_attribs[AtdAttrib.VERDICT]]
                        tiekey += 1
                        strtiekey = str(tiekey)

                results = proc_result

        elif self.task["category"] == "url":
            return
        elif self.task["category"] == "baseline":
            return
        elif self.task["category"] == "service":
            return
        else:
            raise CuckooProcessingError("Unsupported task category: %s" %
                                        self.task["category"])

        log.info("Finished processing TIE reputation analysis.")
        return results
Example #19
    def run(self):
        """Run Google play unofficial python api the get the google play information
        @return: list of google play features
        """
        self.key = "googleplay"
        googleplay = {}

        if not HAVE_GOOGLEPLAY:
            log.error("Unable to import the GooglePlay library, has it been "
                      "installed properly?")
            return

        if "file" not in self.task["category"]:
            return

        from androguard.core.bytecodes.apk import APK

        f = File(self.task["target"])
        if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError(
                    "Sample file doesn't exist: \"%s\"" % self.file_path)

            android_id = self.options.get("android_id")
            google_login = self.options.get("google_login")
            google_password = self.options.get("google_password")
            # auth_token = self.options.get("auth_token", None)

            if not android_id and not google_login and not google_password:
                raise CuckooProcessingError(
                    "Google Play Credentials not configured, skip")

            try:
                a = APK(self.file_path)
                if a.is_valid_APK():
                    package = a.get_package()
                    # Connect
                    api = GooglePlayAPI(android_id)
                    api.login(google_login, google_password, None)

                    # Get the version code and the offer type from the app details
                    app_data = api.details(package)
                    app_detail = app_data.docV2.details.appDetails

                    if not app_detail.installationSize:
                        return googleplay

                    googleplay["title"] = app_detail.title
                    googleplay["app_category"] = app_detail.appCategory._values
                    googleplay["version_code"] = app_detail.versionCode
                    googleplay["app_type"] = app_detail.appType
                    googleplay["content_rating"] = app_detail.contentRating
                    googleplay["developer_email"] = app_detail.developerEmail
                    googleplay["developer_name"] = app_detail.developerName
                    googleplay["developer_website"] = app_detail.developerWebsite
                    googleplay["installation_size"] = app_detail.installationSize
                    googleplay["num_downloads"] = app_detail.numDownloads
                    googleplay["upload_date"] = app_detail.uploadDate
                    googleplay["permissions"] = app_detail.permission._values
            except (IOError, OSError, zipfile.BadZipfile) as e:
                raise CuckooProcessingError("Error opening file: %s" % e)

        return googleplay
Example #20
    def run(self):
        """Run androguard to extract static android information
                @return: list of static features
        """
        self.key = "apkinfo"
        apkinfo = {}

        if "file" not in self.task["category"]:
            return

        from androguard.core.bytecodes.apk import APK
        from androguard.core.bytecodes.dvm import DalvikVMFormat
        from androguard.core.analysis.analysis import uVMAnalysis
        from androguard.core.analysis import analysis

        f = File(self.task["target"])
        if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError(
                    "Sample file doesn't exist: \"%s\"" % self.file_path)

            try:
                a = APK(self.file_path)
                if a.is_valid_APK():
                    manifest = {}
                    apkinfo["files"] = self._apk_files(a)
                    manifest["package"] = a.get_package()
                    # manifest["permissions"]=a.get_details_permissions_new()
                    manifest["main_activity"] = a.get_main_activity()
                    manifest["activities"] = a.get_activities()
                    manifest["services"] = a.get_services()
                    manifest["receivers"] = a.get_receivers()
                    # manifest["receivers_actions"]=a.get__extended_receivers()
                    manifest["providers"] = a.get_providers()
                    manifest["libraries"] = a.get_libraries()
                    apkinfo["manifest"] = manifest
                    # apkinfo["certificate"] = a.get_certificate()
                    static_calls = {}
                    if self.check_size(apkinfo["files"]):
                        vm = DalvikVMFormat(a.get_dex())
                        vmx = uVMAnalysis(vm)

                        static_calls["all_methods"] = self.get_methods(vmx)
                        static_calls["is_native_code"] = analysis.is_native_code(vmx)
                        static_calls["is_dynamic_code"] = analysis.is_dyn_code(vmx)
                        static_calls["is_reflection_code"] = analysis.is_reflection_code(vmx)

                        # static_calls["dynamic_method_calls"]= analysis.get_show_DynCode(vmx)
                        # static_calls["reflection_method_calls"]= analysis.get_show_ReflectionCode(vmx)
                        # static_calls["permissions_method_calls"]= analysis.get_show_Permissions(vmx)
                        # static_calls["crypto_method_calls"]= analysis.get_show_CryptoCode(vmx)
                        # static_calls["native_method_calls"]= analysis.get_show_NativeMethods(vmx)
                    else:
                        log.warning("Dex size bigger than: %s",
                                    self.options.decompilation_threshold)
                    apkinfo["static_method_calls"] = static_calls
            except (IOError, OSError, zipfile.BadZipfile) as e:
                raise CuckooProcessingError("Error opening file: %s" % e)

        return apkinfo
Example #21
    def run(self):
        results = {}

        # Include any results provided by the mitm script.
        results["mitm"] = []
        if os.path.exists(self.mitmout_path):
            for line in open(self.mitmout_path, "rb"):
                try:
                    results["mitm"].append(json.loads(line))
                except:
                    results["mitm"].append(line)

        if not os.path.exists(self.pcap_path):
            log.warning("The PCAP file does not exist at path \"%s\".",
                        self.pcap_path)
            return results

        if not os.path.getsize(self.pcap_path):
            log.error("The PCAP file at path \"%s\" is empty." %
                      self.pcap_path)
            return results

        log.debug("keys: {0} , values : {1}".format(self.options.keys(),
                                                    self.options.values()))

        filter_shark = self.options.get("filter_tshark")

        if filter_shark:
            log.debug("The filter of tshark is {0}.".format(filter_shark))
            if not self.is_tool("tshark"):
                log.error(
                    "The tshark is not present on your system. Please install it !"
                )
            else:
                log.debug("Run tshark to filter pcap")
                filtered_path = self.pcap_path.replace("dump.",
                                                       "dump_filtered.")
                filtered_1_path = self.pcap_path.replace(
                    "dump.", "dump_filtered_1.")

                cmd = "tshark -F pcap -r " + self.pcap_path + " -Y \"" + filter_shark + "\" -w " + filtered_path
                log.debug("{0}".format(cmd))
                p = subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE)
                output, err = p.communicate()

                if err:
                    log.error(
                        "There is an error when filtering the pcap : %s from %s"
                        % (err, self.pcap_path))
                    return results

                os.renames(self.pcap_path, filtered_1_path)
                os.renames(filtered_path, self.pcap_path)
        else:
            log.debug("The Tshark filter is not present.")

        # PCAP file hash.
        results["pcap_sha256"] = File(self.pcap_path).get_sha256()

        sorted_path = self.pcap_path.replace("dump.", "dump_sorted.")
        if config("cuckoo:processing:sort_pcap"):
            sort_pcap(self.pcap_path, sorted_path)

            # Sorted PCAP file hash.
            if os.path.exists(sorted_path):
                results["sorted_pcap_sha256"] = File(sorted_path).get_sha256()
                pcap_path = sorted_path
            else:
                pcap_path = self.pcap_path
        else:
            pcap_path = self.pcap_path

        results.update(Pcap(pcap_path, self.options).run())

        if os.path.exists(pcap_path):
            try:
                p2 = Pcap2(pcap_path, self.get_tlsmaster(), self.network_path)
                results.update(p2.run())
            except:
                log.exception("Error running httpreplay-based PCAP analysis")

        return results
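
A side note on the tshark invocation above: building the command line by string concatenation with shell=True breaks if the filter contains quotes. An equivalent list-based call, which needs no shell and passes filter_shark as a single argument, would be:

p = subprocess.Popen(
    ["tshark", "-F", "pcap", "-r", self.pcap_path,
     "-Y", filter_shark, "-w", filtered_path],
    stderr=subprocess.PIPE,
)
output, err = p.communicate()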
Example #22
    def test_not_temporary_file(self):
        f = File("tests/files/pdf0.pdf")
        assert os.path.exists("tests/files/pdf0.pdf")
        del f
        assert os.path.exists("tests/files/pdf0.pdf")
Example #23
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)
        if "network" not in report:
            report["network"] = {}

        # This will likely hardcode the cuckoo.log to this point, but that
        # should be fine.
        if report.get("debug"):
            report["debug"]["cuckoo"] = list(report["debug"]["cuckoo"])

        # Store the path of the analysis folder.
        report["info"]["analysis_path"] = self.analysis_path

        # Store the sample in GridFS.
        if results.get("info", {}).get("category") == "file" and "target" in results:
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"]["pcap_id"] = pcap_id

        sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
        spcap = File(sorted_pcap_path)
        if spcap.valid():
            spcap_id = self.store_file(spcap)
            report["network"]["sorted_pcap_id"] = spcap_id

        mitmproxy_path = os.path.join(self.analysis_path, "dump.mitm")
        mitmpr = File(mitmproxy_path)
        if mitmpr.valid():
            mitmpr_id = self.store_file(mitmpr)
            report["network"]["mitmproxy_id"] = mitmpr_id

        # Store the process memory dump file and extracted files in GridFS and
        # reference it back in the report.
        if "procmemory" in report and self.options.get("store_memdump", False):
            for idx, procmem in enumerate(report["procmemory"]):
                procmem_path = os.path.join(
                    self.analysis_path, "memory", "%s.dmp" % procmem["pid"]
                )
                procmem_file = File(procmem_path)
                if procmem_file.valid():
                    procmem_id = self.store_file(procmem_file)
                    procmem["procmem_id"] = procmem_id

                for extracted in procmem.get("extracted", []):
                    f = File(extracted["path"])
                    if f.valid():
                        extracted["extracted_id"] = self.store_file(f)

        # Store the scripts that Floss generated in GridFS and reference
        # them back in the report.
        if "strings" in report:
            if "idapro_sct_name" in report["strings"]:
                idapro_sct_path = os.path.join(
                    self.analysis_path, "str_script", report["strings"]["idapro_sct_name"]
                )
                idapro_sct_file = File(idapro_sct_path)
                if idapro_sct_file.valid():
                    report["strings"]["idapro_sct_id"] = self.store_file(idapro_sct_file)

            if "radare_sct_name" in report["strings"]:
                radare_sct_path = os.path.join(
                    self.analysis_path, "str_script", report["strings"]["radare_sct_name"]
                )
                radare_sct_file = File(radare_sct_path)
                if radare_sct_file.valid():
                    report["strings"]["radare_sct_id"] = self.store_file(radare_sct_file)

            if "x64dbg_sct_name" in report["strings"]:
                x64dbg_sct_path = os.path.join(
                    self.analysis_path, "str_script", report["strings"]["x64dbg_sct_name"]
                )
                x64dbg_sct_file = File(x64dbg_sct_path)
                if x64dbg_sct_file.valid():
                    report["strings"]["x64dbg_sct_id"] = self.store_file(x64dbg_sct_file)

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        if "dropped" in report:
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop, filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        new_extracted = []
        if "extracted" in report:
            for extracted in report["extracted"]:
                new_extr = dict(extracted)
                extr = File(extracted["raw"])
                if extr.valid():
                    extr_id = self.store_file(extr)
                    new_extr["object_id"] = extr_id

                new_extracted.append(new_extr)

        report["extracted"] = new_extracted

        # Add screenshots.
        report["shots"] = []
        if os.path.exists(self.shots_path):
            # Walk through the files and select the JPGs.
            for shot_file in sorted(os.listdir(self.shots_path)):
                if not shot_file.endswith(".jpg") or "_" in shot_file:
                    continue

                shot_path = os.path.join(self.shots_path, shot_file)
                shot_path_dir = os.path.dirname(shot_path)
                shot_file_name, shot_file_ext = os.path.splitext(shot_file)
                shot_path_resized = os.path.join(shot_path_dir, "%s_small%s" % (shot_file_name, shot_file_ext))

                shot_blob = {}

                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if os.path.isfile(shot_path):
                    shot = File(shot_path)
                    if shot.valid():
                        shot_id = self.store_file(shot)
                        shot_blob["original"] = shot_id

                # Try to get the alternative (small) size for this image,
                # store it and reference it back in the report.
                if os.path.isfile(shot_path_resized):
                    shot_small = File(shot_path_resized)
                    if shot_small.valid():
                        shot_id = self.store_file(shot_small)
                        shot_blob["small"] = shot_id

                if shot_blob:
                    report["shots"].append(shot_blob)

        paginate = self.options.get("paginate", 100)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for call in process["calls"]:
                    # If the chunk size is paginate or if the loop is
                    # completed then store the chunk in MongoDB.
                    if len(chunk) == paginate:
                        to_insert = {"pid": process["pid"], "calls": chunk}
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["pid"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        if report.get("procmon"):
            procmon, chunk = [], []

            for entry in report["procmon"]:
                if len(chunk) == paginate:
                    procmon.append(self.db.procmon.insert(chunk))
                    chunk = []

                chunk.append(entry)

            if chunk:
                procmon.append(self.db.procmon.insert(chunk))

            report["procmon"] = procmon

        # Store the report; save() creates or updates the analysis document.
        self.db.analysis.save(report)
Example #24
class TestFile(object):
    def setup(self):
        # File() will invoke cwd(), so a CWD must be set (any directory works).
        set_cwd(tempfile.mkdtemp())

        self.path = tempfile.mkstemp()[1]
        self.file = File(self.path)

    def test_get_name(self):
        assert self.path.split(os.sep)[-1] == self.file.get_name()

    def test_get_data(self):
        assert "" == self.file.get_data()

    def test_get_size(self):
        assert 0 == self.file.get_size()

    def test_get_crc32(self):
        assert "00000000" == self.file.get_crc32()

    def test_get_md5(self):
        assert "d41d8cd98f00b204e9800998ecf8427e" == self.file.get_md5()

    def test_get_sha1(self):
        assert "da39a3ee5e6b4b0d3255bfef95601890afd80709" == self.file.get_sha1()

    def test_get_sha256(self):
        assert "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" == self.file.get_sha256()

    def test_get_sha512(self):
        assert "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e" == self.file.get_sha512()

    def test_get_ssdeep(self):
        try:
            import pydeep
            assert self.file.get_ssdeep() is not None
            pydeep  # Fake usage.
        except ImportError:
            assert self.file.get_ssdeep() is None

    def test_get_type(self):
        assert "empty" in self.file.get_type()

    def test_get_content_type(self):
        assert self.file.get_content_type() in [
            "inode/x-empty", "application/x-empty"
        ]

    def test_get_all_type(self):
        assert isinstance(self.file.get_all(), dict)

    def test_get_all_keys(self):
        for key in [
                "name", "size", "crc32", "md5", "sha1", "sha256", "sha512",
                "ssdeep", "type"
        ]:
            assert key in self.file.get_all()
Example #25
    def file_report(self, filepath, summary=False):
        """Get the report of an existing file scan.
        @param filepath: file path
        @param summary: whether to fetch the summary report
        """
        resource = File(filepath).get_md5()
        return self._get_report(self.FILE_REPORT, resource, summary)
Example #26
    def test_magic1(self):
        f = File("tests/files/foo.txt")
        assert "ASCII text" in f.get_type()
        assert f.get_content_type() == "text/plain"
Example #27
    def test_symlink_magic(self):
        filepath = tempfile.mktemp()
        os.symlink(__file__, filepath)
        assert File(filepath).get_type().startswith("Python script")
        assert File(filepath).get_content_type() == "text/x-python"
Example #28
def submit_tasks(target, options, package, custom, owner, timeout, priority,
                 machine, platform, memory, enforce_timeout, clock, tags,
                 remote, pattern, maxcount, is_unique, is_url, is_baseline,
                 is_shuffle):
    db = Database()

    data = dict(
        package=package or "",
        timeout=timeout,
        options=options,
        priority=priority,
        machine=machine,
        platform=platform,
        custom=custom,
        owner=owner,
        tags=tags,
        memory="1" if memory else "0",
        enforce_timeout="1" if enforce_timeout else "0",
        clock=clock,
        unique="1" if is_unique else "0",
    )

    if is_baseline:
        if remote:
            print "Remote baseline support has not yet been implemented."
            return

        task_id = db.add_baseline(timeout, owner, machine, memory)
        yield "Baseline", machine, task_id
        return

    if is_url and is_unique:
        print "URL doesn't have --unique support yet."
        return

    if is_url:
        for url in target:
            if not remote:
                data.pop("unique", None)
                task_id = db.add_url(to_unicode(url), **data)
                yield "URL", url, task_id
                continue

            data["url"] = to_unicode(url)
            try:
                r = requests.post("http://%s/tasks/create/url" % remote,
                                  data=data)
                yield "URL", url, r.json()["task_id"]
            except Exception as e:
                print "%s: unable to submit URL: %s" % (bold(red("Error")), e)
    else:
        files = []
        for path in target:
            files.extend(enumerate_files(os.path.abspath(path), pattern))

        if is_shuffle:
            random.shuffle(files)

        for filepath in files:
            if not os.path.getsize(filepath):
                print "%s: sample %s (skipping file)" % (bold(
                    yellow("Empty")), filepath)
                continue

            if maxcount is not None:
                if not maxcount:
                    break
                maxcount -= 1

            if not remote:
                if is_unique:
                    sha256 = File(filepath).get_sha256()
                    if db.find_sample(sha256=sha256):
                        yield "File", filepath, None
                        continue

                data.pop("unique", None)
                task_id = db.add_path(file_path=filepath, **data)
                yield "File", filepath, task_id
                continue

            # Use a distinct name so we don't shadow the "files" list that
            # is being iterated over.
            multipart = {
                "file": (os.path.basename(filepath), open(filepath, "rb")),
            }

            try:
                r = requests.post("http://%s/tasks/create/file" % remote,
                                  data=data,
                                  files=multipart)
                yield "File", filepath, r.json()["task_id"]
            except Exception as e:
                print "%s: unable to submit file: %s" % (bold(red("Error")), e)
                continue
Example #29
    def run(self):
        """Run Floss on analyzed file.
        @return: Floss results dict.
        """
        self.key = "strings"
        self.floss = self.options.get("floss")
        self.MIN_STRINGLEN = int(self.options.get("min_str_len"))
        self.MAX_STRINGLEN = self.options.get("max_str_len")
        self.MAX_STRINGCNT = self.options.get("max_str_cnt")
        self.MAX_FILESIZE = 16*1024*1024
        
        STRING_TYPES = [
            "decoded",
            "stack",
            "static"
        ]
        
        strings = {}

        if self.task["category"] == "file":
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError(
                    "Sample file doesn't exist: \"%s\"" % self.file_path
                )

            try:
                f = File(self.file_path)
                filename = os.path.basename(self.task["target"])
                base_name = os.path.splitext(filename)[0]
                ext = filename.split(os.path.extsep)[-1].lower()
                data = open(self.file_path, "rb").read(self.MAX_FILESIZE)
            except (IOError, OSError) as e:
                raise CuckooProcessingError("Error opening file %s" % e)
            
            # Extract static strings
            static_strings = re.findall("[\x1f-\x7e]{" + str(self.MIN_STRINGLEN) + ",}", data)
            for s in re.findall("(?:[\x1f-\x7e][\x00]){" + str(self.MIN_STRINGLEN) + ",}", data):
                static_strings.append(s.decode("utf-16le"))

            if self.MAX_STRINGLEN != 0:
                for i, s in enumerate(static_strings):
                    static_strings[i] = s[:self.MAX_STRINGLEN]

            if self.MAX_STRINGCNT != 0 and len(static_strings) > self.MAX_STRINGCNT:
                static_strings = static_strings[:self.MAX_STRINGCNT]
                static_strings.append("[snip]")

            package = self.task.get("package")

            if self.floss and (package == "exe" or ext == "exe" or "PE32" in f.get_type()):
                # Disable floss verbose logging
                main.set_logging_levels()
                
                try:
                    # Prepare Floss for extracting hidden & encoded strings
                    vw = vivisect.VivWorkspace()
                    vw.loadFromFile(self.file_path)
                    vw.analyze()

                    selected_functions = main.select_functions(vw, None)
                    decoding_functions_candidates = id_man.identify_decoding_functions(
                        vw, main.get_all_plugins(), selected_functions
                    )
                except Exception as e:
                    raise CuckooProcessingError("Error analyzing file with vivisect: %s" % e)

                try:
                    # Decode & extract hidden & encoded strings
                    decoded_strings = main.decode_strings(
                        vw, decoding_functions_candidates, self.MIN_STRINGLEN
                    )
                    decoded_strs = main.filter_unique_decoded(decoded_strings)

                    stack_strings = stackstrings.extract_stackstrings(
                        vw, selected_functions, self.MIN_STRINGLEN
                    )
                    stack_strings = list(stack_strings)

                    decoded_strings = [x for x in decoded_strs if x not in static_strings]
                except Exception as e:
                    raise CuckooProcessingError("Error extracting strings with floss: %s" % e)

                if decoded_strings or stack_strings:
                    # Create annotated scripts
                    if self.options.get("idapro_str_sct"):
                        idapro_sct_name = base_name + ".idb"
                        strings["idapro_sct_name"] = idapro_sct_name

                        main.create_ida_script(
                            self.file_path, os.path.join(self.str_script_path, idapro_sct_name), 
                            decoded_strings, stack_strings
                        )

                    if self.options.get("radare_str_sct"):
                        radare_sct_name = base_name + ".r2"
                        strings["radare_sct_name"] = radare_sct_name

                        main.create_r2_script(
                            self.file_path, os.path.join(self.str_script_path, radare_sct_name), 
                            decoded_strings, stack_strings
                        )

                    if self.options.get("x64dbg_str_sct"):
                        x64dbg_sct_name = base_name + ".json"
                        strings["x64dbg_sct_name"] = x64dbg_sct_name

                        imagebase = vw.filemeta.values()[0]['imagebase']
                        main.create_x64dbg_database(
                            self.file_path, os.path.join(self.str_script_path, base_name + ".json"), 
                            imagebase, decoded_strings
                        )

                # Convert the Floss strings into regular, readable strings.
                for idx, s in enumerate(decoded_strings):
                    decoded_strings[idx] = main.sanitize_string_for_printing(s.s)

                for idx, s in enumerate(stack_strings):
                    stack_strings[idx] = s.s

                results = [decoded_strings, stack_strings, static_strings]

                for idx, str_type in enumerate(STRING_TYPES):
                    strings[str_type] = results[idx]

            else:
                strings["static"] = static_strings

        return strings