Example #1
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                if not dmp.endswith(".dmp"):
                    continue

                dump_path = os.path.join(self.pmemory_path, dmp)
                dump_file = File(dump_path)

                if "-" in os.path.basename(dump_path):
                    pid = int(os.path.basename(dump_path).split("-")[0])
                else:
                    pid = int(os.path.basename(dump_path).split(".")[0])

                proc = dict(
                    file=dump_path, pid=pid,
                    yara=dump_file.get_yara("memory"),
                    urls=list(dump_file.get_urls()),
                    regions=list(self.read_dump(dump_path)),
                )

                if self.options.get("idapro"):
                    self.create_idapy(proc)

                results.append(proc)

        return results
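
The two dump-name conventions handled above ("<pid>-<num>.dmp" and plain "<pid>.dmp") can be checked in isolation. A minimal sketch using only the standard library; the helper name is made up for illustration:

import os

def parse_pid_from_dump_name(dump_path):
    # Hypothetical helper: recover the process ID from a memory dump file name,
    # accepting both "1234-1.dmp" and plain "1234.dmp" style names.
    stem = os.path.splitext(os.path.basename(dump_path))[0]
    if "-" in stem:
        return int(stem.split("-")[0])
    return int(stem)

assert parse_pid_from_dump_name("/tmp/memory/1234-1.dmp") == 1234
assert parse_pid_from_dump_name("/tmp/memory/1234.dmp") == 1234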
Example #2
    def run(self):
        """Run analysis.
        @return: list of dropped files with related information.
        """
        self.key = "dropped"
        dropped_files, meta = [], {}

        if os.path.exists(self.dropped_meta_path):
            for line in open(self.dropped_meta_path, "rb"):
                entry = json.loads(line)
                filepath = os.path.join(self.analysis_path, entry["path"])
                meta[filepath] = {
                    "pids": entry["pids"],
                    "filepath": entry["filepath"],
                }

        for dir_name, dir_names, file_names in os.walk(self.dropped_path):
            for file_name in file_names:
                file_path = os.path.join(dir_name, file_name)
                file_info = File(file_path=file_path).get_all()
                file_info.update(meta.get(file_info["path"], {}))
                dropped_files.append(file_info)

        for dir_name, dir_names, file_names in os.walk(self.package_files):
            for file_name in file_names:
                file_path = os.path.join(dir_name, file_name)
                file_info = File(file_path=file_path).get_all()
                dropped_files.append(file_info)

        return dropped_files
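
The dropped-file metadata consumed above is a JSON-lines file mapping each stored path to the PIDs that wrote it. A minimal sketch of that lookup table, assuming only the standard library and the same two fields used in the example:

import json
import os

def load_dropped_meta(meta_path, analysis_path):
    # Hypothetical helper: build {absolute path: metadata} from a JSON-lines file,
    # mirroring how the module above enriches File(...).get_all() results.
    meta = {}
    if os.path.exists(meta_path):
        with open(meta_path, "rb") as fd:
            for line in fd:
                entry = json.loads(line)
                meta[os.path.join(analysis_path, entry["path"])] = {
                    "pids": entry["pids"],
                    "filepath": entry["filepath"],
                }
    return meta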
Example #3
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(os.path.splitext(os.path.basename(dmp_path))[0])
                if "behavior" in self.results and "processes" in self.results["behavior"]:
                    for process in self.results["behavior"]["processes"]:
                        if process_id == process["process_id"]:
                            process_name = process["process_name"]
                            process_path = process["module_path"]
                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")),
                    address_space=self.parse_dump(dmp_path)
                )

                results.append(proc)

        return results
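
Example #3 enriches each dump with the process name and module path taken from the behavior results. A small sketch of that lookup with a made-up behavior structure (the field names follow the example, the data is illustrative):

def lookup_process(behavior, process_id):
    # Return (process_name, module_path) for the matching PID, or empty strings.
    for process in behavior.get("processes", []):
        if process.get("process_id") == process_id:
            return process.get("process_name", ""), process.get("module_path", "")
    return "", ""

behavior = {"processes": [{"process_id": 1234,
                           "process_name": "calc.exe",
                           "module_path": "C:\\Windows\\System32\\calc.exe"}]}
print(lookup_process(behavior, 1234))  # ('calc.exe', 'C:\\Windows\\System32\\calc.exe')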
Example #4
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)

                # Let's hope the file is not too big.
                buf = open(dmp_path, "rb").read()
                urls = set()
                for url in re.findall(HTTP_REGEX, buf):
                    if not is_whitelisted_domain(url[1]):
                        urls.add("".join(url))

                proc = dict(
                    file=dmp_path,
                    pid=os.path.splitext(os.path.basename(dmp_path))[0],
                    yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")),
                    urls=list(urls),
                )

                results.append(proc)

        return results
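
Example #4 scans the raw dump for URLs with a project-defined HTTP_REGEX and an is_whitelisted_domain() check. A rough, self-contained approximation of that step; the pattern and whitelist below are deliberately simplified stand-ins, not the project's actual definitions:

import re

SIMPLE_HTTP_REGEX = re.compile(rb"(https?://)([\w.-]+(?::\d+)?[\w./?=&%-]*)")
WHITELISTED_DOMAINS = {b"example.com"}

def extract_urls(buf):
    # Collect non-whitelisted URLs found anywhere in a bytes buffer.
    urls = set()
    for scheme, rest in SIMPLE_HTTP_REGEX.findall(buf):
        domain = rest.split(b"/", 1)[0]
        if domain not in WHITELISTED_DOMAINS:
            urls.add((scheme + rest).decode("ascii", "replace"))
    return sorted(urls)

print(extract_urls(b"junk http://evil.test/payload junk https://example.com/ok"))
# ['http://evil.test/payload']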
Example #5
    def run(self):
        """Run androguard to extract static android information
                @return: list of static features
        """
        self.key = "apkinfo"
        apkinfo = {}

        if "file" not in self.task["category"] or not HAVE_ANDROGUARD:
            return

        f = File(self.task["target"])
        if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path)

            try:
                a = APK(self.file_path)
                if a.is_valid_APK():
                    manifest = {}
                    apkinfo["files"] = self._apk_files(a)
                    manifest["package"] = a.get_package()
                    # manifest["permissions"]=a.get_details_permissions_new()
                    manifest["main_activity"] = a.get_main_activity()
                    manifest["activities"] = a.get_activities()
                    manifest["services"] = a.get_services()
                    manifest["receivers"] = a.get_receivers()
                    # manifest["receivers_actions"]=a.get__extended_receivers()
                    manifest["providers"] = a.get_providers()
                    manifest["libraries"] = a.get_libraries()
                    apkinfo["manifest"] = manifest
                    # apkinfo["certificate"] = a.get_certificate()
                    static_calls = {}
                    if self.check_size(apkinfo["files"]):
                        vm = DalvikVMFormat(a.get_dex())
                        vmx = uVMAnalysis(vm)

                        static_calls["all_methods"] = self.get_methods(vmx)
                        static_calls["is_native_code"] = analysis.is_native_code(vmx)
                        static_calls["is_dynamic_code"] = analysis.is_dyn_code(vmx)
                        static_calls["is_reflection_code"] = analysis.is_reflection_code(vmx)

                        # static_calls["dynamic_method_calls"]= analysis.get_show_DynCode(vmx)
                        # static_calls["reflection_method_calls"]= analysis.get_show_ReflectionCode(vmx)
                        # static_calls["permissions_method_calls"]= analysis.get_show_Permissions(vmx)
                        # static_calls["crypto_method_calls"]= analysis.get_show_CryptoCode(vmx)
                        # static_calls["native_method_calls"]= analysis.get_show_NativeMethods(vmx)
                    else:
                        log.warning("Dex size bigger than: %s",
                                    self.options.decompilation_threshold)
                    apkinfo["static_method_calls"] = static_calls
            except (IOError, OSError, BadZipfile) as e:
                raise CuckooProcessingError("Error opening file %s" % e)

        return apkinfo
Example #6
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if self.options.get("extract_img") and not HAVE_PEFILE:
            log.warning(
                "In order to extract PE files from memory dumps it is "
                "required to have pefile installed (`pip install pefile`)."
            )

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                if not dmp.endswith(".dmp"):
                    continue

                dump_path = os.path.join(self.pmemory_path, dmp)
                dump_file = File(dump_path)

                pid, num = map(int, re.findall("(\\d+)", dmp))

                proc = dict(
                    file=dump_path, pid=pid, num=num,
                    yara=dump_file.get_yara("memory"),
                    urls=list(dump_file.get_urls()),
                    regions=list(self.read_dump(dump_path)),
                )

                if self.options.get("idapro"):
                    self.create_idapy(proc)

                if self.options.get("extract_img") and HAVE_PEFILE:
                    proc["extracted"] = list(self.dump_images(proc))

                if self.options.get("dump_delete"):
                    try:
                        os.remove(dump_path)
                    except OSError:
                        log.error("Unable to delete memory dump file at path \"%s\"", dump_path)

                results.append(proc)

        results.sort(key=lambda x: (x["pid"], x["num"]))
        return results
Example #7
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        for dmp in os.listdir(self.pmemory_path):
            dmp_path = os.path.join(self.pmemory_path, dmp)
            dmp_file = File(dmp_path)

            proc = dict(
                yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar"))
            )

            results.append(proc)

        return results
Example #8
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                if not dmp.endswith(".dmp"):
                    continue

                dump_path = os.path.join(self.pmemory_path, dmp)
                dump_file = File(dump_path)

                dump_name = os.path.basename(dump_path)
                pid = int(re.findall(r"(\d{2,5})", dump_name)[0])

                proc = dict(
                    file=dump_path, pid=pid,
                    yara=dump_file.get_yara("memory"),
                    urls=list(dump_file.get_urls()),
                    regions=list(self.read_dump(dump_path)),
                )

                if self.options.get("idapro"):
                    self.create_idapy(proc)
                    
                if self.options.get("dump_delete"):
                    try:
                        os.remove(dump_path)
                    except OSError:
                        log.error("Unable to delete memory dump file at path \"%s\"", dump_path)

                results.append(proc)

        return results
Example #9
def demux_sample(filename, package, options):
    """
    If the file is a ZIP, extract its included files and return their file paths.
    If the file is an email, extract its attachments and return their file paths (later we'll also extract URLs).
    If the file is a password-protected Office doc and a password is supplied, return the path to the decrypted doc.
    """

    magic = File(filename).get_type()

    # if file is an Office doc and password is supplied, try to decrypt the doc
    if "Microsoft" in magic or "Composite Document File" in magic or "CDFV2 Encrypted" in magic:
        password = None
        if "password="******",")
            for field in fields:
                try:
                    key, value = field.split("=", 1)
                    if key == "password":
                        password = value
                        break
                except:
                    pass
        if password:
            return demux_office(filename, password)
        else:
            return [filename]

    # if a package was specified, then don't do anything special
    # this will allow for the ZIP package to be used to analyze binaries with included DLL dependencies
    # do the same if file= is specified in the options
    if package or "file=" in options:
        return [filename]

    # don't try to extract from Java archives or executables
    if "Java Jar" in magic:
        return [filename]
    if "PE32" in magic or "MS-DOS executable" in magic:
        return [filename]

    retlist = demux_zip(filename, options)
    if not retlist:
        retlist = demux_rar(filename, options)
    if not retlist:
        retlist = demux_tar(filename, options)
    if not retlist:
        retlist = demux_email(filename, options)
    if not retlist:
        retlist = demux_msg(filename, options)
    if not retlist:
        retlist = demux_tnef(filename, options)
    # handle ZIPs/RARs inside extracted files
    if retlist:
        newretlist = []
        for item in retlist:
            zipext = demux_zip(item, options)
            if zipext:
                newretlist.extend(zipext)
            else:
                rarext = demux_rar(item, options)
                if rarext:
                    newretlist.extend(rarext)
                else:
                    tarext = demux_tar(item, options)
                    if tarext:
                        newretlist.extend(tarext)
                    else:
                        newretlist.append(item)
        retlist = newretlist

    # if it wasn't a ZIP or an email or we weren't able to obtain anything interesting from either, then just submit the
    # original file

    if not retlist:
        retlist.append(filename)

    return retlist
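
The demux logic above is a chain of format-specific extractors with a fall-through to the original file. The general shape of that pattern, sketched with stub extractors (the names and behaviour are illustrative, not the project's real demux_* helpers):

def demux_with_fallback(filename, extractors):
    # Try each extractor in order; the first one that yields files wins.
    # If none of them produce anything, fall back to the original file.
    for extract in extractors:
        extracted = extract(filename)
        if extracted:
            return extracted
    return [filename]

# Stub extractors for illustration only.
def fake_zip_extractor(path):
    return ["inner.exe", "config.dat"] if path.endswith(".zip") else []

def fake_rar_extractor(path):
    return []

print(demux_with_fallback("sample.zip", [fake_zip_extractor, fake_rar_extractor]))
print(demux_with_fallback("sample.exe", [fake_zip_extractor, fake_rar_extractor]))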
Example #10
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to S3.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        self.s3_region = self.options.get("region", "us-west-2")
        self.s3_access_key = self.options.get("access_key", "")
        self.s3_secret_key = self.options.get("secret_key", "")
        s3_reports_bucket_name = self.options.get("reports_bucket", "")
        s3_shots_bucket_name = self.options.get("shots_bucket", "")
        s3_samples_bucket_name = self.options.get("samples_bucket", "")
        s3_files_bucket_name = self.options.get("files_bucket", "")
        s3_aux_bucket_name = self.options.get("aux_bucket", "")
        s3_logs_bucket_name = self.options.get("logs_bucket", "")
        s3_pcap_bucket_name = self.options.get("pcap_bucket", "")
        s3_md5_bucket_name = self.options.get("md5_bucket", "")
        cleanup = self.options.get("cleanup", False)

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        if not "network" in report:
            report["network"] = {}

        # Add screenshot paths
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                screenshot = File(shot_path)
                if screenshot.valid():
                    #report["shots"].append("{0}/{1}".format(results['info']['id'], shot_file))
                    report["shots"].append(shot_file.replace(".jpg", ""))

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report.
        # Also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                chunk_count = 0
                # Using this type of prefix is useful because you can always re-construct it from
                # the original results
                #chunk_prefix = str(results['info']['id']) + '/' + process['process_name']
                chunk_prefix = str(results['info']['id']) + '/' + str(process['process_id'])
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed then
                    # store the chunk in S3.

                    if len(chunk) == 100:
                        chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count)
                        #log.debug("INFO TIME!")
                        #log.debug("%s %s %s" %(s3_reports_bucket_name, chunk_name, chunk_prefix))
                        #log.debug(chunk_prefix)
                        err = self.save_to_s3(s3_reports_bucket_name, chunk_name, json.dumps(chunk))
                        if err != '':
                            log.error("Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}".format(chunk_name, err))
                        else:
                            chunks_ids.append("{0}.{1}".format(chunk_prefix, chunk_count))
                            chunk_count += 1
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count)
                    #log.debug("%s %s %s" %(s3_reports_bucket_name, chunk_name, chunk_prefix))
                    err = self.save_to_s3(s3_reports_bucket_name, chunk_name, json.dumps(chunk))
                    if err != '':
                        log.error("Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}".format(chunk_name, err))
                    else:
                        chunks_ids.append("{0}.{1}".format(chunk_prefix, chunk_count))

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Other info we want quick access to from the web UI
        if results.has_key("virustotal") and results["virustotal"] and results["virustotal"].has_key("positives") and results["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"], results["virustotal"]["total"])
        if results.has_key("suricata") and results["suricata"]:
            if results["suricata"].has_key("tls") and len(results["suricata"]["tls"]) > 0:
                report["suri_tls_cnt"] = len(results["suricata"]["tls"])
            if results["suricata"].has_key("alerts") and len(results["suricata"]["alerts"]) > 0:
                report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
            if results["suricata"].has_key("files") and len(results["suricata"]["files"]) > 0:
                report["suri_file_cnt"] = len(results["suricata"]["files"])
            if results["suricata"].has_key("http") and len(results["suricata"]["http"]) > 0:
                report["suri_http_cnt"] = len(results["suricata"]["http"])
            if results["suricata"].has_key("ssh") and len(results["suricata"]["ssh"]) > 0:
                report["suri_ssh_cnt"] = len(results["suricata"]["ssh"])
            if results["suricata"].has_key("dns") and len(results["suricata"]["dns"]) > 0:
                report["suri_dns_cnt"] = len(results["suricata"]["dns"])

        # Store the report (its 'object id' is simply the analysis id).
        # First make sure it's not too big (5 GB limit).
        data = json.dumps(report)
        if len(data) < 5000000000:
            err = self.save_to_s3(s3_reports_bucket_name, results['info']['id'], data)
            if err != '':
                log.error("Non-size related issue saving analysis JSON to S3 for report {0} - {1}".format(results['info']['id'], err))
        else:
            log.error("JSON for analysis id {0} is greater than 5GB".format(results['info']['id']))

        # Process the rest of the analysis files and put them in S3.
        if s3_shots_bucket_name != '':
            shots_path = os.path.join(results['info']['id'], self.analysis_path, "shots")
            self.relocate_to_s3(results['info']['id'], shots_path, s3_shots_bucket_name)
        if s3_pcap_bucket_name != '':
            if os.path.isfile(self.analysis_path + '/dump.pcap'):
                with open(self.analysis_path + '/dump.pcap', 'rb') as infile:
                    self.save_to_s3(s3_pcap_bucket_name, "{0}/dump.pcap".format(results['info']['id']), infile.read())
            if os.path.isfile(self.analysis_path + '/dump_sorted.pcap'):
                with open(self.analysis_path + '/dump_sorted.pcap', 'rb') as infile:
                    self.save_to_s3(s3_pcap_bucket_name, "{0}/dump_sorted.pcap".format(results['info']['id']), infile.read())
        if s3_aux_bucket_name != '':
            aux_path = os.path.join(results['info']['id'], self.analysis_path, "aux")
            self.relocate_to_s3(results['info']['id'], aux_path, s3_aux_bucket_name)
        if s3_logs_bucket_name != '':
            logs_path = os.path.join(results['info']['id'], self.analysis_path, "logs")
            self.relocate_to_s3(results['info']['id'], logs_path, s3_logs_bucket_name)
        if s3_samples_bucket_name != '':
            sample = os.path.realpath(self.analysis_path + '/binary')
            with open(sample, 'rb') as infile:
                self.save_to_s3(s3_samples_bucket_name, results['target']['file']['sha256'], infile.read())
        #log.debug(s3_files_bucket_name)
        if s3_files_bucket_name != '':
            #log.debug(self.analysis_path)
            for root, dirnames, filenames in os.walk(self.analysis_path + '/files'):
                #log.debug(filenames)
                for filename in filenames:
                    key_name = str(results['info']['id']) + '/' + root.split(os.sep)[-1] + '/' + filename
                    if not filename.endswith('_info.txt'):
                        key_name = str(results['info']['id']) + '/' + root.split(os.sep)[-1]
                    #log.debug(key_name)
                    with open(os.path.join(root, filename), 'rb') as infile:
                        self.save_to_s3(s3_files_bucket_name, key_name, infile.read())
        if s3_md5_bucket_name != '':
            info = {}
            info.update(report['info'])
            info.update(report['target'])
            self.save_to_s3(s3_md5_bucket_name, results['target']['file']['md5'], json.dumps(info))

        if cleanup:
            shutil.rmtree(self.analysis_path)
Example #11
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #    created.
        #  Returns None if the index already exists.
        self.db.fs.files.ensure_index("sha256", unique=True,
                                      sparse=True, name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        # Store the sample in GridFS.
        if results["info"]["category"] == "file":
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"] = {"pcap_id": pcap_id}
            report["network"].update(results["network"])

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id

            new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]

            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path,
                                         "shots", shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"],
                                 "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.disconnect()
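
Both reporting modules above (Examples #10 and #11) split each process's call list into 100-call chunks before storing them, so that single documents stay small and reports can be paged. The chunking itself is independent of the storage backend; a minimal sketch:

def chunk_calls(calls, chunk_size=100):
    # Yield successive lists of at most chunk_size calls, preserving order.
    chunk = []
    for call in calls:
        chunk.append(call)
        if len(chunk) == chunk_size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk  # leftovers

print([len(c) for c in chunk_calls(range(250))])  # [100, 100, 50]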
Example #12
def index(request, resubmit_hash=False):
    if request.method == "POST":

        static, package, timeout, priority, options, machine, platform, tags, custom, memory, \
            clock, enforce_timeout, shrike_url, shrike_msg, shrike_sid, shrike_refer, unique, referrer, \
            tlp = parse_request_arguments(request)

        # This is done to remove spaces in options without breaking custom paths
        options = ",".join("=".join(value.strip()
                                    for value in option.split("=", 1))
                           for option in options.split(",")
                           if option and "=" in option)
        opt_filename = get_user_filename(options, custom)

        if priority and web_conf.public.enabled and web_conf.public.priority:
            priority = web_conf.public.priority

        if timeout and web_conf.public.enabled and web_conf.public.timeout:
            timeout = web_conf.public.timeout

        if options:
            options += ","

        if referrer:
            options += "referrer=%s," % (referrer)

        if request.POST.get("free"):
            options += "free=yes,"

        if request.POST.get("nohuman"):
            options += "nohuman=yes,"

        if request.POST.get("tor"):
            options += "tor=yes,"

        if request.POST.get("route", None):
            options += "route={0},".format(request.POST.get("route", None))

        if request.POST.get("process_dump"):
            options += "procdump=0,"

        if request.POST.get("process_memory"):
            options += "procmemdump=1,"

        if request.POST.get("import_reconstruction"):
            options += "import_reconstruction=1,"

        if request.POST.get("disable_cape"):
            options += "disable_cape=1,"

        if request.POST.get("kernel_analysis"):
            options += "kernel_analysis=yes,"

        if request.POST.get("norefer"):
            options += "norefer=1,"

        if request.POST.get("oldloader"):
            options += "loader=oldloader.exe,loader_64=oldloader_x64.exe,"

        if request.POST.get("unpack"):
            options += "unpack=yes,"

        options = options[:-1]

        task_machines = []
        opt_apikey = False
        opts = get_options(options)
        if opts:
            opt_apikey = opts.get("apikey", False)

        status = "ok"
        task_ids_tmp = list()

        details = {
            "errors": [],
            "content": False,
            "request": request,
            "task_ids": [],
            "url": False,
            "params": {},
            "headers": {},
            "service": "Local",
            "path": "",
            "fhash": False,
            "options": options,
            "only_extraction": False,
            "task_machines": task_machines,
        }

        if "hash" in request.POST and request.POST.get(
                "hash", False) and request.POST.get("hash")[0] != '':
            resubmission_hash = request.POST.get("hash").strip()
            paths = db.sample_path_by_hash(resubmission_hash)
            if paths:
                content = get_file_content(paths)
                if not content:
                    return render(
                        request, "error.html", {
                            "error":
                            "Can't find {} on disk, {}".format(
                                resubmission_hash, str(paths))
                        })
                folder = os.path.join(settings.TEMP_PATH, "cape-resubmit")
                if not os.path.exists(folder):
                    os.makedirs(folder)
                base_dir = tempfile.mkdtemp(prefix='resubmit_', dir=folder)
                if opt_filename:
                    filename = base_dir + "/" + opt_filename
                else:
                    filename = base_dir + "/" + sanitize_filename(
                        resubmission_hash)
                path = store_temp_file(content, filename)
                details["path"] = path
                details["content"] = content
                status, task_ids_tmp = download_file(**details)
                if status == "error":
                    details["errors"].append(
                        {os.path.basename(filename): task_ids_tmp})
                else:
                    details["task_ids"] = task_ids_tmp
            else:
                return render(
                    request, "error.html",
                    {"error": "File not found on hdd for resubmission"})

        elif "sample" in request.FILES:
            samples = request.FILES.getlist("sample")
            details["service"] = "WebGUI"
            for sample in samples:
                # Error if there was only one submitted sample and it's empty.
                # But if there are multiple and one was empty, just ignore it.
                if not sample.size:
                    details["errors"].append(
                        {sample.name: "You uploaded an empty file."})
                    continue
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    details["errors"].append({
                        sample.name:
                        "You uploaded a file that exceeds the maximum allowed upload size specified in web/web/local_settings.py."
                    })
                    continue

                if opt_filename:
                    filename = opt_filename
                else:
                    filename = sanitize_filename(sample.name)
                # Move the sample from the Django temporary file to CAPE temporary storage so it persists between reboots (if the user chooses to configure it that way).
                path = store_temp_file(sample.read(), filename)
                if unique and db.check_file_uniq(File(path).get_sha256()):
                    return render(
                        request, "error.html", {
                            "error":
                            "Duplicated file, disable unique option to force submission"
                        })

                if timeout and web_conf.public.enabled and web_conf.public.timeout and timeout > web_conf.public.timeout:
                    timeout = web_conf.public.timeout

                magic_type = get_magic_type(path)
                platform = get_platform(magic_type)
                if machine.lower() == "all":
                    details["task_machines"] = [
                        vm.name for vm in db.list_machines(platform=platform)
                    ]
                elif machine:
                    machine_details = db.view_machine(machine)
                    if hasattr(machine_details, "platform"
                               ) and not machine_details.platform == platform:
                        return render(
                            request,
                            "error.html",
                            {
                                "error":
                                "Wrong platform, {} VM selected for {} sample".
                                format(machine_details.platform, platform)
                            },
                        )
                    else:
                        details["task_machines"] = [machine]

                else:
                    details["task_machines"] = ["first"]
                details["path"] = path
                details["content"] = get_file_content(path)
                status, task_ids = download_file(**details)

        elif "quarantine" in request.FILES:
            samples = request.FILES.getlist("quarantine")
            for sample in samples:
                # Error if there was only one submitted sample and it's empty.
                # But if there are multiple and one was empty, just ignore it.
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(
                        request, "error.html",
                        {"error": "You uploaded an empty quarantine file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a quarantine file that exceeds the maximum allowed upload size specified in web/web/local_settings.py."
                        })

                # Move the sample from the Django temporary file to Cuckoo temporary storage
                # so it persists between reboots (if the user chooses to configure it that way).
                tmp_path = store_temp_file(sample.read(), sample.name)

                path = unquarantine(tmp_path)
                try:
                    os.remove(tmp_path)
                except Exception as e:
                    print(e)

                if not path:
                    return render(request, "error.html", {
                        "error":
                        "You uploaded an unsupported quarantine file."
                    })

                if machine.lower() == "all":
                    task_machines = [
                        vm.name for vm in db.list_machines(platform="windows")
                    ]
                elif machine:
                    machine_details = db.view_machine(machine)
                    if not machine_details.platform == "windows":
                        return render(
                            request, "error.html", {
                                "error":
                                "Wrong platform, linux VM selected for {} sample"
                                .format(machine_details.platform)
                            })
                    else:
                        task_machines = [machine]

                details["path"] = path
                details["content"] = get_file_content(path)
                status, task_ids_tmp = download_file(**details)
                if status == "error":
                    details["errors"].append({sample.name: task_ids_tmp})
                else:
                    details["task_ids"] = task_ids_tmp

        elif "static" in request.FILES:
            samples = request.FILES.getlist("static")
            for sample in samples:
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(request, "error.html",
                                  {"error": "You uploaded an empty file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a file that exceeds the maximum allowed upload size specified in web/web/local_settings.py."
                        })

                # Move the sample from the Django temporary file to Cuckoo temporary storage
                # so it persists between reboots (if the user chooses to configure it that way).
                path = store_temp_file(sample.read(), sample.name)

                task_id = db.add_static(file_path=path,
                                        priority=priority,
                                        tlp=tlp)
                if not task_id:
                    return render(
                        request, "error.html",
                        {"error": "We don't have static extractor for this"})
                task_ids.append(task_id)

        elif "pcap" in request.FILES:
            samples = request.FILES.getlist("pcap")
            for sample in samples:
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(
                        request, "error.html",
                        {"error": "You uploaded an empty PCAP file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a PCAP file that exceeds the maximum allowed upload size specified in web/web/local_settings.py."
                        })

                # Move the sample from the Django temporary file to Cuckoo temporary storage
                # so it persists between reboots (if the user chooses to configure it that way).
                path = store_temp_file(sample.read(), sample.name)

                if sample.name.lower().endswith(".saz"):
                    saz = saz_to_pcap(path)
                    if saz:
                        try:
                            os.remove(path)
                        except Exception as e:
                            pass
                        path = saz
                    else:
                        return render(
                            request, "error.html",
                            {"error": "Conversion from SAZ to PCAP failed."})

                task_id = db.add_pcap(file_path=path,
                                      priority=priority,
                                      tlp=tlp)
                if task_id:
                    task_ids.append(task_id)

        elif "url" in request.POST and request.POST.get("url").strip():
            url = request.POST.get("url").strip()
            if not url:
                return render(request, "error.html",
                              {"error": "You specified an invalid URL!"})

            url = url.replace("hxxps://", "https://").replace(
                "hxxp://", "http://").replace("[.]", ".")

            if machine.lower() == "all":
                details["task_machines"] = [
                    vm.name for vm in db.list_machines(platform="windows")
                ]
            elif machine:
                machine_details = db.view_machine(machine)
                if hasattr(machine_details, "platform"
                           ) and not machine_details.platform == "windows":
                    details["errors"].append({
                        os.path.basename(url):
                        "Wrong platform, linux VM selected for {} sample".
                        format(machine_details.platform)
                    })
                else:
                    details["task_machines"] = [machine]

            details["path"] = path
            details["content"] = get_file_content(path)
            status, task_ids_tmp = download_file(**details)
            if status == "error":
                details["errors"].append({sample.name: task_ids_tmp})
            else:
                details["task_ids"] = task_ids_tmp

        elif "dlnexec" in request.POST and request.POST.get("dlnexec").strip():
            url = request.POST.get("dlnexec").strip()
            if not url:
                return render(request, "error.html",
                              {"error": "You specified an invalid URL!"})

            url = url.replace("hxxps://", "https://").replace(
                "hxxp://", "http://").replace("[.]", ".")
            response = _download_file(request.POST.get("route", None), url,
                                      options)
            if not response:
                return render(request, "error.html",
                              {"error": "Was impossible to retrieve url"})

            name = os.path.basename(url)
            if not "." in name:
                name = get_user_filename(options,
                                         custom) or generate_fake_name()

            if machine.lower() == "all":
                details["task_machines"] = [
                    vm.name for vm in db.list_machines(platform=platform)
                ]
            elif machine:
                machine_details = db.view_machine(machine[0])
                if hasattr(machine_details, "platform"
                           ) and not machine_details.platform == platform:
                    return render(
                        request, "error.html", {
                            "error":
                            "Wrong platform, {} VM selected for {} sample".
                            format(machine_details.platform, platform)
                        })
                else:
                    details["task_machines"] = [machine]

            path = store_temp_file(response, name)
            details["path"] = path
            details["content"] = get_file_content(path)
            details["service"] = "DLnExec"
            status, task_ids_tmp = download_file(**details)
            if status == "error":
                details["errors"].append({name: task_ids_tmp})
            else:
                details["task_ids"] = task_ids_tmp
        elif settings.VTDL_ENABLED and "vtdl" in request.POST and request.POST.get(
                "vtdl", False) and request.POST.get("vtdl")[0] != "":
            if not settings.VTDL_KEY or not settings.VTDL_PATH:
                return render(
                    request, "error.html", {
                        "error":
                        "You specified VirusTotal but must edit the file and specify your VTDL_PRIV_KEY or VTDL_INTEL_KEY variable and VTDL_PATH base directory"
                    })
            else:
                if opt_apikey:
                    details["apikey"] = opt_apikey
                details = download_from_vt(
                    request.POST.get("vtdl").strip(), details, opt_filename,
                    settings)

        if details.get("task_ids"):
            tasks_count = len(details["task_ids"])
        else:
            tasks_count = 0
        if tasks_count > 0:
            data = {
                "tasks": details["task_ids"],
                "tasks_count": tasks_count,
                "errors": details["errors"]
            }
            return render(request, "submission/complete.html", data)
        else:
            return render(
                request, "error.html", {
                    "error": "Error adding task(s) to CAPE's database.",
                    "errors": details["errors"]
                })
    else:
        enabledconf = dict()
        enabledconf["vt"] = settings.VTDL_ENABLED
        enabledconf["kernel"] = settings.OPT_ZER0M0N
        enabledconf["memory"] = processing.memory.get("enabled")
        enabledconf["procmemory"] = processing.procmemory.get("enabled")
        enabledconf["dlnexec"] = settings.DLNEXEC
        enabledconf["url_analysis"] = settings.URL_ANALYSIS
        enabledconf["tags"] = False
        enabledconf[
            "dist_master_storage_only"] = repconf.distributed.master_storage_only
        enabledconf["linux_on_gui"] = web_conf.linux.enabled
        enabledconf["tlp"] = web_conf.tlp.enabled

        if all_vms_tags:
            enabledconf["tags"] = True

        if not enabledconf["tags"]:
            # load multi machinery tags:
            # Get enabled machinery
            machinery = cfg.cuckoo.get("machinery")
            if machinery == "multi":
                for mmachinery in Config(machinery).multi.get(
                        "machinery").split(","):
                    vms = [
                        x.strip() for x in getattr(Config(
                            mmachinery), mmachinery).get("machines").split(",")
                    ]
                    if any([
                            "tags"
                            in list(getattr(Config(mmachinery), vmtag).keys())
                            for vmtag in vms
                    ]):
                        enabledconf["tags"] = True
                        break
            else:
                # Get VM names for machinery config elements
                vms = [
                    x.strip() for x in getattr(Config(
                        machinery), machinery).get("machines").split(",")
                ]
                # Check each VM config element for tags
                if any([
                        "tags"
                        in list(getattr(Config(machinery), vmtag).keys())
                        for vmtag in vms
                ]):
                    enabledconf["tags"] = True

        packages, machines = get_form_data("windows")

        socks5s = _load_socks5_operational()
        socks5s_random = ""
        if socks5s:
            socks5s_random = random.choice(list(socks5s.values())).get(
                "description", False)

        return render(
            request,
            "submission/index.html",
            {
                "packages": sorted(packages),
                "machines": machines,
                "vpns": list(vpns.values()),
                "socks5s": list(socks5s.values()),
                "socks5s_random": socks5s_random,
                "route": routing.routing.route,
                "internet": routing.routing.internet,
                "inetsim": routing.inetsim.enabled,
                "tor": routing.tor.enabled,
                "config": enabledconf,
                "resubmit": resubmit_hash,
                "tags": sorted(list(set(all_vms_tags))),
            },
        )
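
Near the top of Examples #12 and #13 the submitted options string is normalised by splitting on commas, stripping whitespace around each key=value pair, and splitting on the first "=" only, so that values containing "=" (such as paths) survive intact. A standalone check of that expression:

def normalize_options(options):
    # Strip whitespace around each comma-separated key=value pair, splitting on
    # the first "=" only so values may themselves contain "=".
    return ",".join(
        "=".join(part.strip() for part in option.split("=", 1))
        for option in options.split(",")
        if option and "=" in option
    )

print(normalize_options(" free = yes, route=tor , path=C:\\a=b "))
# free=yes,route=tor,path=C:\a=b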
Example #13
def index(request, resubmit_hash=False):
    if request.method == "POST":
        package = request.POST.get("package", "")
        timeout = min(force_int(request.POST.get("timeout")), 60 * 60 * 24)
        options = request.POST.get("options", "")
        lin_options = request.POST.get("lin_options", "")
        priority = force_int(request.POST.get("priority"))
        machine = request.POST.get("machine", "")
        clock = request.POST.get(
            "clock",
            datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S"))
        if not clock:
            clock = datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S")
        if "1970" in clock:
            clock = datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S")
        custom = request.POST.get("custom", "")
        memory = bool(request.POST.get("memory", False))
        enforce_timeout = bool(request.POST.get("enforce_timeout", False))
        referrer = validate_referrer(request.POST.get("referrer", None))
        tags = request.POST.get("tags", None)
        static = bool(request.POST.get("static", False))
        all_tags = load_vms_tags()
        if tags and not all(
            [tag.strip() in all_tags for tag in tags.split(",")]):
            return render(request, "error.html", {
                "error":
                "Check Tags help, you have introduced incorrect tag(s)"
            })

        if lin_options:
            options = lin_options
        # This is done to remove spaces in options without breaking custom paths
        options = ','.join('='.join(value.strip()
                                    for value in option.split("=", 1))
                           for option in options.split(",")
                           if option and '=' in option)
        opt_filename = get_user_filename(options, custom)

        if referrer:
            if options:
                options += ","
            options += "referrer=%s" % (referrer)

        if request.POST.get("free"):
            if options:
                options += ","
            options += "free=yes"

        if request.POST.get("nohuman"):
            if options:
                options += ","
            options += "nohuman=yes"

        if request.POST.get("tor"):
            if options:
                options += ","
            options += "tor=yes"

        if request.POST.get("route", None):
            if options:
                options += ","
            options += "route={0}".format(request.POST.get("route", None))

        if request.POST.get("process_dump"):
            if options:
                options += ","
            options += "procmemdump=1,procdump=1"

        if request.POST.get("process_memory"):
            if options:
                options += ","
            options += "procmemdump=1,procdump=1"

        if request.POST.get("import_reconstruction"):
            if options:
                options += ","
            options += "import_reconstruction=1"

        if request.POST.get("disable_cape"):
            if options:
                options += ","
            options += "disable_cape=1"

        if request.POST.get("kernel_analysis"):
            if options:
                options += ","
            options += "kernel_analysis=yes"

        if request.POST.get("norefer"):
            if options:
                options += ","
            options += "norefer=1"

        if request.POST.get("oldloader"):
            if options:
                options += ","
            options += "loader=oldloader.exe,loader_64=oldloader_x64.exe"

        if request.POST.get("unpack"):
            if options:
                options += ","
            options += "unpack=yes"

        unique = request.POST.get("unique", False)

        orig_options = options
        task_ids = []
        task_machines = []

        status = "ok"
        failed_hashes = list()
        task_ids_tmp = list()
        if "hash" in request.POST and request.POST.get(
                "hash", False) and request.POST.get("hash")[0] != '':
            resubmission_hash = request.POST.get("hash").strip()
            paths = db.sample_path_by_hash(resubmission_hash)
            if paths:
                paths = [
                    _f for _f in [
                        path if os.path.exists(path) else False
                        for path in paths
                    ] if _f
                ]
                if not paths and FULL_DB:
                    tasks = results_db.analysis.find(
                        {"dropped.sha256": resubmission_hash})
                    if tasks:
                        for task in tasks:
                            # grab task id and replace in path if needed aka distributed hack
                            path = os.path.join(settings.CUCKOO_PATH,
                                                "storage", "analyses",
                                                str(task["info"]["id"]),
                                                "files", resubmission_hash)
                            if os.path.exists(path):
                                paths = [path]
                                break

            if paths:
                content = False
                content = get_file_content(paths)
                if not content:
                    return render(
                        request, "error.html", {
                            "error":
                            "Can't find {} on disk, {}".format(
                                resubmission_hash, str(paths))
                        })
                base_dir = tempfile.mkdtemp(prefix='resubmit_',
                                            dir=settings.TEMP_PATH)
                if opt_filename:
                    filename = base_dir + "/" + opt_filename
                else:
                    filename = base_dir + "/" + sanitize_filename(
                        resubmission_hash)
                path = store_temp_file(content, filename)
                headers = {}
                url = 'local'
                params = {}

                status, task_ids = download_file(
                    False, content, request, db, task_ids, url, params,
                    headers, "Local", path, package, timeout, options,
                    priority, machine, clock, custom, memory, enforce_timeout,
                    referrer, tags, orig_options, "", static)
            else:
                return render(
                    request, "error.html",
                    {"error": "File not found on hdd for resubmission"})

        elif "sample" in request.FILES:
            samples = request.FILES.getlist("sample")
            for sample in samples:
                # Error if there was only one submitted sample and it's empty.
                # But if there are multiple and one was empty, just ignore it.
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(request, "error.html",
                                  {"error": "You uploaded an empty file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a file that exceeds the maximum allowed upload size "
                            "specified in web/web/local_settings.py."
                        })

                if opt_filename:
                    filename = opt_filename
                else:
                    filename = sample.name
                # Moving sample from django temporary file to Cuckoo temporary storage to
                # let it persist between reboot (if user like to configure it in that way).
                path = store_temp_file(sample.read(), filename)

                if unique and db.check_file_uniq(File(path).get_sha256()):
                    return render(
                        request, "error.html", {
                            "error":
                            "Duplicated file, disable unique option to force submission"
                        })

                magic_type = get_magic_type(path)
                if disable_x64 is True:
                    if magic_type and ("x86-64" in magic_type
                                       or "PE32+" in magic_type):
                        if len(samples) == 1:
                            return render(
                                request, "error.html",
                                {"error": "Sorry no x64 support yet"})
                        else:
                            continue

                orig_options, timeout, enforce_timeout = recon(
                    path, orig_options, timeout, enforce_timeout)

                platform = get_platform(magic_type)
                if machine.lower() == "all":
                    task_machines = [
                        vm.name for vm in db.list_machines(platform=platform)
                    ]
                elif machine:
                    machine_details = db.view_machine(machine)
                    if machine_details.platform != platform:
                        return render(
                            request, "error.html", {
                                "error":
                                "Wrong platform, {} VM selected for {} sample".
                                format(machine_details.platform, platform)
                            })
                    else:
                        task_machines.append(machine)

                else:
                    task_machines.append("first")

                for entry in task_machines:
                    if entry == "first":
                        entry = None
                    try:
                        task_ids_new = db.demux_sample_and_add_to_db(
                            file_path=path,
                            package=package,
                            timeout=timeout,
                            options=options,
                            priority=priority,
                            machine=entry,
                            custom=custom,
                            memory=memory,
                            platform=platform,
                            enforce_timeout=enforce_timeout,
                            tags=tags,
                            clock=clock,
                            static=static)
                        task_ids.extend(task_ids_new)
                    except CuckooDemuxError as err:
                        return render(request, "error.html", {"error": err})

        elif "quarantine" in request.FILES:
            samples = request.FILES.getlist("quarantine")
            for sample in samples:
                # Error if there was only one submitted sample and it's empty.
                # But if there are multiple and one was empty, just ignore it.
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(
                        request, "error.html",
                        {"error": "You uploaded an empty quarantine file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a quarantine file that exceeds the maximum \
                                  allowed upload size specified in web/web/local_settings.py."
                        })

                # Moving sample from django temporary file to Cuckoo temporary storage to
                # let it persist between reboot (if user like to configure it in that way).
                tmp_path = store_temp_file(sample.read(), sample.name)

                path = unquarantine(tmp_path)
                try:
                    os.remove(tmp_path)
                except Exception as e:
                    pass

                if not path:
                    return render(request, "error.html", {
                        "error":
                        "You uploaded an unsupported quarantine file."
                    })

                if machine.lower() == "all":
                    task_machines = [
                        vm.name for vm in db.list_machines(platform="windows")
                    ]
                elif machine:
                    machine_details = db.view_machine(machine)
                    if not machine_details.platform == "windows":
                        return render(
                            request, "error.html", {
                                "error":
                                "Wrong platform, linux VM selected for {} sample"
                                .format(machine_details.platform)
                            })
                    else:
                        task_machines.append(machine)

                if not task_machines:
                    task_machines.append("first")

                for entry in task_machines:
                    if entry == "first":
                        entry = None
                    task_ids_new = db.demux_sample_and_add_to_db(
                        file_path=path,
                        package=package,
                        timeout=timeout,
                        options=options,
                        priority=priority,
                        machine=entry,
                        custom=custom,
                        memory=memory,
                        tags=tags,
                        enforce_timeout=enforce_timeout,
                        clock=clock)
                    if task_ids_new:
                        task_ids.extend(task_ids_new)

        elif "static" in request.FILES:
            samples = request.FILES.getlist("static")
            for sample in samples:
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(request, "error.html",
                                  {"error": "You uploaded an empty file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a file that exceeds the maximum \
                    allowed upload size specified in web/web/local_settings.py."
                        })

                # Moving sample from django temporary file to Cuckoo temporary storage to
                # let it persist between reboot (if user like to configure it in that way).
                path = store_temp_file(sample.read(), sample.name)

                task_id = db.add_static(file_path=path, priority=priority)
                if not task_id:
                    return render(
                        request, "error.html",
                        {"error": "No static extractor is available for this file"})
                task_ids.append(task_id)

        elif "pcap" in request.FILES:
            samples = request.FILES.getlist("pcap")
            for sample in samples:
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(
                        request, "error.html",
                        {"error": "You uploaded an empty PCAP file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a PCAP file that exceeds the maximum \
                     allowed upload size specified in web/web/local_settings.py."
                        })

                # Moving sample from django temporary file to Cuckoo temporary storage to
                # let it persist between reboot (if user like to configure it in that way).
                path = store_temp_file(sample.read(), sample.name)

                if sample.name.lower().endswith(".saz"):
                    saz = saz_to_pcap(path)
                    if saz:
                        try:
                            os.remove(path)
                        except Exception as e:
                            pass
                        path = saz
                    else:
                        return render(
                            request, "error.html",
                            {"error": "Conversion from SAZ to PCAP failed."})

                task_id = db.add_pcap(file_path=path, priority=priority)
                if task_id:
                    task_ids.append(task_id)

        elif "url" in request.POST and request.POST.get("url").strip():
            url = request.POST.get("url").strip()
            if not url:
                return render(request, "error.html",
                              {"error": "You specified an invalid URL!"})

            url = url.replace("hxxps://", "https://").replace(
                "hxxp://", "http://").replace("[.]", ".")

            if machine.lower() == "all":
                task_machines = [
                    vm.name for vm in db.list_machines(platform="windows")
                ]
            elif machine:
                machine_details = db.view_machine(machine)
                if not machine_details.platform == "windows":
                    return render(
                        request, "error.html", {
                            "error":
                            "Wrong platform, linux VM selected for {} sample".
                            format(machine_details.platform)
                        })
                else:
                    task_machines.append(machine)

            else:
                task_machines.append("first")

            for entry in task_machines:
                if entry == "first":
                    entry = None
                task_ids_new = db.add_url(url=url,
                                          package=package,
                                          timeout=timeout,
                                          options=options,
                                          priority=priority,
                                          machine=entry,
                                          custom=custom,
                                          memory=memory,
                                          enforce_timeout=enforce_timeout,
                                          tags=tags,
                                          clock=clock)
                if task_ids_new:
                    task_ids.extend(task_ids_new)

        elif "dlnexec" in request.POST and request.POST.get("dlnexec").strip():
            url = request.POST.get("dlnexec").strip()
            if not url:
                return render(request, "error.html",
                              {"error": "You specified an invalid URL!"})

            url = url.replace("hxxps://", "https://").replace(
                "hxxp://", "http://").replace("[.]", ".")
            response = _download_file(request.POST.get("route", None), url,
                                      options)
            if not response:
                return render(request, "error.html",
                              {"error": "Could not retrieve the provided URL"})

            name = os.path.basename(url)
            if not "." in name:
                name = get_user_filename(options,
                                         custom) or generate_fake_name()
            path = store_temp_file(response, name)

            magic_type = get_magic_type(path)
            platform = get_platform(magic_type)

            if machine.lower() == "all":
                task_machines = [
                    vm.name for vm in db.list_machines(platform=platform)
                ]
            elif machine:
                machine_details = db.view_machine(machine)
                if machine_details.platform != platform:
                    return render(
                        request, "error.html", {
                            "error":
                            "Wrong platform, {} VM selected for {} sample".
                            format(machine_details.platform, platform)
                        })
                else:
                    task_machines.append(machine)
            else:
                task_machines.append("first")
            for entry in task_machines:
                if entry == "first":
                    entry = None
                task_ids_new = db.demux_sample_and_add_to_db(
                    file_path=path,
                    package=package,
                    timeout=timeout,
                    options=options,
                    priority=priority,
                    machine=entry,
                    custom=custom,
                    memory=memory,
                    enforce_timeout=enforce_timeout,
                    tags=tags,
                    platform=platform,
                    clock=clock)
                if task_ids_new:
                    task_ids.extend(task_ids_new)

        elif settings.VTDL_ENABLED and "vtdl" in request.POST and request.POST.get("vtdl", False) \
                and request.POST.get("vtdl")[0] != '':
            vtdl = request.POST.get("vtdl").strip()
            if (not settings.VTDL_PRIV_KEY
                    and not settings.VTDL_INTEL_KEY) or not settings.VTDL_PATH:
                return render(
                    request, "error.html", {
                        "error":
                        "You requested a VirusTotal download, but the "
                        "VTDL_PRIV_KEY or VTDL_INTEL_KEY setting and the "
                        "VTDL_PATH base directory must be configured first"
                    })
            else:
                hashlist = []
                if "," in vtdl:
                    hashlist = [
                        _f for _f in vtdl.replace(" ", "").strip().split(",")
                        if _f
                    ]
                else:
                    hashlist.append(vtdl)

                for h in hashlist:
                    base_dir = tempfile.mkdtemp(prefix='cuckoovtdl',
                                                dir=settings.VTDL_PATH)
                    task_ids_tmp = list()
                    if opt_filename:
                        filename = os.path.join(base_dir, opt_filename)
                    else:
                        filename = os.path.join(base_dir, sanitize_filename(h))
                    headers = {}
                    paths = db.sample_path_by_hash(h)
                    content = False
                    if paths:
                        content = get_file_content(paths)
                    if settings.VTDL_PRIV_KEY:
                        headers = {'x-apikey': settings.VTDL_PRIV_KEY}
                    elif settings.VTDL_INTEL_KEY:
                        headers = {'x-apikey': settings.VTDL_INTEL_KEY}
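                    # VirusTotal API v3 file download endpoint; the API key is
                    # passed in the x-apikey header set above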
                    url = "https://www.virustotal.com/api/v3/files/{id}/download".format(
                        id=h)
                    params = {}

                    if not content:
                        status, task_ids_tmp = download_file(
                            False, content, request, db, task_ids, url, params,
                            headers, "VirusTotal", filename, package, timeout,
                            options, priority, machine, clock, custom, memory,
                            enforce_timeout, referrer, tags, orig_options, "",
                            static, h)
                    else:
                        status, task_ids_tmp = download_file(
                            False, content, request, db, task_ids, url, params,
                            headers, "Local", filename, package, timeout,
                            options, priority, machine, clock, custom, memory,
                            enforce_timeout, referrer, tags, orig_options, "",
                            static, h)
                    if status is "ok":
                        task_ids = task_ids_tmp
                    else:
                        failed_hashes.append(h)

        if not isinstance(task_ids, list) and status == "error":
            # task_ids holds an already rendered error response
            return task_ids
        if not isinstance(task_ids_tmp, list) and status == "error":
            # task_ids_tmp holds an already rendered error response
            return task_ids_tmp
        if isinstance(task_ids, list):
            tasks_count = len(task_ids)
        else:
            # ToDo improve error msg
            tasks_count = 0
        if tasks_count > 0:
            data = {"tasks": task_ids, "tasks_count": tasks_count}
            if failed_hashes:
                data["failed_hashes"] = failed_hashes
            return render(request, "submission/complete.html", data)

        else:
            return render(request, "error.html",
                          {"error": "Error adding task to Cuckoo's database."})
    else:
        enabledconf = dict()
        enabledconf["vt"] = settings.VTDL_ENABLED
        enabledconf["kernel"] = settings.OPT_ZER0M0N
        enabledconf["memory"] = processing.memory.get("enabled")
        enabledconf["procmemory"] = processing.procmemory.get("enabled")
        enabledconf["dlnexec"] = settings.DLNEXEC
        enabledconf["tags"] = False
        enabledconf[
            "dist_master_storage_only"] = repconf.distributed.master_storage_only

        all_tags = load_vms_tags()
        if all_tags:
            enabledconf["tags"] = True

        if not enabledconf["tags"]:
            # load multi machinery tags:
            # Get enabled machinery
            machinery = cfg.cuckoo.get("machinery")
            if machinery == "multi":
                for mmachinery in Config(machinery).multi.get(
                        "machinery").split(","):
                    vms = [
                        x.strip() for x in getattr(Config(
                            mmachinery), mmachinery).get("machines").split(",")
                    ]
                    if any([
                            "tags"
                            in list(getattr(Config(mmachinery), vmtag).keys())
                            for vmtag in vms
                    ]):
                        enabledconf["tags"] = True
                        break
            else:
                # Get VM names for machinery config elements
                vms = [
                    x.strip() for x in getattr(Config(
                        machinery), machinery).get("machines").split(",")
                ]
                # Check each VM config element for tags
                if any([
                        "tags"
                        in list(getattr(Config(machinery), vmtag).keys())
                        for vmtag in vms
                ]):
                    enabledconf["tags"] = True

        packages, machines = get_form_data("windows")

        socks5s = _load_socks5_operational()
        socks5s_random = ""
        if socks5s:
            socks5s_random = random.choice(list(socks5s.values())).get(
                "description", False)

        return render(
            request, "submission/index.html", {
                "packages": sorted(packages),
                "machines": machines,
                "vpns": list(vpns.values()),
                "socks5s": list(socks5s.values()),
                "socks5s_random": socks5s_random,
                "route": routing.routing.route,
                "internet": routing.routing.internet,
                "inetsim": routing.inetsim.enabled,
                "tor": routing.tor.enabled,
                "config": enabledconf,
                "resubmit": resubmit_hash,
                "tags": sorted(list(set(all_tags)))
            })
Example #14
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to Elasticsearch.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_ELASTICSEARCH:
            raise CuckooDependencyError("Unable to import elasticsearch "
                                        "(install with `pip install elasticsearch`)")

        self.connect()
        index_prefix  = self.options.get("index", "cuckoo")
        search_only   = self.options.get("searchonly", False)

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        idxdate = report["info"]["started"].split(" ")[0]
        self.index_name = '{0}-{1}'.format(index_prefix, idxdate)

        if not search_only:
            if not "network" in report:
                report["network"] = {}

            # Store API calls in chunks for pagination in Django
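            # Each chunk holds at most 100 calls and is indexed as its own
            # "calls" document; the parent process entry keeps only the list
            # of chunk _ids, so the web UI can fetch one chunk at a time.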
            if "behavior" in report and "processes" in report["behavior"]:
                new_processes = []
                for process in report["behavior"]["processes"]:
                    new_process = dict(process)
                    chunk = []
                    chunks_ids = []
                    # Loop on each process call.
                    for index, call in enumerate(process["calls"]):
                        # If the chunk size is 100 or if the loop is completed then
                        # store the chunk in Elasticsearch.
                        if len(chunk) == 100:
                            to_insert = {"pid": process["process_id"],
                                         "calls": chunk}
                            pchunk = self.es.index(index=self.index_name,
                                                   doc_type="calls", body=to_insert)
                            chunk_id = pchunk['_id']
                            chunks_ids.append(chunk_id)
                            # Reset the chunk.
                            chunk = []

                        # Append call to the chunk.
                        chunk.append(call)

                    # Store leftovers.
                    if chunk:
                        to_insert = {"pid": process["process_id"], "calls": chunk}
                        pchunk = self.es.index(index=self.index_name, 
                                               doc_type="calls", body=to_insert)
                        chunk_id = pchunk['_id']
                        chunks_ids.append(chunk_id)

                    # Add list of chunks.
                    new_process["calls"] = chunks_ids
                    new_processes.append(new_process)

                # Store the results in the report.
                report["behavior"] = dict(report["behavior"])
                report["behavior"]["processes"] = new_processes

            # Add screenshot paths
            report["shots"] = []
            shots_path = os.path.join(self.analysis_path, "shots")
            if os.path.exists(shots_path):
                shots = [shot for shot in os.listdir(shots_path)
                         if shot.endswith(".jpg")]
                for shot_file in sorted(shots):
                    shot_path = os.path.join(self.analysis_path, "shots",
                                             shot_file)
                    screenshot = File(shot_path)
                    if screenshot.valid():
                        # Strip the extension as it's added later 
                        # in the Django view
                        report["shots"].append(shot_file.replace(".jpg", ""))

            if results.has_key("suricata") and results["suricata"]:
                if results["suricata"].has_key("tls") and len(results["suricata"]["tls"]) > 0:
                    report["suri_tls_cnt"] = len(results["suricata"]["tls"])
                if results["suricata"] and results["suricata"].has_key("alerts") and len(results["suricata"]["alerts"]) > 0:
                    report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
                if results["suricata"].has_key("files") and len(results["suricata"]["files"]) > 0:
                    report["suri_file_cnt"] = len(results["suricata"]["files"])
                if results["suricata"].has_key("http") and len(results["suricata"]["http"]) > 0:
                    report["suri_http_cnt"] = len(results["suricata"]["http"])
        else:
            report = {}
            report["task_id"] = results["info"]["id"]
            report["info"]    = results.get("info")
            report["target"]  = results.get("target")
            report["summary"] = results.get("behavior", {}).get("summary")
            report["network"] = results.get("network")
            report["virustotal"] = results.get("virustotal")

        # Other info we want Quick access to from the web UI
        if results.has_key("virustotal") and results["virustotal"] and results["virustotal"].has_key("positives") and results["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],results["virustotal"]["total"])

        # Store the report and retrieve its object id.
        self.es.index(index=self.index_name, doc_type="analysis", id=results["info"]["id"], body=report)
Example #15
0
def vt_lookup(category: str, target: str, results: dict = {}, on_demand: bool = False):
    if not processing_conf.virustotal.enabled or (processing_conf.virustotal.get("on_demand", False) and not on_demand):
        return {}
    if category not in ("file", "url"):
        return {"error": True, "msg": "VT category isn't supported"}

    if category == "file":
        if not do_file_lookup:
            return {"error": True, "msg": "VT File lookup disabled in processing.conf"}
        if not os.path.exists(target) and len(target) != 64:
            return {"error": True, "msg": "File doesn't exist"}

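        # target may be either a path to a local file or a bare SHA-256 hash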
        sha256 = target if len(target) == 64 else File(target).get_sha256()
        url = VIRUSTOTAL_FILE_URL.format(id=sha256)

    elif category == "url":
        if not do_url_lookup:
            return {"error": True, "msg": "VT URL lookup disabled in processing.conf"}
        if urlscrub:
            urlscrub_compiled_re = None
            try:
                urlscrub_compiled_re = re.compile(urlscrub)
            except Exception as e:
                log.error(f"Failed to compile urlscrub regex: {e}")
                return {}
            try:
                target = re.sub(urlscrub_compiled_re, "", target)
            except Exception as e:
                return {"error": True, "msg": f"Failed to scrub url: {e}"}

        # normalize the URL the way VT appears to
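        # e.g. "Example.COM" -> "http://example.com/": scheme added if missing,
        # scheme and host lowercased, path case preserved, trailing slash
        # appended when there is no path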
        if not target.lower().startswith(("http://", "https://")):
            target = f"http://{target}"
        slashsplit = target.split("/")
        slashsplit[0] = slashsplit[0].lower()
        slashsplit[2] = slashsplit[2].lower()
        if len(slashsplit) == 3:
            slashsplit.append("")
        target = "/".join(slashsplit)

        sha256 = hashlib.sha256(target.encode()).hexdigest()
        url = VIRUSTOTAL_URL_URL.format(id=sha256)

    try:
        r = requests.get(url, headers=headers, verify=True, timeout=timeout)
        if not r.ok:
            return {"error": True, "msg": f"Unable to complete connection to VirusTotal. Status code: {r.status_code}"}
        vt_response = r.json()
        engines = vt_response.get("data", {}).get("attributes", {}).get("last_analysis_results", {})
        if not engines:
            return {}
        virustotal = {
            "names": vt_response.get("data", {}).get("attributes", {}).get("names"),
            "scan_id": vt_response.get("data", {}).get("id"),
            "md5": vt_response.get("data", {}).get("attributes", {}).get("md5"),
            "sha1": vt_response.get("data", {}).get("attributes", {}).get("sha1"),
            "sha256": vt_response.get("data", {}).get("attributes", {}).get("sha256"),
            "tlsh": vt_response.get("data", {}).get("attributes", {}).get("tlsh"),
            "positive": vt_response.get("data", {}).get("attributes", {}).get("last_analysis_stats", {}).get("malicious"),
            "total": len(engines.keys()),
            "permalink": vt_response.get("data", {}).get("links", {}).get("self"),
        }
        if remove_empty:
            virustotal["scans"] = {engine.replace(".", "_"): block for engine, block in engines.items() if block["result"]}
        else:
            virustotal["scans"] = {engine.replace(".", "_"): block for engine, block in engines.items()}

        virustotal["resource"] = sha256
        virustotal["results"] = []
        detectnames = []
        for engine, block in engines.items():
            virustotal["results"] += [{"vendor": engine.replace(".", "_"), "sig": block["result"]}]
            if block["result"] and "Trojan.Heur." not in block["result"]:
                # weight Microsoft's detection, they seem to be more accurate than the rest
                if engine == "Microsoft":
                    detectnames.append(block["result"])
                detectnames.append(block["result"])

        virustotal["detection"] = get_vt_consensus(detectnames)
        if virustotal.get("detection", False) and results:
            add_family_detection(results, virustotal["detection"], "VirusTotal", virustotal["sha256"])
        if virustotal.get("positives", False) and virustotal.get("total", False):
            virustotal["summary"] = f"{virustotal['positives']}/{virustotal['total']}"

        return virustotal
    except requests.exceptions.RequestException as e:
        return {
            "error": True,
            "msg": f"Unable to complete connection to VirusTotal: {e}",
        }

Example #16
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to Elasticsearch.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_ELASTICSEARCH:
            raise CuckooDependencyError(
                "Unable to import elasticsearch "
                "(install with `pip3 install elasticsearch`)")

        self.connect()
        index_prefix = self.options.get("index", "cuckoo")
        search_only = self.options.get("searchonly", False)

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        idxdate = report["info"]["started"].split(" ")[0]
        self.index_name = '{0}-{1}'.format(index_prefix, idxdate)

        if not search_only:
            if not "network" in report:
                report["network"] = {}

            # Store API calls in chunks for pagination in Django
            if "behavior" in report and "processes" in report["behavior"]:
                new_processes = []
                for process in report["behavior"]["processes"]:
                    new_process = dict(process)
                    chunk = []
                    chunks_ids = []
                    # Loop on each process call.
                    for index, call in enumerate(process["calls"]):
                        # If the chunk size is 100 or if the loop is completed then
                        # store the chunk in Elasticsearch.
                        if len(chunk) == 100:
                            to_insert = {
                                "pid": process["process_id"],
                                "calls": chunk
                            }
                            pchunk = self.es.index(index=self.index_name,
                                                   doc_type="calls",
                                                   body=to_insert)
                            chunk_id = pchunk['_id']
                            chunks_ids.append(chunk_id)
                            # Reset the chunk.
                            chunk = []

                        # Append call to the chunk.
                        chunk.append(call)

                    # Store leftovers.
                    if chunk:
                        to_insert = {
                            "pid": process["process_id"],
                            "calls": chunk
                        }
                        pchunk = self.es.index(index=self.index_name,
                                               doc_type="calls",
                                               body=to_insert)
                        chunk_id = pchunk['_id']
                        chunks_ids.append(chunk_id)

                    # Add list of chunks.
                    new_process["calls"] = chunks_ids
                    new_processes.append(new_process)

                # Store the results in the report.
                report["behavior"] = dict(report["behavior"])
                report["behavior"]["processes"] = new_processes

            # Add screenshot paths
            report["shots"] = []
            shots_path = os.path.join(self.analysis_path, "shots")
            if os.path.exists(shots_path):
                shots = [
                    shot for shot in os.listdir(shots_path)
                    if shot.endswith(".jpg")
                ]
                for shot_file in sorted(shots):
                    shot_path = os.path.join(self.analysis_path, "shots",
                                             shot_file)
                    screenshot = File(shot_path)
                    if screenshot.valid():
                        # Strip the extension as it's added later
                        # in the Django view
                        report["shots"].append(shot_file.replace(".jpg", ""))

            # Other info we want Quick access to from the web UI
            if "virustotal" in results and results[
                    "virustotal"] and "positives" in results[
                        "virustotal"] and "total" in results["virustotal"]:
                report["virustotal_summary"] = "%s/%s" % (
                    results["virustotal"]["positives"],
                    results["virustotal"]["total"])

            if "suricata" in results and results["suricata"]:
                if "tls" in results["suricata"] and len(
                        results["suricata"]["tls"]) > 0:
                    report["suri_tls_cnt"] = len(results["suricata"]["tls"])
                if results["suricata"] and "alerts" in results[
                        "suricata"] and len(results["suricata"]["alerts"]) > 0:
                    report["suri_alert_cnt"] = len(
                        results["suricata"]["alerts"])
                if "files" in results["suricata"] and len(
                        results["suricata"]["files"]) > 0:
                    report["suri_file_cnt"] = len(results["suricata"]["files"])
                if "http" in results["suricata"] and len(
                        results["suricata"]["http"]) > 0:
                    report["suri_http_cnt"] = len(results["suricata"]["http"])
        else:
            report = {}
            report["task_id"] = results["info"]["id"]
            report["info"] = results.get("info")
            report["target"] = results.get("target")
            report["summary"] = results.get("behavior", {}).get("summary")
            report["network"] = results.get("network")
            report["virustotal"] = results.get("virustotal")
            report["virustotal_summary"] = "%s/%s" % (
                results["virustotal"]["positives"],
                results["virustotal"]["total"])

        # Store the report and retrieve its object id.
        self.es.index(index=self.index_name,
                      doc_type="analysis",
                      id=results["info"]["id"],
                      body=report)
Example #17
0
    def run(self):
        """Run analysis.
        @return: list of process dumps with related information.
        """
        self.key = "procdump"
        procdump_files = []
        buf = self.options.get("buffer", 8192)
        if not hasattr(self, "procdump_path") or not os.path.exists(
                self.procdump_path):
            return None
        file_names = os.listdir(self.procdump_path)
        for file_name in file_names:
            file_path = os.path.join(self.procdump_path, file_name)
            if not os.path.isfile(file_path):
                continue
            if file_name.endswith("_info.txt"):
                continue
            with open(file_path + "_info.txt", 'r') as f:
                metastring = f.readline()
            file_info = File(file_path=file_path,
                             guest_paths=metastring,
                             file_name=file_name).get_all()
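            # The *_info.txt line is a comma-separated record; indices 1-3 are
            # assumed below to hold <pid>,<process path>,<module path>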
            metastrings = metastring.split(",")
            file_info["process_path"] = metastrings[2]
            file_info["module_path"] = metastrings[3]
            file_info["process_name"] = file_info["process_path"].split(
                "\\")[-1]
            file_info["pid"] = metastrings[1]
            file_info["cape_type"] = "PE image"
            type_strings = file_info["type"].split()
            if type_strings[0] == ("PE32+"):
                file_info["cape_type"] += ": 64-bit "
            elif type_strings[0] == ("PE32"):
                file_info["cape_type"] += ": 32-bit "
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"
            texttypes = [
                "ASCII",
                "Windows Registry text",
                "XML document text",
                "Unicode text",
            ]
            readit = False
            for texttype in texttypes:
                if texttype in file_info["type"]:
                    readit = True
                    break
            if readit:
                with open(file_info["path"], "r") as drop_open:
                    filedata = drop_open.read(buf + 1)
                if len(filedata) > buf:
                    file_info["data"] = convert_to_printable(filedata[:buf] +
                                                             " <truncated>")
                else:
                    file_info["data"] = convert_to_printable(filedata)

            procdump_files.append(file_info)

        return procdump_files
Example #18
0
    def process_file(self, file_path, CAPE_files, append_file):
        """Process file.
        @return: file_info
        """
        global cape_config
        cape_name = ""
        strings = []

        buf = self.options.get("buffer", BUFSIZE)

        if file_path.endswith("_info.txt"):
            return

        texttypes = [
            "ASCII",
            "Windows Registry text",
            "XML document text",
            "Unicode text",
        ]

        if os.path.exists(file_path + "_info.txt"):
            with open(file_path + "_info.txt", 'r') as f:
                metastring = f.readline()
        else:
            metastring = ""

        file_info = File(file_path, metastring).get_all()

        # Get the file data
        with open(file_info["path"], "r") as file_open:
            filedata = file_open.read(buf + 1)
        if len(filedata) > buf:
            file_info["data"] = binascii.b2a_hex(filedata[:buf] +
                                                 " <truncated>")
        else:
            file_info["data"] = binascii.b2a_hex(filedata)

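        # The metastring read from the *_info.txt file is comma-separated; the
        # indices used below assume the layout
        # <cape type code>,<pid>,<process path>,<module path>[,<type-specific fields>]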
        metastrings = metastring.split(",")
        if len(metastrings) > 1:
            file_info["pid"] = metastrings[1]
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[2]
            file_info["process_name"] = metastrings[2].split("\\")[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[3]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""

        if metastrings != "":
            try:
                file_info["cape_type_code"] = int(metastrings[0])
            except Exception as e:
                pass
            if file_info["cape_type_code"] == COMPRESSION:
                file_info["cape_type"] = "Decompressed PE Image"
            if file_info["cape_type_code"] == INJECTION_PE:
                file_info["cape_type"] = "Injected PE Image"
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[4]
                    file_info["target_process"] = metastrings[4].split(
                        "\\")[-1]
                    file_info["target_pid"] = metastrings[5]
            if file_info["cape_type_code"] == INJECTION_SHELLCODE:
                file_info["cape_type"] = "Injected Shellcode/Data"
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[4]
                    file_info["target_process"] = metastrings[4].split(
                        "\\")[-1]
                    file_info["target_pid"] = metastrings[5]
            if file_info["cape_type_code"] == EXTRACTION_PE:
                file_info["cape_type"] = "Extracted PE Image"
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[4]
            if file_info["cape_type_code"] == EXTRACTION_SHELLCODE:
                file_info["cape_type"] = "Extracted Shellcode"
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[4]
            type_strings = file_info["type"].split()
            if type_strings[0] == ("PE32+"):
                file_info["cape_type"] += ": 64-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            if type_strings[0] == ("PE32"):
                file_info["cape_type"] += ": 32-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            # PlugX
            if file_info["cape_type_code"] == PLUGX_CONFIG:
                file_info["cape_type"] = "PlugX Config"
                plugx_parser = plugx.PlugXConfig()
                plugx_config = plugx_parser.parse_config(
                    filedata, len(filedata))
                if not "cape_config" in cape_config and plugx_config:
                    cape_config["cape_config"] = {}
                    for key, value in plugx_config.items():
                        cape_config["cape_config"].update({key: [value]})
                    cape_name = "PlugX"
                append_file = False
            if file_info["cape_type_code"] == PLUGX_PAYLOAD:
                file_info["cape_type"] = "PlugX Payload"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
            # EvilGrab
            if file_info["cape_type_code"] == EVILGRAB_PAYLOAD:
                file_info["cape_type"] = "EvilGrab Payload"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
            if file_info["cape_type_code"] == EVILGRAB_DATA:
                cape_name = "EvilGrab"
                file_info["cape_type"] = "EvilGrab Data"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                if file_info["size"] == 256 or file_info["size"] == 260:
                    ConfigItem = "filepath"
                    ConfigData = format(filedata)
                    cape_config["cape_config"].update(
                        {ConfigItem: [ConfigData]})
                if file_info["size"] > 0x1000:
                    append_file = True
                else:
                    append_file = False
            # Azzy
            if file_info["cape_type_code"] == AZZY_DATA:
                cape_name = "Azzy"
                cape_config["cape_type"] = "Azzy Config"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                AzzyConfigIndex = metastrings[4] if len(
                    metastrings) > 4 else None
                if AzzyConfigIndex == '0x0':
                    ConfigItem = "Timer1"
                elif AzzyConfigIndex == '0x1':
                    ConfigItem = "Timer2"
                elif AzzyConfigIndex == '0x2':
                    ConfigItem = "Computer Name"
                elif AzzyConfigIndex == '0x3':
                    ConfigItem = "C&C1"
                elif AzzyConfigIndex == '0x4':
                    ConfigItem = "C&C2"
                elif AzzyConfigIndex == '0x5':
                    ConfigItem = "Operation Name"
                elif AzzyConfigIndex == '0x6':
                    ConfigItem = "Keylogger MaxBuffer"
                elif AzzyConfigIndex == '0x7':
                    ConfigItem = "Keylogger MaxTimeout"
                elif AzzyConfigIndex == '0x8':
                    ConfigItem = "Keylogger Flag"
                elif AzzyConfigIndex == '0x9':
                    ConfigItem = "C&C3"
                else:
                    ConfigItem = "Unknown"
                ConfigData = format(filedata)
                if ConfigData:
                    cape_config["cape_config"].update(
                        {ConfigItem: [ConfigData]})
                append_file = False
            # UPX
            if file_info["cape_type_code"] == UPX:
                file_info["cape_type"] = "Unpacked PE Image"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

        # Process CAPE Yara hits
        for hit in file_info["cape_yara"]:
            cape_name = hit["name"]
            try:
                file_info["cape_type"] = hit["meta"]["cape_type"]
            except (KeyError, TypeError):
                file_info["cape_type"] = cape_name + " Payload"
            type_strings = file_info["type"].split()
            if type_strings[0] == ("PE32+"):
                file_info["cape_type"] += ": 64-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            if type_strings[0] == ("PE32"):
                file_info["cape_type"] += ": 32-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            # UPX Check and unpack
            if cape_name == 'UPX':
                log.info(
                    "CAPE: Found UPX Packed sample - attempting to unpack")
                unpacked_file = upx_unpack(filedata)
                if unpacked_file and os.path.exists(unpacked_file):
                    unpacked_yara = File(unpacked_file).get_yara(
                        CAPE_YARA_RULEPATH)
                    for unpacked_hit in unpacked_yara:
                        unpacked_name = unpacked_hit["name"]
                        if unpacked_name == 'UPX':
                            # Failed to unpack
                            log.info("CAPE: Failed to unpack UPX")
                            os.unlink(unpacked_file)
                            break
                    if not os.path.exists(self.CAPE_path):
                        os.makedirs(self.CAPE_path)
                    newname = os.path.join(self.CAPE_path,
                                           os.path.basename(unpacked_file))
                    os.rename(unpacked_file, newname)
                    with open(newname + "_info.txt", "a") as infofd:
                        infofd.write(os.path.basename(unpacked_file) + "\n")

                    # Recursive process of unpacked file
                    upx_extract = self.process_file(newname, CAPE_files, True)
                    if upx_extract["type"]:
                        upx_extract["cape_type"] = "UPX-extracted "
                        type_strings = upx_extract["type"].split()
                        if type_strings[0] == ("PE32+"):
                            upx_extract["cape_type"] += " 64-bit "
                            if type_strings[2] == ("(DLL)"):
                                upx_extract["cape_type"] += "DLL"
                            else:
                                upx_extract["cape_type"] += "executable"
                        if type_strings[0] == ("PE32"):
                            upx_extract["cape_type"] += " 32-bit "
                            if type_strings[2] == ("(DLL)"):
                                upx_extract["cape_type"] += "DLL"
                            else:
                                upx_extract["cape_type"] += "executable"

            # Attempt to import a parser for the yara hit
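            # Two parser back-ends are tried in turn: a DC3-MWCP parser named
            # after the yara hit, then a malwareconfig.com module of the same
            # name; whatever config either returns is merged into cape_config.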
            # DC3-MWCP
            try:
                mwcp = malwareconfigreporter.malwareconfigreporter()
                kwargs = {}
                mwcp.run_parser(cape_name, data=filedata, **kwargs)
                if mwcp.errors == []:
                    log.info("CAPE: Imported DC3-MWCP parser %s", cape_name)
                    mwcp_loaded = True
                else:
                    error_lines = mwcp.errors[0].split("\n")
                    for line in error_lines:
                        if line.startswith('ImportError: '):
                            log.info("CAPE: DC3-MWCP parser: %s",
                                     line.split(': ')[1])
                    mwcp_loaded = False
            except ImportError:
                mwcp_loaded = False
            # malwareconfig
            try:
                malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules",
                                                     "processing", "parsers",
                                                     "malwareconfig")
                file, pathname, description = imp.find_module(
                    cape_name, [malwareconfig_parsers])
                module = imp.load_module(cape_name, file, pathname,
                                         description)
                malwareconfig_loaded = True
                log.info("CAPE: Imported malwareconfig.com parser %s",
                         cape_name)
            except ImportError:
                #log.info("CAPE: No malwareconfig.com parser for %s", cape_name)
                malwareconfig_loaded = False

            # Get config data
            if mwcp_loaded:
                try:
                    if not "cape_config" in cape_config:
                        cape_config["cape_config"] = {}
                        cape_config["cape_config"] = convert(mwcp.metadata)
                    else:
                        cape_config["cape_config"].update(
                            convert(mwcp.metadata))
                except Exception as e:
                    log.error(
                        "CAPE: DC3-MWCP config parsing error with %s: %s",
                        cape_name, e)
            elif malwareconfig_loaded:
                try:
                    if not "cape_config" in cape_config:
                        cape_config["cape_config"] = {}
                    malwareconfig_config = module.config(filedata)
                    if isinstance(malwareconfig_config, list):
                        for (key,
                             value) in module.config(filedata)[0].iteritems():
                            cape_config["cape_config"].update({key: [value]})
                    elif isinstance(malwareconfig_config, dict):
                        for (key,
                             value) in module.config(filedata).iteritems():
                            cape_config["cape_config"].update({key: [value]})
                except Exception as e:
                    log.error("CAPE: malwareconfig parsing error with %s: %s",
                              cape_name, e)

        if cape_name:
            cape_config["cape_name"] = format(cape_name)
            if not "cape" in self.results:
                #self.results["cape"] = []
                self.results["cape"] = cape_name
            #if cape_name not in self.results["cape"]:
            #    self.results["cape"].append(cape_name)

        if append_file:
            CAPE_files.append(file_info)
        return file_info
Example #19
0
    def run(self):
        """Runs VirusTotal processing
        @return: full VirusTotal report.
        """
        self.key = "virustotal"
        virustotal = []

        key = self.options.get("key", None)
        timeout = self.options.get("timeout", 60)
        urlscrub = self.options.get("urlscrub", None)
        do_file_lookup = self.getbool(self.options.get("do_file_lookup",
                                                       False))
        do_url_lookup = self.getbool(self.options.get("do_url_lookup", False))

        if not key:
            raise CuckooProcessingError("VirusTotal API key not "
                                        "configured, skip")

        if self.task["category"] == "file" and do_file_lookup:
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError(
                    "File {0} not found, skipping it".format(self.file_path))

            resource = File(self.file_path).get_sha256()
            url = VIRUSTOTAL_FILE_URL

        elif self.task["category"] == "url" and do_url_lookup:
            resource = self.task["target"]
            if urlscrub:
                urlscrub_compiled_re = None
                try:
                    urlscrub_compiled_re = re.compile(urlscrub)
                except Exception as e:
                    raise CuckooProcessingError(
                        "Failed to compile urlscrub regex: {0}".format(e))
                try:
                    resource = re.sub(urlscrub_compiled_re, "", resource)
                except Exception as e:
                    raise CuckooProcessingError("Failed to scrub url" % (e))

            # normalize the URL the way VT appears to
            if not resource.lower().startswith(
                    "http://") and not resource.lower().startswith("https://"):
                resource = "http://" + resource
            slashsplit = resource.split('/')
            slashsplit[0] = slashsplit[0].lower()
            slashsplit[2] = slashsplit[2].lower()
            if len(slashsplit) == 3:
                slashsplit.append("")
            resource = "/".join(slashsplit)

            resource = hashlib.sha256(resource.encode("utf-8")).hexdigest()
            url = VIRUSTOTAL_URL_URL
        else:
            # Not supported type, exit.
            return virustotal

        data = {"resource": resource, "apikey": key}

        try:
            r = requests.get(url,
                             params=data,
                             verify=True,
                             timeout=int(timeout))
            response_data = r.content
        except requests.exceptions.RequestException as e:
            raise CuckooProcessingError("Unable to complete connection "
                                        "to VirusTotal: {0}".format(e))

        try:
            virustotal = json.loads(response_data)
        except ValueError as e:
            raise CuckooProcessingError("Unable to convert response to "
                                        "JSON: {0}".format(e))

        # Work around VT brain-damage
        if isinstance(virustotal, list) and len(virustotal):
            virustotal = virustotal[0]

        if "scans" in virustotal:
            items = virustotal["scans"].items()
            virustotal["scans"] = dict((engine.replace(".", "_"), signature)
                                       for engine, signature in items)
            virustotal["resource"] = resource
            virustotal["results"] = list(({
                "vendor": engine.replace(".", "_"),
                "sig": signature["result"]
            }) for engine, signature in items)
        return virustotal
Example #20
0
    def process_file(self, file_path, append_file, metadata=None):
        """Process file.
        @return: file_info
        """

        if metadata is None:
            metadata = {}
        cape_name = ""
        type_string = ""

        if not os.path.exists(file_path):
            return

        file_info, pefile_object = File(file_path,
                                        metadata.get("metadata",
                                                     "")).get_all()
        cape_names = set()

        if pefile_object:
            self.results.setdefault("pefiles",
                                    {}).setdefault(file_info["sha256"],
                                                   pefile_object)

        if file_info.get("clamav") and processing_conf.detections.clamav:
            clamav_detection = get_clamav_consensus(file_info["clamav"])
            if clamav_detection:
                add_family_detection(self.results, clamav_detection, "ClamAV",
                                     file_info["sha256"])

        # should we use dropped path here?
        static_file_info(
            file_info,
            file_path,
            str(self.task["id"]),
            self.task.get("package", ""),
            self.task.get("options", ""),
            self.self_extracted,
            self.results,
        )

        # Get the file data
        with open(file_info["path"], "rb") as file_open:
            file_data = file_open.read()

        if metadata.get("pids", False):
            file_info["pid"] = metadata["pids"][0] if len(
                metadata["pids"]) == 1 else ",".join(metadata["pids"])

        metastrings = metadata.get("metadata", "").split(";?")
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[1]
            file_info["process_name"] = metastrings[1].rsplit("\\", 1)[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[2]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""
        if metastrings and metastrings[0] and metastrings[0].isdigit():
            file_info["cape_type_code"] = int(metastrings[0])

            if file_info["cape_type_code"] == TYPE_STRING:
                if len(metastrings) > 4:
                    type_string = metastrings[3]

            elif file_info["cape_type_code"] == COMPRESSION:
                file_info["cape_type"] = "Decompressed PE Image"

            elif file_info["cape_type_code"] in inject_map:
                file_info["cape_type"] = inject_map[
                    file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[3]
                    file_info["target_process"] = metastrings[3].rsplit(
                        "\\", 1)[-1]
                    file_info["target_pid"] = metastrings[4]

            elif file_info["cape_type_code"] in unpack_map:
                file_info["cape_type"] = unpack_map[
                    file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[3]

            type_strings = file_info["type"].split()

            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"

            if file_info["cape_type_code"] in code_mapping:
                file_info["cape_type"] = code_mapping[
                    file_info["cape_type_code"]]
                type_strings = file_info["type"].split()
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if file_info["cape_type_code"] in name_mapping:
                    cape_name = name_mapping[file_info["cape_type_code"]]
                append_file = True

            # PlugX
            elif file_info["cape_type_code"] == PLUGX_CONFIG:
                file_info["cape_type"] = "PlugX Config"
                if plugx_parser:
                    plugx_config = plugx_parser.parse_config(
                        file_data, len(file_data))
                    if plugx_config:
                        cape_name = "PlugX"
                        self.update_cape_configs(cape_name, plugx_config)
                        cape_names.add(cape_name)
                    else:
                        log.error(
                            "CAPE: PlugX config parsing failure - size many not be handled"
                        )
                    append_file = False

            # Attempt to decrypt script dump
            elif file_info["cape_type_code"] == SCRIPT_DUMP:
                data = file_data.decode("utf-16").replace("\x00", "")
                cape_name = "ScriptDump"
                malwareconfig_loaded = False
                try:
                    malwareconfig_parsers = os.path.join(
                        CUCKOO_ROOT, "modules", "processing", "parsers",
                        "CAPE")
                    file, pathname, description = imp.find_module(
                        cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname,
                                             description)
                    malwareconfig_loaded = True
                    log.debug("CAPE: Imported parser %s", cape_name)
                except ImportError:
                    log.debug("CAPE: parser: No module named %s", cape_name)
                if malwareconfig_loaded:
                    try:
                        script_data = module.config(self, data)
                        if script_data and "more_eggs" in script_data["type"]:
                            bindata = script_data["data"]
                            sha256 = hashlib.sha256(bindata).hexdigest()
                            filepath = os.path.join(self.CAPE_path, sha256)
                            if "text" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsJS"
                            elif "binary" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsBin"
                            with open(filepath, "w") as cfile:
                                cfile.write(bindata)
                                self.script_dump_files.append(filepath)
                        else:
                            file_info["cape_type"] = "Script Dump"
                            log.info(
                                "CAPE: Script Dump does not contain known encrypted payload"
                            )
                    except Exception as e:
                        log.error(
                            "CAPE: malwareconfig parsing error with %s: %s",
                            cape_name, e)
                append_file = True

            # More_Eggs
            elif file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
                file_info["cape_type"] = "More Eggs JS Payload"
                cape_name = "MoreEggs"
                append_file = True

        # Process CAPE Yara hits

        # Prefilter extracted data; readability beats a one-liner:
        all_files = []
        for extracted_file in file_info.get("extracted_files", []):
            yara_hits = extracted_file["cape_yara"]
            if not yara_hits:
                continue
            if extracted_file.get("data", b""):
                extracted_file_data = make_bytes(extracted_file["data"])
            else:
                with open(extracted_file["path"], "rb") as fil:
                    extracted_file_data = fil.read()
            for yara in yara_hits:
                all_files.append((
                    f"[{extracted_file.get('sha256', '')}]{file_info['path']}",
                    extracted_file_data,
                    yara,
                ))

        for yara in file_info["cape_yara"]:
            all_files.append((file_info["path"], file_data, yara))

        executed_config_parsers = collections.defaultdict(set)
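        # Track which config parsers have already been run for each payload
        # path so the same extractor is not invoked twice on one file.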
        for tmp_path, tmp_data, hit in all_files:
            # Check for a payload or config hit
            try:
                if File.yara_hit_provides_detection(hit):
                    file_info["cape_type"] = hit["meta"]["cape_type"]
                    cape_name = File.get_cape_name_from_yara_hit(hit)
                    cape_names.add(cape_name)
            except Exception as e:
                print(f"Cape type error: {e}")
            type_strings = file_info["type"].split()
            if "-bit" not in file_info["cape_type"]:
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    file_info["cape_type"] += "DLL" if type_strings[2] == (
                        "(DLL)") else "executable"

            if cape_name and cape_name not in executed_config_parsers[tmp_path]:
                tmp_config = static_config_parsers(cape_name, tmp_path,
                                                   tmp_data)
                self.update_cape_configs(cape_name, tmp_config)
                executed_config_parsers[tmp_path].add(cape_name)

        if type_string:
            log.info("CAPE: type_string: %s", type_string)
            tmp_cape_name = File.get_cape_name_from_cape_type(type_string)
            if tmp_cape_name and tmp_cape_name not in executed_config_parsers:
                tmp_config = static_config_parsers(tmp_cape_name,
                                                   file_info["path"],
                                                   file_data)
                if tmp_config:
                    cape_name = tmp_cape_name
                    cape_names.add(cape_name)
                    log.info("CAPE: config returned for: %s", cape_name)
                    self.update_cape_configs(cape_name, tmp_config)

        self.add_family_detections(file_info, cape_names)

        # Remove duplicate payloads from web ui
        for cape_file in self.cape["payloads"] or []:
            if file_info["size"] == cape_file["size"]:
                if HAVE_PYDEEP:
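                    # pydeep.compare returns a 0-100 similarity score; at or
                    # above the threshold the payload is treated as a duplicate.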
                    ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode(),
                                                  cape_file["ssdeep"].encode())
                    if ssdeep_grade >= ssdeep_threshold:
                        log.debug(
                            "CAPE duplicate output file skipped: ssdeep grade %d, threshold %d",
                            ssdeep_grade, ssdeep_threshold)
                        append_file = False
                if file_info.get("entrypoint") and file_info.get(
                        "ep_bytes") and cape_file.get("entrypoint"):
                    if (file_info["entrypoint"] == cape_file["entrypoint"]
                            and file_info["cape_type_code"]
                            == cape_file["cape_type_code"] and
                            file_info["ep_bytes"] == cape_file["ep_bytes"]):
                        log.debug(
                            "CAPE duplicate output file skipped: matching entrypoint"
                        )
                        append_file = False

        if append_file:
            if HAVE_FLARE_CAPA:
                pretime = timeit.default_timer()
                capa_details = flare_capa_details(file_path, "cape")
                if capa_details:
                    file_info["flare_capa"] = capa_details
                self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
            self.cape["payloads"].append(file_info)
Example #21
0
def tasks_create_file():
    response = {}

    data = request.files.file
    pcap = request.POST.get("pcap", "")
    package = request.forms.get("package", "")
    timeout = request.forms.get("timeout", "")
    priority = request.forms.get("priority", 1)
    options = request.forms.get("options", "")
    machine = request.forms.get("machine", "")
    platform = request.forms.get("platform", "")
    tags = request.forms.get("tags", None)
    custom = request.forms.get("custom", "")
    memory = request.forms.get("memory", "False")
    clock = request.forms.get("clock",
                              datetime.now().strftime("%m-%d-%Y %H:%M:%S"))
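    # Reject missing or obviously bogus clock values (such as an epoch-1970
    # timestamp) and fall back to the current time.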
    if clock is False or clock is None:
        clock = datetime.now().strftime("%m-%d-%Y %H:%M:%S")
    if "1970" in clock:
        clock = datetime.now().strftime("%m-%d-%Y %H:%M:%S")
    shrike_url = request.forms.get("shrike_url", None)
    shrike_msg = request.forms.get("shrike_msg", None)
    shrike_sid = request.forms.get("shrike_sid", None)
    shrike_refer = request.forms.get("shrike_refer", None)
    static = bool(request.POST.get("static", False))
    unique = bool(request.forms.get("unique", False))
    if memory.upper() == "FALSE" or memory == "0":
        memory = False
    else:
        memory = True

    enforce_timeout = request.forms.get("enforce_timeout", "False")
    if enforce_timeout.upper() == "FALSE" or enforce_timeout == "0":
        enforce_timeout = False
    else:
        enforce_timeout = True

    temp_file_path = store_temp_file(data.file.read(), data.filename)

    if unique and db.check_file_uniq(File(temp_file_path).get_sha256()):
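        # check_file_uniq matches on the sample's SHA-256; with the "unique"
        # option set, an already-known file is rejected instead of re-queued.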
        resp = {
            "error":
            True,
            "error_value":
            "Duplicated file, disable unique option to force submission"
        }
        return jsonize(resp)

    if pcap:
        if data.filename.lower().endswith(".saz"):
            saz = saz_to_pcap(temp_file_path)
            if saz:
                path = saz
                try:
                    os.remove(temp_file_path)
                except OSError:
                    pass
            else:
                resp = {
                    "error": True,
                    "error_value": "Failed to convert PCAP to SAZ"
                }
                return jsonize(resp)
        else:
            path = temp_file_path
        task_id = db.add_pcap(file_path=path)
        task_ids = [task_id]
    else:

        try:
            task_ids, extra_details = db.demux_sample_and_add_to_db(
                file_path=temp_file_path,
                package=package,
                timeout=timeout,
                options=options,
                priority=priority,
                machine=machine,
                platform=platform,
                custom=custom,
                memory=memory,
                enforce_timeout=enforce_timeout,
                tags=tags,
                clock=clock,
                shrike_url=shrike_url,
                shrike_msg=shrike_msg,
                shrike_sid=shrike_sid,
                shrike_refer=shrike_refer,
                static=static,
            )
        except CuckooDemuxError as e:
            return HTTPError(500, e)

    response["task_ids"] = task_ids
    return jsonize(response)
Example #22
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
                CuckooReportError("Mongo schema version not expected, check data migration tool")
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #    created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        self.db.fs.files.ensure_index("sha256", unique=True,
                                      sparse=True, name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)
        if not "network" in report:
            report["network"] = {}
        # Store the sample in GridFS.
        if results["info"]["category"] == "file" and "target" in results:
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"]["pcap_id"] = pcap_id

        sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
        spcap = File(sorted_pcap_path)
        if spcap.valid():
            spcap_id = self.store_file(spcap)
            report["network"]["sorted_pcap_id"] = spcap_id

        if "procmemory" in report:
            # Store the process memory dump file in GridFS and reference it back in the report.
            for idx, procmem in enumerate(report['procmemory']):
                procmem_path = os.path.join(self.analysis_path, "memory", "{0}.dmp".format(procmem['pid']))
                procmem_file = File(procmem_path)
                if procmem_file.valid():
                    procmem_id = self.store_file(procmem_file)
                    report["procmemory"][idx].update({"procmem_id": procmem_id})

        # Store the suri extracted files in GridFS and reference it back in the report.
        suri_extracted_zip_path = os.path.join(self.analysis_path, "logs/files.zip")
        suri_extracted_zip = File(suri_extracted_zip_path)
        if suri_extracted_zip.valid():
            suri_extracted_zip_id = self.store_file(suri_extracted_zip)
            report["suricata"] = {"suri_extracted_zip": suri_extracted_zip_id}
            report["suricata"].update(results["suricata"])

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        if "dropped" in report:
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop, filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Store the Zipped Droppings file in GridFS and reference it back in the report.
        #cuckoo_dropped_zip_path = os.path.join(self.analysis_path, "cuckoodroppings.zip")
        #cuckoo_dropped_zip = File(cuckoo_dropped_zip_path)
        #if cuckoo_dropped_zip.valid():
        #    cuckoo_droppings_id = self.store_file(cuckoo_dropped_zip)
        #    report["zippeddroppings"] = {"cuckoo_droppings_id": cuckoo_droppings_id}
        #    report["zippeddroppings"].update(results["zippeddroppings"])


        # Walk through the suricata extracted files, store them in GridFS and update the
        # report with the ObjectIds.
        new_suricata_files = []
        if results.has_key("suricata") and results["suricata"]:
            if results["suricata"].has_key("files") and results["suricata"]["files"]:
                for suricata_file_e in results["suricata"]["files"]:
                    if suricata_file_e.has_key("file_info"):
                        tmp_suricata_file_d = dict(suricata_file_e)
                        suricata_file = File(suricata_file_e["file_info"]["path"])
                        if suricata_file.valid():
                            suricata_file_id = self.store_file(suricata_file, filename=suricata_file_e["file_info"]["name"])
                            tmp_suricata_file_d["object_id"] = suricata_file_id
                            new_suricata_files.append(tmp_suricata_file_d)

                report["suricata"]["files"] = new_suricata_files

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]

            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path,
                                         "shots", shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
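        # For example, a process with 250 calls yields three chunk documents
        # (100, 100 and 50 calls) whose ObjectIds replace the inline list.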
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed then
                    # store the chunk in MongoDB.
                    if len(chunk) == 100:
                        to_insert = {"pid": process["process_id"],
                                     "calls": chunk}
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Other info we want quick access to from the web UI
        if results.has_key("virustotal") and results["virustotal"] and results["virustotal"].has_key("positives") and results["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],results["virustotal"]["total"])
        if results.has_key("suricata") and results["suricata"]:
            if results["suricata"].has_key("tls") and len(results["suricata"]["tls"]) > 0:
                report["suri_tls_cnt"] = len(results["suricata"]["tls"])
            if results["suricata"] and results["suricata"].has_key("alerts") and len(results["suricata"]["alerts"]) > 0:
                report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
            if results["suricata"].has_key("files") and len(results["suricata"]["files"]) > 0:
                report["suri_file_cnt"] = len(results["suricata"]["files"])
            if results["suricata"].has_key("http") and len(results["suricata"]["http"]) > 0:
                report["suri_http_cnt"] = len(results["suricata"]["http"])
        # Store the report and retrieve its object id.
        try:
            self.db.analysis.save(report)
        except InvalidDocument as e:
            parent_key, psize = self.debug_dict_size(report)[0]
            child_key, csize = self.debug_dict_size(report[parent_key])[0]
            if not self.options.get("fix_large_docs", False):
                # Just log the error and problem keys
                log.error(str(e))
                log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576))
                log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576))
            else:
                # Delete the problem keys and check for more
                error_saved = True
                while error_saved:
                    log.warn("results['%s']['%s'] deleted due to >16MB size (%dMB)" %
                             (parent_key, child_key, int(psize) / 1048576))
                    del report[parent_key][child_key]
                    try:
                        self.db.analysis.save(report)
                        error_saved = False
                    except InvalidDocument as e:
                        parent_key, psize = self.debug_dict_size(report)[0]
                        child_key, csize = self.debug_dict_size(report[parent_key])[0]
                        log.error(str(e))
                        log.error("Largest parent key: %s (%d bytes)" % (parent_key, int(psize) / 1048576))
                        log.error("Largest child key: %s (%d bytes)" % (child_key, int(csize) / 1048576))

        self.conn.close()
Example #23
0
    def process_file(self, file_path, append_file, metadata=None):
        """Process file.
        @return: file_info
        """

        if metadata is None:
            metadata = {}
        config = {}
        cape_name = ""

        if not os.path.exists(file_path):
            return

        buf = self.options.get("buffer", BUFSIZE)
        file_info, pefile_object = File(file_path,
                                        metadata.get("metadata",
                                                     "")).get_all()
        if pefile_object:
            self.results.setdefault("pefiles", {})
            self.results["pefiles"].setdefault(file_info["sha256"],
                                               pefile_object)

        # Get the file data
        with open(file_info["path"], "rb") as file_open:
            file_data = file_open.read()

        if metadata.get("pids", False):
            if len(metadata["pids"]) == 1:
                file_info["pid"] = metadata["pids"][0]
            else:
                file_info["pid"] = ",".join(metadata["pids"])

        metastrings = metadata.get("metadata", "").split(";?")
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[1]
            file_info["process_name"] = metastrings[1].split("\\")[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[2]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""
        if metastrings != "":
            try:
                file_info["cape_type_code"] = int(metastrings[0])
            except Exception as e:
                pass
            if file_info["cape_type_code"] == COMPRESSION:
                file_info["cape_type"] = "Decompressed PE Image"

            if file_info["cape_type_code"] in inject_map:
                file_info["cape_type"] = inject_map[
                    file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[3]
                    file_info["target_process"] = metastrings[3].split(
                        "\\")[-1]
                    file_info["target_pid"] = metastrings[4]

            if file_info["cape_type_code"] == INJECTION_SECTION:
                file_info["cape_type"] = "Injected Section"
                if len(metastrings) > 4:
                    file_info["section_handle"] = metastrings[4]

            simple_cape_type_map = {
                UNPACKED_PE: "Unpacked PE Image",
                UNPACKED_SHELLCODE: "Unpacked Shellcode",
            }
            if file_info["cape_type_code"] in simple_cape_type_map:
                file_info["cape_type"] = simple_cape_type_map[
                    file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[3]

            type_strings = file_info["type"].split()
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            # PlugX
            if file_info["cape_type_code"] == PLUGX_CONFIG:
                file_info["cape_type"] = "PlugX Config"
                if plugx_parser:
                    plugx_config = plugx_parser.parse_config(
                        file_data, len(file_data))
                    if plugx_config:
                        cape_name = "PlugX"
                        config[cape_name] = dict()
                        for key, value in plugx_config.items():
                            config[cape_name].update({key: [value]})
                    else:
                        log.error(
                            "CAPE: PlugX config parsing failure - size many not be handled."
                        )
                    append_file = False
            if file_info["cape_type_code"] in code_mapping:
                file_info["cape_type"] = code_mapping[
                    file_info["cape_type_code"]]
                if file_info["cape_type_code"] in config_mapping:
                    file_info["cape_type"] = code_mapping[
                        file_info["cape_type_code"]]

                type_strings = file_info["type"].split()
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

                if file_info["cape_type_code"] in name_mapping:
                    cape_name = name_mapping[file_info["cape_type_code"]]

                append_file = True

            if file_info["cape_type_code"] == EVILGRAB_DATA:
                cape_name = "EvilGrab"
                file_info["cape_type"] = "EvilGrab Data"
                if file_info["size"] == 256 or file_info["size"] == 260:
                    config[cape_name].update({"filepath": [format(file_data)]})
                if file_info["size"] > 0x1000:
                    append_file = True
                else:
                    append_file = False
            if file_info["cape_type_code"] == SEDRECO_DATA:
                cape_name = "Sedreco"
                config[cape_name] = dict()
                config[cape_name]["cape_type"] = "Sedreco Config"
                ConfigItem = "Unknown"
                if len(metastrings) > 4:
                    SedrecoConfigIndex = metastrings[4]
                    if SedrecoConfigIndex in sedreco_map:
                        ConfigItem = sedreco_map[SedrecoConfigIndex]

                ConfigData = format(file_data)
                if ConfigData:
                    config[cape_name].update({ConfigItem: [ConfigData]})
                append_file = False

            if file_info["cape_type_code"] == CERBER_CONFIG:
                file_info["cape_type"] = "Cerber Config"
                cape_name = "Cerber"
                config[cape_name] = dict()
                config["cape_type"] = "Cerber Config"

                parsed = json.loads(file_data.rstrip(b"\0"))
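                # Cerber embeds its configuration as NUL-padded JSON; the
                # decoded blob is pretty-printed below for the report.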
                config[cape_name].update({
                    "JSON Data":
                    [json.dumps(parsed, indent=4, sort_keys=True)]
                })
                append_file = True

            if file_info["cape_type_code"] == URSNIF_PAYLOAD:
                cape_name = "Ursnif"
                config[cape_name] = dict()
                config[cape_name]["cape_type"] = "Ursnif Payload"

                file_info["cape_type"] = "Ursnif Payload"
            if file_info["cape_type_code"] == URSNIF_CONFIG:
                file_info["cape_type"] = "Ursnif Config"
                cape_name = "Ursnif"
                malwareconfig_loaded = False
                try:
                    malwareconfig_parsers = os.path.join(
                        CUCKOO_ROOT, "modules", "processing", "parsers",
                        "CAPE")
                    file, pathname, description = imp.find_module(
                        cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname,
                                             description)
                    malwareconfig_loaded = True
                    log.debug("CAPE: Imported malwareconfig.com parser %s",
                              cape_name)
                except ImportError:
                    log.debug(
                        "CAPE: malwareconfig.com parser: No module named %s",
                        cape_name)
                if malwareconfig_loaded:
                    try:
                        malwareconfig_config = module.config(file_data)
                        if malwareconfig_config:
                            config[cape_name] = dict()
                            config[cape_name]["cape_type"] = "Ursnif Config"
                            if isinstance(malwareconfig_config, list):
                                for (key,
                                     value) in malwareconfig_config[0].items():
                                    config[cape_name].update({key: [value]})
                            elif isinstance(malwareconfig_config, dict):
                                for (key,
                                     value) in malwareconfig_config.items():
                                    config[cape_name].update({key: [value]})
                    except Exception as e:
                        log.error(
                            "CAPE: malwareconfig parsing error with %s: %s",
                            cape_name, e)
                append_file = False
            # Hancitor
            if file_info["cape_type_code"] == HANCITOR_PAYLOAD:
                cape_name = "Hancitor"
                config[cape_name] = dict()
                config[cape_name]["cape_type"] = "Hancitor Payload"
                file_info["cape_type"] = "Hancitor Payload"
            if file_info["cape_type_code"] == HANCITOR_CONFIG:
                cape_name = "Hancitor"
                file_info["cape_type"] = "Hancitor Config"
                ConfigStrings = file_data.split(b"\0")
                ConfigStrings = [_f for _f in ConfigStrings if _f]
                ConfigItem = "Campaign Code"
                config[cape_name] = dict()
                config[cape_name]["cape_type"] = "Hancitor Config"
                config[cape_name].update({ConfigItem: [ConfigStrings[0]]})
                GateURLs = ConfigStrings[1].split(b"|")
                for index, value in enumerate(GateURLs):
                    ConfigItem = "Gate URL " + str(index + 1)
                    config[cape_name].update({ConfigItem: [value]})
                append_file = False
            # QakBot
            if file_info["cape_type_code"] == QAKBOT_CONFIG:
                file_info["cape_type"] = "QakBot Config"
                cape_name = "QakBot"
                config[cape_name] = dict()
                config[cape_name]["cape_type"] = "QakBot Config"
                config_tmp = static_config_parsers(cape_name, file_data)
                if config_tmp and config_tmp[cape_name]:
                    config.update(config_tmp)
                append_file = False
            # Attempt to decrypt script dump
            if file_info["cape_type_code"] == SCRIPT_DUMP:
                data = file_data.decode("utf-16").replace("\x00", "")
                cape_name = "ScriptDump"
                malwareconfig_loaded = False
                try:
                    malwareconfig_parsers = os.path.join(
                        CUCKOO_ROOT, "modules", "processing", "parsers",
                        "CAPE")
                    file, pathname, description = imp.find_module(
                        cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname,
                                             description)
                    malwareconfig_loaded = True
                    log.debug("CAPE: Imported malwareconfig.com parser %s",
                              cape_name)
                except ImportError:
                    log.debug(
                        "CAPE: malwareconfig.com parser: No module named %s",
                        cape_name)
                if malwareconfig_loaded:
                    try:
                        script_data = module.config(self, data)
                        if script_data and "more_eggs" in script_data["type"]:
                            bindata = script_data["data"]
                            sha256 = hashlib.sha256(bindata).hexdigest()
                            filepath = os.path.join(self.CAPE_path, sha256)
                            tmpstr = file_info["pid"]
                            tmpstr += "," + file_info["process_path"]
                            tmpstr += "," + file_info["module_path"]
                            if "text" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsJS"
                                outstr = str(
                                    MOREEGGSJS_PAYLOAD) + "," + tmpstr + "\n"
                                with open(filepath + "_info.txt",
                                          "w") as infofd:
                                    infofd.write(outstr)
                                with open(filepath, "w") as cfile:
                                    cfile.write(bindata)
                            elif "binary" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsBin"
                                outstr = str(
                                    MOREEGGSBIN_PAYLOAD) + "," + tmpstr + "\n"
                                with open(filepath + "_info.txt",
                                          "w") as infofd:
                                    infofd.write(outstr)
                                with open(filepath, "wb") as cfile:
                                    cfile.write(bindata)
                            if os.path.exists(filepath):
                                self.script_dump_files.append(filepath)
                        else:
                            file_info["cape_type"] = "Script Dump"
                            log.info(
                                "CAPE: Script Dump does not contain known encrypted payload."
                            )
                    except Exception as e:
                        log.error(
                            "CAPE: malwareconfig parsing error with %s: %s",
                            cape_name, e)
                append_file = True

            # More_Eggs
            if file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
                file_info["cape_type"] = "More Eggs JS Payload"
                cape_name = "MoreEggs"
                append_file = True

        # Process CAPE Yara hits
        for hit in file_info["cape_yara"]:
            # Check to see if file is packed with UPX
            if hit["name"] == "UPX":
                log.info(
                    "CAPE: Found UPX Packed sample - attempting to unpack")
                self.upx_unpack(file_data)

            # Check for a payload or config hit
            extraction_types = ["payload", "config", "loader"]
            try:
                for type in extraction_types:
                    if type in hit["meta"].get("cape_type", "").lower():
                        file_info["cape_type"] = hit["meta"]["cape_type"]
                        cape_name = hit["name"].replace("_", " ")
            except Exception as e:
                print("Cape type error: {}".format(e))
            type_strings = file_info["type"].split()
            if "-bit" not in file_info["cape_type"]:
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

            suppress_parsing_list = ["Cerber", "Ursnif"]

            if hit["name"] in suppress_parsing_list:
                continue

            tmp_config = static_config_parsers(hit["name"].replace("_", " "),
                                               file_data)
            if tmp_config and tmp_config[hit["name"].replace("_", " ")]:
                config.update(tmp_config)

        if cape_name:
            if not "detections" in self.results:
                if cape_name != "UPX":
                    #ToDo list of keys
                    self.results["detections"] = cape_name

        # Remove duplicate payloads from web ui
        for cape_file in self.cape["payloads"] or []:
            if file_info["size"] == cape_file["size"]:
                if HAVE_PYDEEP:
                    ssdeep_grade = pydeep.compare(
                        file_info["ssdeep"].encode("utf-8"),
                        cape_file["ssdeep"].encode("utf-8"))
                    if ssdeep_grade >= ssdeep_threshold:
                        append_file = False
                if file_info.get("entrypoint") and file_info.get(
                        "ep_bytes") and cape_file.get("entrypoint"):
                    if (file_info.get("entrypoint") and file_info["entrypoint"]
                            == cape_file["entrypoint"]
                            and file_info["cape_type_code"]
                            == cape_file["cape_type_code"] and
                            file_info["ep_bytes"] == cape_file["ep_bytes"]):
                        log.debug("CAPE duplicate output file skipped")
                        append_file = False

        if append_file:
            pretime = datetime.now()
            capa_details = flare_capa_details(file_path, "CAPE")
            if capa_details:
                file_info["flare_capa"] = capa_details
            self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
            self.cape["payloads"].append(file_info)

        if config and config not in self.cape["configs"]:
            self.cape["configs"].append(config)
Example #24
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
                CuckooReportError("Mongo schema version not expected, check data migration tool")
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #    created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        self.db.fs.files.ensure_index("sha256", unique=True,
                                      sparse=True, name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)
        # Store the sample in GridFS.
        if results["info"]["category"] == "file":
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"] = {"pcap_id": pcap_id}
            report["network"].update(results["network"])

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id

            new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Store the Zipped Droppings file in GridFS and reference it back in the report.
        #cuckoo_dropped_zip_path = os.path.join(self.analysis_path, "cuckoodroppings.zip")
        #cuckoo_dropped_zip = File(cuckoo_dropped_zip_path)
        #if cuckoo_dropped_zip.valid():
        #    cuckoo_droppings_id = self.store_file(cuckoo_dropped_zip)
        #    report["zippeddroppings"] = {"cuckoo_droppings_id": cuckoo_droppings_id}
        #    report["zippeddroppings"].update(results["zippeddroppings"])


        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]

            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path,
                                         "shots", shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"],
                                 "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

        # Other info we want quick access to from the web UI
        if report.has_key("virustotal") and report["virustotal"] and report["virustotal"].has_key("positives") and report["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (report["virustotal"]["positives"], report["virustotal"]["total"])

        new_suricata_files = []
        if report.has_key("suricata") and report["suricata"]:
            suricata = {}
            suricata["info"] = {}
            suricata["info"]["id"] = report["info"]["id"]
            # Walk through the suricata extracted files, store them in GridFS and update the
            # report with the ObjectIds
            # Store the suri extracted files in GridFS and reference it back in the report.
            suri_extracted_zip_path = os.path.join(self.analysis_path, "logs/files.zip")
            suri_extracted_zip = File(suri_extracted_zip_path)
            if suri_extracted_zip.valid():
                suri_extracted_zip_id = self.store_file(suri_extracted_zip)
                suricata["suri_extracted_zip"]=suri_extracted_zip_id

            if report["suricata"].has_key("files") and len(report["suricata"]["files"]) > 0:
                suricata["file_cnt"] = len(report["suricata"]["files"])
                for suricata_file_e in report["suricata"]["files"]:
                    if suricata_file_e.has_key("file_info"):
                        tmp_suricata_file_d = dict(suricata_file_e)
                        suricata_file = File(suricata_file_e["file_info"]["path"])
                        if suricata_file.valid():
                            suricata_file_id = self.store_file(suricata_file, filename=suricata_file_e["file_info"]["name"])
                            tmp_suricata_file_d["object_id"] = suricata_file_id
                            new_suricata_files.append(tmp_suricata_file_d)
                suricata["files"] = new_suricata_files
            if report["suricata"].has_key("alert_log_full_path") and report["suricata"]["alert_log_full_path"]:
                suricata_alert_log = File(report["suricata"]["alert_log_full_path"])
                if suricata_alert_log.valid():
                    suricata_alert_log_id = self.store_file(suricata_alert_log)
                    suricata["alert_log_id"] = suricata_alert_log_id

            if report["suricata"].has_key("tls_log_full_path") and report["suricata"]["tls_log_full_path"]:
                tls_log = File(report["suricata"]["tls_log_full_path"])
                if tls_log.valid():
                    tls_log_id = self.store_file(tls_log)
                    suricata["tls_log_id"] = tls_log_id

            if report["suricata"].has_key("http_log_full_path") and report["suricata"]["http_log_full_path"]:
                http_log = File(report["suricata"]["http_log_full_path"])
                if http_log.valid():
                    http_log_id = self.store_file(http_log)
                    suricata["http_log_id"] = http_log_id

            if report["suricata"].has_key("file_log_full_path") and report["suricata"]["file_log_full_path"]:
                file_log = File(report["suricata"]["file_log_full_path"])
                if file_log.valid():
                    file_log_id = self.store_file(file_log)
                    suricata["file_log_id"] = file_log_id
            if report["suricata"].has_key("dns_log_full_path") and report["suricata"]["dns_log_full_path"]:
                dns_log = File(report["suricata"]["dns_log_full_path"])
                if dns_log.valid():
                    dns_log_id = self.store_file(dns_log)
                    suricata["dns_log_id"] = dns_log_id
            if report["suricata"].has_key("ssh_log_full_path") and report["suricata"]["ssh_log_full_path"]:
                ssh_log = File(report["suricata"]["ssh_log_full_path"])
                if ssh_log.valid():
                    ssh_log_id = self.store_file(ssh_log)
                    suricata["ssh_log_id"] = ssh_log_id

            if report["suricata"].has_key("tls") and len(report["suricata"]["tls"]) > 0:
                suricata["tls_cnt"] = len(report["suricata"]["tls"])
                suricata["tls"]=report["suricata"]["tls"]
            if report["suricata"] and report["suricata"].has_key("alerts") and len(report["suricata"]["alerts"]) > 0:
                suricata["alert_cnt"] = len(report["suricata"]["alerts"])
                suricata["alerts"]=report["suricata"]["alerts"]
            if results["suricata"].has_key("http") and len(report["suricata"]["http"]) > 0:
                suricata["http_cnt"] = len(report["suricata"]["http"])
                suricata["http"]=report["suricata"]["http"]
            self.db.suricata.save(suricata)
            #do not store this in analysis collection
            del report["suricata"]
        if results.has_key("behavior") and results["behavior"].has_key("martianlist") and results["behavior"]["martianlist"] and len(results["behavior"]["martianlist"]) > 0:
            report["mlist_cnt"] = len(results["behavior"]["martianlist"])

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.disconnect()
Example #25
0
    def run(self):
        """Run Suricata.
        @return: hash with alerts
        """
        self.key = "suricata"
        # General
        SURICATA_CONF = self.options.get("conf", None)
        SURICATA_EVE_LOG = self.options.get("evelog", None)
        SURICATA_ALERT_LOG = self.options.get("alertlog", None)
        SURICATA_TLS_LOG = self.options.get("tlslog", None)
        SURICATA_HTTP_LOG = self.options.get("httplog", None)
        SURICATA_SSH_LOG = self.options.get("sshlog", None)
        SURICATA_DNS_LOG = self.options.get("dnslog", None)
        SURICATA_FILE_LOG = self.options.get("fileslog", None)
        SURICATA_FILES_DIR = self.options.get("filesdir", None)
        SURICATA_RUNMODE = self.options.get("runmode", None)
        SURICATA_FILE_BUFFER = self.options.get("buffer", 8192)
        Z7_PATH = self.options.get("7zbin", None)
        FILES_ZIP_PASS = self.options.get("zippass", None)
        SURICATA_FILE_COPY_DST_DIR = self.options.get("file_copy_dest_dir",
                                                      None)
        SURICATA_FILE_COPY_MAGIC_RE = self.options.get("file_magic_re", None)
        if SURICATA_FILE_COPY_MAGIC_RE:
            try:
                SURICATA_FILE_COPY_MAGIC_RE = re.compile(
                    SURICATA_FILE_COPY_MAGIC_RE)
            except:
                log.warning("Failed to compile suricata copy magic RE" %
                            (SURICATA_FILE_COPY_MAGIC_RE))
                SURICATA_FILE_COPY_MAGIC_RE = None
        # Socket
        SURICATA_SOCKET_PATH = self.options.get("socket_file", None)
        SURICATA_SOCKET_PYLIB = self.options.get("pylib_dir", None)

        # Command Line
        SURICATA_BIN = self.options.get("bin", None)

        suricata = {}
        suricata["alerts"] = []
        suricata["tls"] = []
        suricata["perf"] = []
        suricata["files"] = []
        suricata["http"] = []
        suricata["dns"] = []
        suricata["ssh"] = []
        suricata["file_info"] = []

        suricata["eve_log_full_path"] = None
        suricata["alert_log_full_path"] = None
        suricata["tls_log_full_path"] = None
        suricata["http_log_full_path"] = None
        suricata["file_log_full_path"] = None
        suricata["ssh_log_full_path"] = None
        suricata["dns_log_full_path"] = None

        SURICATA_ALERT_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                  SURICATA_ALERT_LOG)
        SURICATA_TLS_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_TLS_LOG)
        SURICATA_HTTP_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                 SURICATA_HTTP_LOG)
        SURICATA_SSH_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_SSH_LOG)
        SURICATA_DNS_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_DNS_LOG)
        SURICATA_EVE_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_EVE_LOG)
        SURICATA_FILE_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                 SURICATA_FILE_LOG)
        SURICATA_FILES_DIR_FULL_PATH = "%s/%s" % (self.logs_path,
                                                  SURICATA_FILES_DIR)

        if not os.path.exists(SURICATA_CONF):
            log.warning("Unable to Run Suricata: Conf File %s Does Not Exist" %
                        (SURICATA_CONF))
            return suricata["alerts"]
        if not os.path.exists(self.pcap_path):
            log.warning("Unable to Run Suricata: Pcap file %s Does Not Exist" %
                        (self.pcap_path))
            return suricata["alerts"]

        # Add to this if you wish to ignore any SIDs for the suricata alert logs
        # Useful for ignoring SIDs without disabling them. Ex: suppress an alert for
        # a SID which is a dependent of another. (Bad TCP data for HTTP(S) alert)
        sid_blacklist = [
            # SURICATA FRAG IPv6 Fragmentation overlap
            2200074,
            # ET INFO InetSim Response from External Source Possible SinkHole
            2017363,
            # SURICATA UDPv4 invalid checksum
            2200075,
            # ET POLICY SSLv3 outbound connection from client vulnerable to POODLE attack
            2019416,
        ]
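        # As a purely hypothetical example, another rule could be suppressed in
        # the same way by appending its SID here, e.g.:
        #     sid_blacklist.append(1000001)  # hypothetical local rule SID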

        if SURICATA_RUNMODE == "socket":
            if SURICATA_SOCKET_PYLIB is not None:
                sys.path.append(SURICATA_SOCKET_PYLIB)
            try:
                from suricatasc import SuricataSC
            except Exception as e:
                log.warning("Failed to import suricatasc lib %s" % (e))
                return suricata

            loopcnt = 0
            maxloops = 24
            loopsleep = 5

            args = {}
            args["filename"] = self.pcap_path
            args["output-dir"] = self.logs_path

            suris = SuricataSC(SURICATA_SOCKET_PATH)
            try:
                suris.connect()
                suris.send_command("pcap-file", args)
            except Exception as e:
                log.warning(
                    "Failed to connect to socket and send command %s: %s" %
                    (SURICATA_SOCKET_PATH, e))
                return suricata
            while loopcnt < maxloops:
                try:
                    pcap_flist = suris.send_command("pcap-file-list")
                    current_pcap = suris.send_command("pcap-current")
                    log.debug("pcapfile list: %s current pcap: %s" %
                              (pcap_flist, current_pcap))

                    if self.pcap_path not in pcap_flist["message"][
                            "files"] and current_pcap[
                                "message"] != self.pcap_path:
                        log.debug(
                            "Pcap not in list and not the current pcap; assuming it has been processed"
                        )
                        break
                    else:
                        loopcnt = loopcnt + 1
                        time.sleep(loopsleep)
                except Exception as e:
                    log.warning(
                        "Failed to get pcap status breaking out of loop %s" %
                        (e))
                    break

            if loopcnt == maxloops:
                log.warning(
                    "Loop timeout of %ssec occurred waiting for file %s to finish processing"
                    % (maxloops * loopsleep, self.pcap_path))
                return suricata
        elif SURICATA_RUNMODE == "cli":
            if not os.path.exists(SURICATA_BIN):
                log.warning(
                    "Unable to Run Suricata: Bin File %s Does Not Exist" %
                    (SURICATA_BIN))
                return suricata["alerts"]
            cmd = "%s -c %s -k none -l %s -r %s" % (
                SURICATA_BIN, SURICATA_CONF, self.logs_path, self.pcap_path)
            ret, stdout, stderr = self.cmd_wrapper(cmd)
            if ret != 0:
                log.warning(
                    "Suricata returned an Exit Value Other than Zero %s" %
                    (stderr))
                return suricata

        else:
            log.warning("Unknown Suricata Runmode")
            return suricata

        datalist = []
        if os.path.exists(SURICATA_EVE_LOG_FULL_PATH):
            suricata["eve_log_full_path"] = SURICATA_EVE_LOG_FULL_PATH
            with open(SURICATA_EVE_LOG_FULL_PATH, "rb") as eve_log:
                datalist.append(eve_log.read())
        else:
            paths = [("alert_log_full_path", SURICATA_ALERT_LOG_FULL_PATH),
                     ("tls_log_full_path", SURICATA_TLS_LOG_FULL_PATH),
                     ("http_log_full_path", SURICATA_HTTP_LOG_FULL_PATH),
                     ("ssh_log_full_path", SURICATA_SSH_LOG_FULL_PATH),
                     ("dns_log_full_path", SURICATA_DNS_LOG_FULL_PATH)]
            for path in paths:
                if os.path.exists(path[1]):
                    suricata[path[0]] = path[1]
                    with open(path[1], "rb") as the_log:
                        datalist.append(the_log.read())

        if not datalist:
            log.warning("Suricata: Failed to find usable Suricata log file")

        for data in datalist:
            for line in data.splitlines():
                try:
                    parsed = json.loads(line)
                except:
                    log.warning("Suricata: Failed to parse line as json" %
                                (line))
                    continue

                if parsed["event_type"] == "alert":
                    if (parsed["alert"]["signature_id"] not in sid_blacklist
                            and not parsed["alert"]["signature"].startswith(
                                "SURICATA STREAM")):
                        alog = dict()
                        if parsed["alert"]["gid"] == '':
                            alog["gid"] = "None"
                        else:
                            alog["gid"] = parsed["alert"]["gid"]
                        if parsed["alert"]["rev"] == '':
                            alog["rev"] = "None"
                        else:
                            alog["rev"] = parsed["alert"]["rev"]
                        if parsed["alert"]["severity"] == '':
                            alog["severity"] = "None"
                        else:
                            alog["severity"] = parsed["alert"]["severity"]
                        alog["sid"] = parsed["alert"]["signature_id"]
                        alog["srcport"] = parsed["src_port"]
                        alog["srcip"] = parsed["src_ip"]
                        alog["dstport"] = parsed["dest_port"]
                        alog["dstip"] = parsed["dest_ip"]
                        alog["protocol"] = parsed["proto"]
                        alog["timestamp"] = parsed["timestamp"].replace(
                            "T", " ")
                        if parsed["alert"]["category"] == '':
                            alog["category"] = "None"
                        else:
                            alog["category"] = parsed["alert"]["category"]
                        alog["signature"] = parsed["alert"]["signature"]
                        suricata["alerts"].append(alog)

                elif parsed["event_type"] == "http":
                    hlog = dict()
                    hlog["srcport"] = parsed["src_port"]
                    hlog["srcip"] = parsed["src_ip"]
                    hlog["dstport"] = parsed["dest_port"]
                    hlog["dstip"] = parsed["dest_ip"]
                    hlog["timestamp"] = parsed["timestamp"].replace("T", " ")
                    try:
                        hlog["uri"] = parsed["http"]["url"]
                    except:
                        hlog["uri"] = "None"
                    hlog["length"] = parsed["http"]["length"]
                    try:
                        hlog["hostname"] = parsed["http"]["hostname"]
                    except:
                        hlog["hostname"] = "None"
                    try:
                        hlog["status"] = str(parsed["http"]["status"])
                    except:
                        hlog["status"] = "None"
                    try:
                        hlog["method"] = parsed["http"]["http_method"]
                    except:
                        hlog["method"] = "None"
                    try:
                        hlog["contenttype"] = parsed["http"][
                            "http_content_type"]
                    except:
                        hlog["contenttype"] = "None"
                    try:
                        hlog["ua"] = parsed["http"]["http_user_agent"]
                    except:
                        hlog["ua"] = "None"
                    try:
                        hlog["referrer"] = parsed["http"]["http_refer"]
                    except:
                        hlog["referrer"] = "None"
                    suricata["http"].append(hlog)

                elif parsed["event_type"] == "tls":
                    tlog = dict()
                    tlog["srcport"] = parsed["src_port"]
                    tlog["srcip"] = parsed["src_ip"]
                    tlog["dstport"] = parsed["dest_port"]
                    tlog["dstip"] = parsed["dest_ip"]
                    tlog["timestamp"] = parsed["timestamp"].replace("T", " ")
                    tlog["fingerprint"] = parsed["tls"]["fingerprint"]
                    tlog["issuer"] = parsed["tls"]["issuerdn"]
                    tlog["version"] = parsed["tls"]["version"]
                    tlog["subject"] = parsed["tls"]["subject"]
                    suricata["tls"].append(tlog)

                elif parsed["event_type"] == "ssh":
                    suricata["ssh"].append(parsed)
                elif parsed["event_type"] == "dns":
                    suricata["dns"].append(parsed)

        if os.path.exists(SURICATA_FILE_LOG_FULL_PATH):
            suricata["file_log_full_path"] = SURICATA_FILE_LOG_FULL_PATH
            with open(SURICATA_FILE_LOG_FULL_PATH, "rb") as file_log_fh:
                file_log_lines = file_log_fh.readlines()
            for l in file_log_lines:
                try:
                    d = json.loads(l)
                except:
                    log.warning("failed to load JSON from file log")
                    continue
                # Some log entries do not have an id
                if "id" not in d:
                    continue
                src_file = "%s/file.%s" % (SURICATA_FILES_DIR_FULL_PATH,
                                           d["id"])
                if os.path.exists(src_file):
                    if SURICATA_FILE_COPY_MAGIC_RE and SURICATA_FILE_COPY_DST_DIR and os.path.exists(
                            SURICATA_FILE_COPY_DST_DIR):
                        try:
                            m = re.search(SURICATA_FILE_COPY_MAGIC_RE,
                                          d["magic"])
                            if m:
                                dst_file = "%s/%s" % (
                                    SURICATA_FILE_COPY_DST_DIR, d["md5"])
                                shutil.copy2(src_file, dst_file)
                                log.warning("copied %s to %s" %
                                            (src_file, dst_file))
                        except Exception as e:
                            log.warning("Unable to copy suricata file: %s" % e)
                    file_info = File(file_path=src_file).get_all()
                    texttypes = [
                        "ASCII",
                        "Windows Registry text",
                        "XML document text",
                        "Unicode text",
                    ]
                    readit = False
                    for texttype in texttypes:
                        if texttype in file_info["type"]:
                            readit = True
                            break
                    if readit:
                        with open(file_info["path"], "rb") as drop_open:
                            filedata = drop_open.read(SURICATA_FILE_BUFFER + 1)
                        if len(filedata) > SURICATA_FILE_BUFFER:
                            file_info["data"] = convert_to_printable(
                                filedata[:SURICATA_FILE_BUFFER] +
                                " <truncated>")
                        else:
                            file_info["data"] = convert_to_printable(filedata)
                    d["file_info"] = file_info
                if "/" in d["filename"]:
                    d["filename"] = d["filename"].split("/")[-1]
                suricata["files"].append(d)
Example #26
0
    def process_file(self, file_path, append_file, metadata={}):
        """Process file.
        @return: file_info
        """

        config = {}
        cape_name = ""
        type_string = ""

        if not os.path.exists(file_path):
            return

        buf = self.options.get("buffer", BUFSIZE)

        file_info, pefile_object = File(file_path, metadata.get("metadata", "")).get_all()
        if pefile_object:
            self.results.setdefault("pefiles", {})
            self.results["pefiles"].setdefault(file_info["sha256"], pefile_object)

        # Run any applicable file extractors/unpackers
        generic_file_extractors(file_path, self.dropped_path, file_info.get("type", ""), file_info)

        # Get the file data
        with open(file_info["path"], "rb") as file_open:
            file_data = file_open.read()

        is_text_file(file_info, self.CAPE_path, buf, file_data)

        if metadata.get("pids", False):
            if len(metadata["pids"]) == 1:
                file_info["pid"] = metadata["pids"][0]
            else:
                file_info["pid"] = ",".join(metadata["pids"])

        metastrings = metadata.get("metadata", "").split(";?")
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[1]
            file_info["process_name"] = metastrings[1].rsplit("\\", 1)[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[2]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""
        if metastrings and metastrings[0] and metastrings[0].isdigit():
            file_info["cape_type_code"] = int(metastrings[0])

            if file_info["cape_type_code"] == TYPE_STRING:
                if len(metastrings) > 4:
                    type_string = metastrings[3]

            if file_info["cape_type_code"] == COMPRESSION:
                file_info["cape_type"] = "Decompressed PE Image"

            if file_info["cape_type_code"] in inject_map:
                file_info["cape_type"] = inject_map[file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[3]
                    file_info["target_process"] = metastrings[3].rsplit("\\", 1)[-1]
                    file_info["target_pid"] = metastrings[4]

            if file_info["cape_type_code"] in unpack_map:
                file_info["cape_type"] = unpack_map[file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[3]

            type_strings = file_info["type"].split()

            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"

            if file_info["cape_type_code"] in code_mapping:
                file_info["cape_type"] = code_mapping[file_info["cape_type_code"]]
                type_strings = file_info["type"].split()
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if file_info["cape_type_code"] in name_mapping:
                    cape_name = name_mapping[file_info["cape_type_code"]]
                append_file = True

                """
                ConfigData = format(file_data)
                if ConfigData:
                    config[cape_name].update({ConfigItem: [ConfigData]})
                """

            # PlugX
            if file_info["cape_type_code"] == PLUGX_CONFIG:
                file_info["cape_type"] = "PlugX Config"
                if plugx_parser:
                    plugx_config = plugx_parser.parse_config(file_data, len(file_data))
                    if plugx_config:
                        cape_name = "PlugX"
                        config[cape_name] = {}
                        for key, value in plugx_config.items():
                            config[cape_name].update({key: [value]})
                    else:
                        log.error("CAPE: PlugX config parsing failure - size many not be handled")
                    append_file = False

            # Attempt to decrypt script dump
            if file_info["cape_type_code"] == SCRIPT_DUMP:
                data = file_data.decode("utf-16").replace("\x00", "")
                cape_name = "ScriptDump"
                malwareconfig_loaded = False
                try:
                    malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "CAPE")
                    file, pathname, description = imp.find_module(cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname, description)
                    malwareconfig_loaded = True
                    log.debug("CAPE: Imported parser %s", cape_name)
                except ImportError:
                    log.debug("CAPE: parser: No module named %s", cape_name)
                if malwareconfig_loaded:
                    try:
                        script_data = module.config(self, data)
                        if script_data and "more_eggs" in script_data["type"]:
                            bindata = script_data["data"]
                            sha256 = hashlib.sha256(bindata).hexdigest()
                            filepath = os.path.join(self.CAPE_path, sha256)
                            tmpstr = file_info["pid"]
                            tmpstr += f",{file_info['process_path']}"
                            tmpstr += f",{file_info['module_path']}"
                            if "text" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsJS"
                                outstr = f"{MOREEGGSJS_PAYLOAD},{tmpstr}\n"
                                # with open(f"{filepath}_info.txt", "w") as infofd:
                                #    infofd.write(outstr)
                                with open(filepath, "w") as cfile:
                                    cfile.write(bindata)
                            elif "binary" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsBin"
                                outstr = f"{MOREEGGSBIN_PAYLOAD},{tmpstr}\n"
                                # with open(f"{filepath}_info.txt", "w") as infofd:
                                #    infofd.write(outstr)
                                with open(filepath, "wb") as cfile:
                                    cfile.write(bindata)
                            if os.path.exists(filepath):
                                self.script_dump_files.append(filepath)
                        else:
                            file_info["cape_type"] = "Script Dump"
                            log.info("CAPE: Script Dump does not contain known encrypted payload")
                    except Exception as e:
                        log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e)
                append_file = True

            # More_Eggs
            if file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
                file_info["cape_type"] = "More Eggs JS Payload"
                cape_name = "MoreEggs"
                append_file = True

        # Process CAPE Yara hits
        for hit in file_info["cape_yara"]:
            # Check to see if file is packed with UPX
            if hit["name"] == "UPX":
                log.info("CAPE: Found UPX Packed sample - attempting to unpack")
                self.upx_unpack(file_data)

            # Check for a payload or config hit
            extraction_types = ("payload", "config", "loader")

            try:
                if any([file_type in hit["meta"].get("cape_type", "").lower() for file_type in extraction_types]):
                    file_info["cape_type"] = hit["meta"]["cape_type"]
                    cape_name = hit["name"].replace("_", " ")
            except Exception as e:
                print(f"Cape type error: {e}")
            type_strings = file_info["type"].split()
            if "-bit" not in file_info["cape_type"]:
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

            if hit["name"] == "GuLoader":
                self.detect2pid(file_info["pid"], "GuLoader")

            cape_name = hit["name"].replace("_", " ")
            tmp_config = static_config_parsers(hit["name"], file_data)
            if tmp_config and tmp_config.get(cape_name):
                config.update(tmp_config[cape_name])

        if type_string:
            log.info("CAPE: type_string: %s", type_string)
            tmp_config = static_config_parsers(type_string.split(" ", 1)[0], file_data)
            if tmp_config:
                cape_name = type_string.split(" ", 1)[0]
                log.info("CAPE: config returned for: %s", cape_name)
                config.update(tmp_config)

        if cape_name:
            if "detections" not in self.results:
                if cape_name != "UPX":
                    # ToDo list of keys
                    self.results["detections"] = cape_name
            if file_info.get("pid"):
                self.detect2pid(file_info["pid"], cape_name)

        # Remove duplicate payloads from web ui
        for cape_file in self.cape["payloads"] or []:
            if file_info["size"] == cape_file["size"]:
                if HAVE_PYDEEP:
                    ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode(), cape_file["ssdeep"].encode())
                    if ssdeep_grade >= ssdeep_threshold:
                        append_file = False
                if file_info.get("entrypoint") and file_info.get("ep_bytes") and cape_file.get("entrypoint"):
                    if (
                        file_info.get("entrypoint")
                        and file_info["entrypoint"] == cape_file["entrypoint"]
                        and file_info["cape_type_code"] == cape_file["cape_type_code"]
                        and file_info["ep_bytes"] == cape_file["ep_bytes"]
                    ):
                        log.debug("CAPE duplicate output file skipped")
                        append_file = False

        if append_file:
            if HAVE_FLARE_CAPA:
                pretime = datetime.now()
                capa_details = flare_capa_details(file_path, "cape")
                if capa_details:
                    file_info["flare_capa"] = capa_details
                self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
            self.cape["payloads"].append(file_info)

        if config and config not in self.cape["configs"]:
            if cape_name in multi_block_config and self.cape["configs"]:
                for conf in self.cape["configs"]:
                    if cape_name in conf:
                        conf[cape_name].update(config)
            else:
                # if the malware name is missing it will break config visualization
                if cape_name not in config:
                    config = {cape_name: config}
                if config not in self.cape["configs"]:
                    self.cape["configs"].append(config)
Example #27
0
    def launch_analysis(self):
        """Start analysis."""
        succeeded = False
        dead_machine = False
        self.socks5s = _load_socks5_operational()

        log.info("Task #{0}: Starting analysis of {1} '{2}'".format(
            self.task.id, self.task.category.upper(),
            convert_to_printable(self.task.target)))

        # Initialize the analysis folders.
        if not self.init_storage():
            log.debug("Failed to initialize the analysis folder")
            return False

        sha256 = File(self.task.target).get_sha256()

        if self.task.category in ["file", "pcap", "static"]:
            # Check whether the file has been changed for some unknown reason.
            # And fail this analysis if it has been modified.
            if not self.check_file(sha256):
                return False

            # Store a copy of the original file.
            if not self.store_file(sha256):
                return False

        if self.task.category in ("pcap", "static"):
            if self.task.category == "pcap":
                if hasattr(os, "symlink"):
                    os.symlink(self.binary,
                               os.path.join(self.storage, "dump.pcap"))
                else:
                    shutil.copy(self.binary,
                                os.path.join(self.storage, "dump.pcap"))
            # create the logs/files directories as
            # normally the resultserver would do it
            dirnames = ["logs", "files", "aux"]
            for dirname in dirnames:
                try:
                    os.makedirs(os.path.join(self.storage, dirname))
                except:
                    pass
            return True

        # Acquire analysis machine.
        try:
            self.acquire_machine()
            self.db.set_task_vm(self.task.id, self.machine.label,
                                self.machine.id)
        # At this point we can tell the ResultServer about it.
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Task #{0}: Cannot acquire machine: {1}".format(
                self.task.id, e))
            return False

        # Generate the analysis configuration file.
        options = self.build_options()

        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            log.exception(e)
            self.errors.put(e)

        aux = RunAuxiliary(task=self.task, machine=self.machine)

        try:
            unlocked = False

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id, self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            # Start the machine.
            machinery.start(self.machine.label)

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            aux.start()

            # Initialize the guest manager.
            guest = GuestManager(self.machine.name, self.machine.ip,
                                 self.machine.platform, self.task.id, self)

            options["clock"] = self.db.update_clock(self.task.id)
            self.db.guest_set_status(self.task.id, "starting")
            # Start the analysis.
            guest.start_analysis(options)
            if self.db.guest_get_status(self.task.id) == "starting":
                self.db.guest_set_status(self.task.id, "running")
                guest.wait_for_completion()

            self.db.guest_set_status(self.task.id, "stopping")
            succeeded = True
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error(str(e), extra={"task_id": self.task.id})
            dead_machine = True
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error(str(e), extra={"task_id": self.task.id})
        finally:
            # Stop Auxiliary modules.
            aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                try:
                    dump_path = get_memdump_path(self.task.id)
                    need_space, space_available = free_space_monitor(
                        os.path.dirname(dump_path), return_value=True)
                    if need_space:
                        log.error(
                            "Not enough free disk space! Could not dump ram (Only %d MB!)",
                            space_available)
                    else:
                        machinery.dump_memory(self.machine.label, dump_path)
                except NotImplementedError:
                    log.error("The memory dump functionality is not available "
                              "for the current machine manager.")

                except CuckooMachineError as e:
                    log.error(e)

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)

            except CuckooMachineError as e:
                log.warning(
                    "Task #{0}: Unable to stop machine {1}: {2}".format(
                        self.task.id, self.machine.label, e))

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            self.unroute_network()

            if dead_machine:
                # Remove the guest from the database, so that we can assign a
                # new guest when the task is being analyzed with another
                # machine.
                self.db.guest_remove(guest_log)

                # Remove the analysis directory that has been created so
                # far, as launch_analysis() is going to be doing that again.
                shutil.rmtree(self.storage)

                # This machine has turned dead, so we throw an exception here
                # which informs the AnalysisManager that it should analyze
                # this task again with another available machine.
                raise CuckooDeadMachine()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)

            except CuckooMachineError as e:
                log.error("Task #{0}: Unable to release machine {1}, reason "
                          "{2}. You might need to restore it manually.".format(
                              self.task.id, self.machine.label, e))

        return succeeded
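
launch_analysis() signals an unusable guest by raising CuckooDeadMachine so that the caller can retry on another machine. A minimal sketch of that retry loop, assuming the same exception class, logger, and task object, could be:

    def run_with_retries(self, max_attempts=3):
        # Hypothetical wrapper: re-run the analysis on a fresh machine whenever
        # the previous one is reported dead via CuckooDeadMachine.
        for attempt in range(1, max_attempts + 1):
            try:
                return self.launch_analysis()
            except CuckooDeadMachine:
                log.warning(
                    "Task #{0}: machine died (attempt {1} of {2}), retrying "
                    "on another machine".format(self.task.id, attempt, max_attempts))
        return False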
Example #28
0
    def run(self, results):
        """Writes report.
        @param results: Cuckoo results dict.
        @raise CuckooReportError: if fails to write report.
        """
        if not HAVE_JINJA2:
            raise CuckooReportError(
                "Failed to generate HTML report: Jinja2 library is not "
                "installed (install it with `pip install jinja2`)")

        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = []
            counter = 1
            for shot_name in os.listdir(shots_path):
                if not shot_name.endswith(".jpg"):
                    continue

                shot_path = os.path.join(shots_path, shot_name)
                if not os.path.getsize(shot_path):
                    continue

                shot = {}
                shot["id"] = os.path.splitext(File(shot_path).get_name())[0]
                shot["data"] = base64.b64encode(open(shot_path, "rb").read())
                shots.append(shot)

                counter += 1

            shots.sort(key=lambda shot: shot["id"])
            results["screenshots"] = shots
        else:
            results["screenshots"] = []

        env = Environment(autoescape=True)
        env.loader = FileSystemLoader(os.path.join(CUCKOO_ROOT, "data",
                                                   "html"))

        processed = None
        mapping = [
            ("file_read", "File", "Read"),
            ("file_written", "File", "Written"),
            ("file_deleted", "File", "Deleted"),
            ("file_opened", "File", "Opened"),
            ("file_copied", "File", "Copied"),
            ("file_moved", "File", "Moved"),
            ("connects_ip", "Network", "Connects IP"),
            ("resolves_url", "Network", "Resolves URL"),
            ("fetches_url", "Network", "Fetches URL"),
            ("connects_host", "Network", "Connects Host"),
            ("downloads_file_url", "Network", "Downloads File URL"),
            ("directory_created", "Directory", "Created"),
            ("directory_removed", "Directory", "Removed"),
            ("directory_enumerated", "Directory", "Enumerated"),
            ("regkey_opened", "Registry Key", "Opened"),
            ("regkey_deleted", "Registry Key", "Deleted"),
            ("regkey_read", "Registry Key", "Read"),
            ("regkey_written", "Registry Key", "Written"),
            ("mutex", "Mutex", "Accessed"),
        ]

        processed = {}
        for proc in results.get("behavior", {}).get("generic", []):
            for orig, cat, subcat in mapping:
                if cat not in processed:
                    processed[cat] = {}

                if subcat not in processed[cat]:
                    processed[cat][subcat] = []

                # Special handling required for file moved/copied.
                if orig == "file_moved" or orig == "file_copied":
                    for src, dst in proc.get("summary", {}).get(orig, []):
                        entry = "%s -> %s" % (src, dst)
                        processed[cat][subcat].append(entry)
                    continue

                if "summary" in proc and orig in proc["summary"]:
                    for content in proc["summary"][orig]:
                        processed[cat][subcat].append(content)

        try:
            tpl = env.get_template("report.html")
            html = tpl.render({
                "results": results,
                "processed": processed,
                "mapping": mapping
            })
        except Exception as e:
            raise CuckooReportError("Failed to generate HTML report: %s" % e)

        try:
            report_path = os.path.join(self.reports_path, "report.html")
            with codecs.open(report_path, "w", encoding="utf-8") as report:
                report.write(html)
        except (TypeError, IOError) as e:
            raise CuckooReportError("Failed to write HTML report: %s" % e)

        return True
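
At its core the report generation above just renders a Jinja2 template against the results dict. A self-contained sketch using an in-memory template instead of the report.html shipped in data/html:

from jinja2 import DictLoader, Environment

env = Environment(
    autoescape=True,
    loader=DictLoader({"mini_report.html": "<h1>{{ results.target }}</h1>"}),
)
html = env.get_template("mini_report.html").render({"results": {"target": "sample.exe"}})
print(html)  # -> <h1>sample.exe</h1>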
Example #29
0
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []
        do_strings = self.options.get("strings", False)
        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = self.options.get("minchars", 5)

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                # If we are re-processing this task and zips are enabled, the
                # process dumps will not be re-processed (for now this only matters for Yara)
                if not dmp.endswith(".dmp"):
                    continue

                dmp_path = os.path.join(self.pmemory_path, dmp)
                if os.path.getsize(dmp_path) == 0:
                    continue

                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(
                    os.path.splitext(os.path.basename(dmp_path))[0])
                for process in self.results.get("behavior", {}).get(
                        "processes", []) or []:
                    if process_id == process["process_id"]:
                        process_name = process["process_name"]
                        process_path = process["module_path"]

                procdump = ProcDump(dmp_path, pretty=True)

                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(
                        os.path.join(CUCKOO_ROOT, "data", "yara",
                                     "index_memory.yar")),
                    address_space=procdump.pretty_print(),
                )
                endlimit = ""
                if not HAVE_RE2:
                    endlimit = "8192"

                if do_strings:
                    if nulltermonly:
                        apat = "([\x20-\x7e]{" + str(
                            minchars) + "," + endlimit + "})\x00"
                        upat = "((?:[\x20-\x7e][\x00]){" + str(
                            minchars) + "," + endlimit + "})\x00\x00"
                    else:
                        apat = "[\x20-\x7e]{" + str(
                            minchars) + "," + endlimit + "}"
                        upat = "(?:[\x20-\x7e][\x00]){" + str(
                            minchars) + "," + endlimit + "}"

                    matchdict = procdump.search(apat, all=True)
                    strings = matchdict["matches"]
                    matchdict = procdump.search(upat, all=True)
                    ustrings = matchdict["matches"]
                    for ws in ustrings:
                        strings.append(str(ws.decode("utf-16le")))

                    proc["strings_path"] = dmp_path + ".strings"
                    f = open(proc["strings_path"], "w")
                    f.write("\n".join(strings))
                    f.close()

                procdump.close()
                results.append(proc)
        return results
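
The apat/upat expressions above pull printable ASCII and UTF-16LE strings out of the dump. A standalone sketch of the same null-terminated patterns using the standard re module on a bytes buffer:

import re

def extract_strings(buf, minchars=5):
    """Hypothetical helper mirroring the null-terminated string patterns above."""
    apat = rb"([\x20-\x7e]{" + str(minchars).encode() + rb",})\x00"
    upat = rb"((?:[\x20-\x7e]\x00){" + str(minchars).encode() + rb",})\x00\x00"
    strings = [m.decode("ascii") for m in re.findall(apat, buf)]
    strings += [m.decode("utf-16le") for m in re.findall(upat, buf)]
    return strings

print(extract_strings(b"junk\x01hello world\x00\xffh\x00i\x00 \x00t\x00h\x00e\x00r\x00e\x00\x00\x00"))
# -> ['hello world', 'hi there']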
Example #30
0
    def process_file(self, file_path, CAPE_output, append_file):
        """Process file.
        @return: file_info
        """
        global cape_config
        cape_name = ""
        strings = []

        buf = self.options.get("buffer", BUFSIZE)

        if file_path.endswith("_info.txt"):
            return

        texttypes = [
            "ASCII",
            "Windows Registry text",
            "XML document text",
            "Unicode text",
        ]

        if os.path.exists(file_path + "_info.txt"):
            with open(file_path + "_info.txt", 'r') as f:
                metastring = f.readline()
        else:
            metastring = ""

        file_info = File(file_path, metastring).get_all()

        # Get the file data
        with open(file_info["path"], "r") as file_open:
            file_data = file_open.read(buf + 1)
        if len(file_data) > buf:
            file_info["data"] = binascii.b2a_hex(file_data[:buf] +
                                                 " <truncated>")
        else:
            file_info["data"] = binascii.b2a_hex(file_data)

        metastrings = metastring.split(",")
        if len(metastrings) > 1:
            file_info["pid"] = metastrings[1]
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[2]
            file_info["process_name"] = metastrings[2].split("\\")[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[3]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""

        if metastrings != "":
            try:
                file_info["cape_type_code"] = int(metastrings[0])
            except Exception as e:
                pass
            if file_info["cape_type_code"] == COMPRESSION:
                file_info["cape_type"] = "Decompressed PE Image"
            if file_info["cape_type_code"] == INJECTION_PE:
                file_info["cape_type"] = "Injected PE Image"
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[4]
                    file_info["target_process"] = metastrings[4].split(
                        "\\")[-1]
                    file_info["target_pid"] = metastrings[5]
            if file_info["cape_type_code"] == INJECTION_SHELLCODE:
                file_info["cape_type"] = "Injected Shellcode/Data"
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[4]
                    file_info["target_process"] = metastrings[4].split(
                        "\\")[-1]
                    file_info["target_pid"] = metastrings[5]
            if file_info["cape_type_code"] == INJECTION_SECTION:
                file_info["cape_type"] = "Injected Section"
                if len(metastrings) > 4:
                    file_info["section_handle"] = metastrings[4]
            if file_info["cape_type_code"] == EXTRACTION_PE:
                file_info["cape_type"] = "Extracted PE Image"
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[4]
            if file_info["cape_type_code"] == EXTRACTION_SHELLCODE:
                file_info["cape_type"] = "Extracted Shellcode"
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[4]
            type_strings = file_info["type"].split()
            if type_strings[0] == ("PE32+"):
                file_info["cape_type"] += ": 64-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            if type_strings[0] == ("PE32"):
                file_info["cape_type"] += ": 32-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            # PlugX
            if file_info["cape_type_code"] == PLUGX_CONFIG:
                file_info["cape_type"] = "PlugX Config"
                plugx_parser = plugx.PlugXConfig()
                plugx_config = plugx_parser.parse_config(
                    file_data, len(file_data))
                if not "cape_config" in cape_config and plugx_config:
                    cape_config["cape_config"] = {}
                    for key, value in plugx_config.items():
                        cape_config["cape_config"].update({key: [value]})
                    cape_name = "PlugX"
                else:
                    log.error(
                        "CAPE: PlugX config parsing failure - size many not be handled."
                    )
                append_file = False
            if file_info["cape_type_code"] == PLUGX_PAYLOAD:
                file_info["cape_type"] = "PlugX Payload"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
            # EvilGrab
            if file_info["cape_type_code"] == EVILGRAB_PAYLOAD:
                file_info["cape_type"] = "EvilGrab Payload"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
            if file_info["cape_type_code"] == EVILGRAB_DATA:
                cape_name = "EvilGrab"
                file_info["cape_type"] = "EvilGrab Data"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                if file_info["size"] == 256 or file_info["size"] == 260:
                    ConfigItem = "filepath"
                    ConfigData = format(file_data)
                    cape_config["cape_config"].update(
                        {ConfigItem: [ConfigData]})
                if file_info["size"] > 0x1000:
                    append_file = True
                else:
                    append_file = False
            # Sedreco
            if file_info["cape_type_code"] == SEDRECO_DATA:
                cape_name = "Sedreco"
                cape_config["cape_type"] = "Sedreco Config"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                SedrecoConfigIndex = None
                if len(metastrings) > 4:
                    SedrecoConfigIndex = metastrings[4]
                if SedrecoConfigIndex == '0x0':
                    ConfigItem = "Timer1"
                elif SedrecoConfigIndex == '0x1':
                    ConfigItem = "Timer2"
                elif SedrecoConfigIndex == '0x2':
                    ConfigItem = "Computer Name"
                elif SedrecoConfigIndex == '0x3':
                    ConfigItem = "C&C1"
                elif SedrecoConfigIndex == '0x4':
                    ConfigItem = "C&C2"
                elif SedrecoConfigIndex == '0x5':
                    ConfigItem = "Operation Name"
                elif SedrecoConfigIndex == '0x6':
                    ConfigItem = "Keylogger MaxBuffer"
                elif SedrecoConfigIndex == '0x7':
                    ConfigItem = "Keylogger MaxTimeout"
                elif SedrecoConfigIndex == '0x8':
                    ConfigItem = "Keylogger Flag"
                elif SedrecoConfigIndex == '0x9':
                    ConfigItem = "C&C3"
                else:
                    ConfigItem = "Unknown"
                ConfigData = format(file_data)
                if ConfigData:
                    cape_config["cape_config"].update(
                        {ConfigItem: [ConfigData]})
                append_file = False
            # Cerber
            if file_info["cape_type_code"] == CERBER_CONFIG:
                file_info["cape_type"] = "Cerber Config"
                cape_config["cape_type"] = "Cerber Config"
                cape_name = "Cerber"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                ConfigItem = "JSON Data"
                parsed = json.loads(file_data.rstrip(b'\0'))
                ConfigData = json.dumps(parsed, indent=4, sort_keys=True)
                cape_config["cape_config"].update({ConfigItem: [ConfigData]})
                append_file = True
            if file_info["cape_type_code"] == CERBER_PAYLOAD:
                file_info["cape_type"] = "Cerber Payload"
                cape_config["cape_type"] = "Cerber Payload"
                cape_name = "Cerber"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                append_file = True
            # Ursnif
            if file_info["cape_type_code"] == URSNIF_CONFIG:
                file_info["cape_type"] = "Ursnif Config"
                cape_config["cape_type"] = "Ursnif Config"
                cape_name = "Ursnif"
                malwareconfig_loaded = False
                try:
                    malwareconfig_parsers = os.path.join(
                        CUCKOO_ROOT, "modules", "processing", "parsers",
                        "malwareconfig")
                    file, pathname, description = imp.find_module(
                        cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname,
                                             description)
                    malwareconfig_loaded = True
                    log.info("CAPE: Imported malwareconfig.com parser %s",
                             cape_name)
                except ImportError:
                    log.info(
                        "CAPE: malwareconfig.com parser: No module named %s",
                        cape_name)
                if malwareconfig_loaded:
                    try:
                        if not "cape_config" in cape_config:
                            cape_config["cape_config"] = {}
                        malwareconfig_config = module.config(file_data)
                        if isinstance(malwareconfig_config, list):
                            for (key,
                                 value) in malwareconfig_config[0].iteritems():
                                cape_config["cape_config"].update(
                                    {key: [value]})
                        elif isinstance(malwareconfig_config, dict):
                            for (key,
                                 value) in malwareconfig_config.iteritems():
                                cape_config["cape_config"].update(
                                    {key: [value]})
                    except Exception as e:
                        log.error(
                            "CAPE: malwareconfig parsing error with %s: %s",
                            cape_name, e)
                append_file = False
            # Hancitor
            if file_info["cape_type_code"] == HANCITOR_PAYLOAD:
                cape_name = "Hancitor"
                cape_config["cape_type"] = "Hancitor Payload"
                file_info["cape_type"] = "Hancitor Payload"
            if file_info["cape_type_code"] == HANCITOR_CONFIG:
                cape_name = "Hancitor"
                cape_config["cape_type"] = "Hancitor Config"
                file_info["cape_type"] = "Hancitor Config"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                ConfigStrings = file_data.split('\0')
                ConfigStrings = filter(None, ConfigStrings)
                ConfigItem = "Campaign Code"
                cape_config["cape_config"].update(
                    {ConfigItem: [ConfigStrings[0]]})
                GateURLs = ConfigStrings[1].split('|')
                for index, value in enumerate(GateURLs):
                    ConfigItem = "Gate URL " + str(index + 1)
                    cape_config["cape_config"].update({ConfigItem: [value]})
                append_file = False
            # QakBot
            if file_info["cape_type_code"] == QAKBOT_CONFIG:
                file_info["cape_type"] = "QakBot Config"
                cape_config["cape_type"] = "QakBot Config"
                cape_name = "QakBot"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                index = data = ""
                for line in file_data.splitlines():
                    if '=' in line:
                        index = line.split('=')[0]
                        data = line.split('=')[1]
                    if index == '10':
                        ConfigItem = "Botnet name"
                        ConfigData = data
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '11':
                        ConfigItem = "Number of C2 servers"
                        ConfigData = data
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '47':
                        ConfigItem = "Bot ID"
                        ConfigData = data
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '3':
                        ConfigItem = "Config timestamp"
                        ConfigData = datetime.datetime.fromtimestamp(
                            int(data)).strftime('%H:%M:%S %d-%m-%Y')
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '22':
                        values = data.split(':')
                        ConfigItem = "Password #1"
                        ConfigData = values[2]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "Username #1"
                        ConfigData = values[1]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "C2 #1"
                        ConfigData = values[0]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '23':
                        values = data.split(':')
                        ConfigItem = "Password #2"
                        ConfigData = values[2]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "Username #2"
                        ConfigData = values[1]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "C2 #2"
                        ConfigData = values[0]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '24':
                        values = data.split(':')
                        ConfigItem = "Password #3"
                        ConfigData = values[2]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "Username #3"
                        ConfigData = values[1]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "C2 #3"
                        ConfigData = values[0]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '25':
                        values = data.split(':')
                        ConfigItem = "Password #4"
                        ConfigData = values[2]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "Username #4"
                        ConfigData = values[1]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "C2 #4"
                        ConfigData = values[0]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '26':
                        values = data.split(':')
                        ConfigItem = "Password #5"
                        ConfigData = values[2]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "Username #5"
                        ConfigData = values[1]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "C2 #5"
                        ConfigData = values[0]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                append_file = False
            if file_info["cape_type_code"] == QAKBOT_PAYLOAD:
                file_info["cape_type"] = "QakBot Payload"
                cape_config["cape_type"] = "QakBot Payload"
                cape_name = "QakBot"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                append_file = True
            # UPX package output
            if file_info["cape_type_code"] == UPX:
                file_info["cape_type"] = "Unpacked PE Image"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

        # Process CAPE Yara hits
        for hit in file_info["cape_yara"]:
            # Check to see if file is packed with UPX
            if hit["name"] == "UPX":
                log.info(
                    "CAPE: Found UPX Packed sample - attempting to unpack")
                self.upx_unpack(file_data, CAPE_output)

            # Check for a payload or config hit
            try:
                cape_type = hit["meta"]["cape_type"].lower()
                if "payload" in cape_type or "config" in cape_type:
                    file_info["cape_type"] = hit["meta"]["cape_type"]
                    cape_name = hit["name"]
            except Exception:
                pass
            type_strings = file_info["type"].split()
            if "-bit" not in file_info["cape_type"]:
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

            suppress_parsing_list = ["Cerber", "Ursnif", "QakBot"]

            if hit["name"] in suppress_parsing_list:
                continue

            # Attempt to import a parser for the hit
            # DC3-MWCP
            mwcp_loaded = False
            if cape_name:
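                # Try the DC3-MWCP parser named after the detected family; an
                # empty error list means the parser imported and ran cleanly.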
                try:
                    mwcp_parsers = os.path.join(CUCKOO_ROOT, "modules",
                                                "processing", "parsers",
                                                "mwcp", "parsers")
                    mwcp = reporter.Reporter(parserdir=mwcp_parsers)
                    kwargs = {}
                    mwcp.run_parser(cape_name, data=file_data, **kwargs)
                    if mwcp.errors == []:
                        log.info("CAPE: Imported DC3-MWCP parser %s",
                                 cape_name)
                        mwcp_loaded = True
                    else:
                        error_lines = mwcp.errors[0].split("\n")
                        for line in error_lines:
                            if line.startswith('ImportError: '):
                                log.info("CAPE: DC3-MWCP parser: %s",
                                         line.split(': ')[1])
                except ImportError:
                    pass

            # malwareconfig
            malwareconfig_loaded = False
            if cape_name and not mwcp_loaded:
                try:
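                    # Fall back to a malwareconfig.com-style parser: import a
                    # module named after the family from the malwareconfig
                    # parsers directory.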
                    malwareconfig_parsers = os.path.join(
                        CUCKOO_ROOT, "modules", "processing", "parsers",
                        "malwareconfig")
                    file, pathname, description = imp.find_module(
                        cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname,
                                             description)
                    malwareconfig_loaded = True
                    log.info("CAPE: Imported malwareconfig.com parser %s",
                             cape_name)
                except ImportError:
                    log.info(
                        "CAPE: malwareconfig.com parser: No module named %s",
                        cape_name)

            # Get config data
            if mwcp_loaded:
                try:
                    if not "cape_config" in cape_config:
                        cape_config["cape_config"] = {}
                        cape_config["cape_config"] = convert(mwcp.metadata)
                    else:
                        cape_config["cape_config"].update(
                            convert(mwcp.metadata))
                except Exception as e:
                    log.error(
                        "CAPE: DC3-MWCP config parsing error with %s: %s",
                        cape_name, e)
            elif malwareconfig_loaded:
                try:
                    if not "cape_config" in cape_config:
                        cape_config["cape_config"] = {}
                    malwareconfig_config = module.config(file_data)
                    if isinstance(malwareconfig_config, list):
                        for (key,
                             value) in malwareconfig_config[0].iteritems():
                            cape_config["cape_config"].update({key: [value]})
                    elif isinstance(malwareconfig_config, dict):
                        for (key, value) in malwareconfig_config.iteritems():
                            cape_config["cape_config"].update({key: [value]})
                except Exception as e:
                    log.error("CAPE: malwareconfig parsing error with %s: %s",
                              cape_name, e)

            if "cape_config" in cape_config:
                if cape_config["cape_config"] == {}:
                    del cape_config["cape_config"]

        if cape_name:
            if "cape_config" in cape_config:
                cape_config["cape_name"] = format(cape_name)
            if not "cape" in self.results:
                if cape_name != "UPX":
                    self.results["cape"] = cape_name

        # Remove duplicate payloads from web ui
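        # Two payloads are treated as duplicates when they have the same size
        # and either an ssdeep similarity at or above the threshold or an
        # identical entry point and entry-point bytes.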
        for cape_file in CAPE_output:
            if file_info["size"] == cape_file["size"]:
                if HAVE_PYDEEP:
                    ssdeep_grade = pydeep.compare(file_info["ssdeep"],
                                                  cape_file["ssdeep"])
                    if ssdeep_grade >= ssdeep_threshold:
                        append_file = False
                if file_info["entrypoint"] and file_info["entrypoint"] == cape_file["entrypoint"] \
                    and file_info["ep_bytes"] == cape_file["ep_bytes"]:
                    append_file = False

        if append_file:
            CAPE_output.append(file_info)
        return file_info
Example #31
0
    def run(self):
        """Run analysis.
        @return: list of process dumps with related information.
        """
        self.key = "procdump"
        procdump_files = []
        buf = self.options.get("buffer", 8192)
        if not os.path.exists(self.procdump_path):
            return None

        meta = dict()
        if os.path.exists(self.files_metadata):
            for line in open(self.files_metadata, "rb"):
                entry = json.loads(line)
                filepath = os.path.join(self.analysis_path, entry["path"])
                meta[filepath] = {
                    "pids": entry["pids"],
                    "filepath": entry["filepath"],
                    "metadata": entry["metadata"],
                }

        file_names = os.listdir(self.procdump_path)
        for file_name in file_names:
            file_path = os.path.join(self.procdump_path, file_name)

            file_info = File(file_path=file_path,
                             guest_paths=meta[file_path]["metadata"],
                             file_name=file_name).get_all()
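            # The ";?"-delimited metadata string recorded by the guest carries
            # the process path (second field) and the module path (third field).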
            metastrings = meta[file_path].get("metadata", "").split(";?")
            if len(metastrings) < 3:
                continue
            file_info["process_path"] = metastrings[1]
            file_info["module_path"] = metastrings[2]
            file_info["process_name"] = file_info["process_path"].split(
                "\\")[-1]
            file_info["pid"] = meta[file_path]["pids"][0]
            type_strings = file_info["type"].split()
            if len(type_strings) < 3:
                continue
            if type_strings[0] == "MS-DOS":
                file_info["cape_type"] = "DOS MZ image: executable"
            else:
                file_info["cape_type"] = "PE image"
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                elif type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            texttypes = [
                "ASCII",
                "Windows Registry text",
                "XML document text",
                "Unicode text",
            ]
            readit = False
            for texttype in texttypes:
                if texttype in file_info["type"]:
                    readit = True
                    break
            if readit:
                with open(file_info["path"], "r") as drop_open:
                    filedata = drop_open.read(buf + 1)
                if len(filedata) > buf:
                    file_info["data"] = convert_to_printable(filedata[:buf] +
                                                             " <truncated>")
                else:
                    file_info["data"] = convert_to_printable(filedata)

            procdump_files.append(file_info)

        return procdump_files
Example #32
0
def reversing_labs_lookup(target: str, is_hash: bool = False):
    _headers = {
        "User-Agent": "CAPE Sandbox",
        "Content-Type": "application/json",
        "Authorization": f"Token {processing_conf.reversinglabs.key}",
    }

    report_fields = [
        "id",
        "sha1",
        "sha256",
        "sha512",
        "md5",
        "category",
        "file_type",
        "file_subtype",
        "identification_name",
        "identification_version",
        "file_size",
        "extracted_file_count",
        "local_first_seen",
        "local_last_seen",
        "classification_origin",
        "classification_reason",
        "classification_source",
        "classification",
        "riskscore",
        "classification_result",
        "ticore",
        "tags",
        "summary",
        "discussion",
        "ticloud",
        "aliases",
    ]
    if not is_hash:
        sha256 = target if len(target) == 64 else File(target).get_sha256()
    else:
        sha256 = target
    full_report_lookup = {
        "hash_values": [sha256],
        "report_fields": report_fields
    }
    try:
        r = requests.post(
            url=processing_conf.reversinglabs.url +
            "/api/samples/v2/list/details/",
            headers=_headers,
            data=json.dumps(full_report_lookup),
        )
    except requests.exceptions.RequestException as e:
        return {
            "error": True,
            "msg": f"Unable to complete connection to Reversing Labs: {e}",
        }

    if r.status_code != 200:
        return {
            "error": True,
            "msg": f"Unable to complete lookup to Reversing Labs: {r.json().get('message')}",
        }
    reversing_labs_response = r.json()
    if not reversing_labs_response.get("results"):
        return {"error": True, "msg": "No results found."}
    results = reversing_labs_response["results"][0]
    # most_recent_scan_engines = results["av_scanners"][-1]

    scanner_summary = results["av_scanners_summary"]
    sample_summary = results["sample_summary"]
    ticloud = results["ticloud"]
    ticore = results["ticore"]

    scanner_total = scanner_summary["scanner_count"]
    scanner_evil = scanner_summary["scanner_match"]
    classification = sample_summary["classification"]
    classification_result = sample_summary["classification_result"]
    file = ticore["info"]["file"]
    malicious = (classification in ["malicious", "suspicious"]
                 and sample_summary["goodware_override"] is False)
    md5 = sample_summary["md5"]
    sha1 = sample_summary["sha1"]
    sha256 = sample_summary["sha256"]
    riskscore = ticloud["riskscore"]
    name = file["proposed_filename"]
    entropy = file["entropy"]
    story = ticore["story"]

    reversing_labs = {
        "name": name,
        "md5": md5,
        "sha1": sha1,
        "sha256": sha256,
        "malicious": malicious,
        "classification": classification,
        "classification_result": classification_result,
        "riskscore": riskscore,
        "detected": scanner_evil,
        "total": scanner_total,
        "story": story,
        "permalink": os.path.join(processing_conf.reversinglabs.url, sha256),
    }

    return reversing_labs
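# Hypothetical usage sketch, not part of the module above: assuming
# processing_conf.reversinglabs points at a reachable ReversingLabs service
# with a valid API token, a lookup by SHA-256 could be driven like this.
if __name__ == "__main__":
    lookup = reversing_labs_lookup(
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
        is_hash=True,
    )
    if lookup.get("error"):
        print(lookup["msg"])
    else:
        print(lookup["classification"], lookup["riskscore"], lookup["permalink"])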
Example #33
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one(
            )["version"] != self.SCHEMA_VERSION:
                raise CuckooReportError(
                    "Mongo schema version not expected, check data migration tool"
                )
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #    created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        self.db.fs.files.ensure_index("sha256",
                                      unique=True,
                                      sparse=True,
                                      name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        # Store the sample in GridFS.
        if results["info"]["category"] == "file":
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"] = {"pcap_id": pcap_id}
            report["network"].update(results["network"])

        # Store the process memory dump file in GridFS and reference it back in the report.
        if "procmemory" in report and self.options.get("store_memdump", False):
            for idx, procmem in enumerate(report["procmemory"]):
                procmem_path = os.path.join(self.analysis_path, "memory",
                                            "{0}.dmp".format(procmem["pid"]))
                procmem_file = File(procmem_path)
                if procmem_file.valid():
                    procmem_id = self.store_file(procmem_file)
                    report["procmemory"][idx].update(
                        {"procmem_id": procmem_id})

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        if "dropped" in report:
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop,
                                                 filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [
                shot for shot in os.listdir(shots_path)
                if shot.endswith(".jpg")
            ]

            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.disconnect()
Example #34
0
    def run(self):
        """Run Google play unofficial python api the get the google play information
        @return: list of google play features
        """
        self.key = "googleplay"
        googleplay = {}

        if not HAVE_GOOGLEPLAY:
            log.error("Unable to import the GooglePlay library, has it been "
                      "installed properly?")
            return

        if not HAVE_ANDROGUARD:
            log.error("Could not find the Androguard library, please install "
                      "it. (`pip install androguard`)")
            return

        if ("file" not in self.task["category"]):
            return

        f = File(self.task["target"])
        if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path)

            android_id = self.options.get("android_id")
            google_login = self.options.get("google_login")
            google_password = self.options.get("google_password")
            # auth_token = self.options.get("auth_token", None)

            if not android_id and not google_login and not google_password:
                raise CuckooProcessingError("Google Play Credentials not configured, skip")

            try:
                a = APK(self.file_path)
                if a.is_valid_APK():
                    package = a.get_package()
                    # Connect
                    api = GooglePlayAPI(android_id)
                    api.login(google_login, google_password, None)

                    # Get the version code and the offer type from the app details
                    app_data = api.details(package)
                    app_detail = app_data.docV2.details.appDetails

                    if not app_detail.installationSize:
                        return googleplay

                    googleplay["title"] = app_detail.title
                    googleplay["app_category"] = app_detail.appCategory._values
                    googleplay["version_code"] = app_detail.versionCode
                    googleplay["app_type"] = app_detail.appType
                    googleplay["content_rating"] = app_detail.contentRating
                    googleplay["developer_email"] = app_detail.developerEmail
                    googleplay["developer_name"] = app_detail.developerName
                    googleplay["developer_website"] = app_detail.developerWebsite
                    googleplay["installation_size"] = app_detail.installationSize
                    googleplay["num_downloads"] = app_detail.numDownloads
                    googleplay["upload_date"] = app_detail.uploadDate
                    googleplay["permissions"] = app_detail.permission._values
            except (IOError, OSError, BadZipfile) as e:
                raise CuckooProcessingError("Error opening file %s" % e)

        return googleplay
Example #35
0
    def build_options(self):
        """Generate analysis options.
        @return: options dict.
        """
        options = {}

        options["id"] = self.task.id
        options["ip"] = self.machine.resultserver_ip
        options["port"] = self.machine.resultserver_port
        options["category"] = self.task.category
        options["target"] = self.task.target
        options["package"] = self.task.package
        options["options"] = self.task.options
        options["enforce_timeout"] = self.task.enforce_timeout
        options["clock"] = self.task.clock
        options["terminate_processes"] = self.cfg.cuckoo.terminate_processes
        options["upload_max_size"] = self.cfg.resultserver.upload_max_size
        options["do_upload_max_size"] = int(
            self.cfg.resultserver.do_upload_max_size)

        if not self.task.timeout or self.task.timeout == 0:
            options["timeout"] = self.cfg.timeouts.default
        else:
            options["timeout"] = self.task.timeout

        if self.task.category == "file":
            options["file_name"] = File(self.task.target).get_name()
            options["file_type"] = File(self.task.target).get_type()
            # If it's a PE file, collect export information to help pick the
            # right analysis package more intelligently.
            options["exports"] = ""
            if HAVE_PEFILE and ("PE32" in options["file_type"] or
                                "MS-DOS executable" in options["file_type"]):
                try:
                    pe = pefile.PE(self.task.target)
                    if hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
                        exports = []
                        for exported_symbol in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                            try:
                                if not exported_symbol.name:
                                    continue
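                                # Keep only characters matching [A-Za-z0-9_?@-]
                                # in the export name before joining the list.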
                                if isinstance(exported_symbol.name, bytes):
                                    exports.append(
                                        re.sub(b"[^A-Za-z0-9_?@-]", b"",
                                               exported_symbol.name).decode(
                                                   "utf-8"))
                                else:
                                    exports.append(
                                        re.sub("[^A-Za-z0-9_?@-]", "",
                                               exported_symbol.name))
                            except Exception as e:
                                log.error(e, exc_info=True)

                        options["exports"] = ",".join(exports)
                except Exception as e:
                    log.error("PE type not recognised")
                    log.error(e, exc_info=True)

        # options from auxiliar.conf
        for plugin in self.aux_cfg.auxiliar_modules.keys():
            options[plugin] = self.aux_cfg.auxiliar_modules[plugin]

        return options
Example #36
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
                CuckooReportError("Mongo schema version not expected, check data migration tool")
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        if not "network" in report:
            report["network"] = {}

        # Add screenshot paths
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                screenshot = File(shot_path)
                if screenshot.valid():
                    # Strip the extension as it's added later 
                    # in the Django view
                    report["shots"].append(shot_file.replace(".jpg", ""))

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed then
                    # store the chunk in MongoDB.
                    if len(chunk) == 100:
                        to_insert = {"pid": process["process_id"],
                                     "calls": chunk}
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Calculate the mlist_cnt for display if present to reduce db load
        if "signatures" in results:
            for entry in results["signatures"]:
                if entry["name"] == "ie_martian_children":
                    report["mlist_cnt"] = len(entry["data"])
                if entry["name"] == "office_martian_children":
                    report["f_mlist_cnt"] = len(entry["data"])

        # Other info we want quick access to from the web UI
        virustotal = results.get("virustotal")
        if virustotal and "positives" in virustotal and "total" in virustotal:
            report["virustotal_summary"] = "%s/%s" % (virustotal["positives"],
                                                      virustotal["total"])
        suricata = results.get("suricata")
        if suricata:
            if suricata.get("tls"):
                report["suri_tls_cnt"] = len(suricata["tls"])
            if suricata.get("alerts"):
                report["suri_alert_cnt"] = len(suricata["alerts"])
            if suricata.get("files"):
                report["suri_file_cnt"] = len(suricata["files"])
            if suricata.get("http"):
                report["suri_http_cnt"] = len(suricata["http"])
            if suricata.get("ssh"):
                report["suri_ssh_cnt"] = len(suricata["ssh"])
            if suricata.get("dns"):
                report["suri_dns_cnt"] = len(suricata["dns"])

        # Create an index based on the info.id dict key. Increases overall scalability
        # with large amounts of data.
        # Note: Silently ignores the creation if the index already exists.
        self.db.analysis.create_index("info.id", background=True)

        # Store the report and retrieve its object id.
        try:
            self.db.analysis.save(report)
        except InvalidDocument as e:
            parent_key, psize = self.debug_dict_size(report)[0]
            child_key, csize = self.debug_dict_size(report[parent_key])[0]
            if not self.options.get("fix_large_docs", False):
                # Just log the error and problem keys
                log.error(str(e))
                log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576))
                log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576))
            else:
                # Delete the problem keys and check for more
                error_saved = True
                while error_saved:
                    log.warn("results['%s']['%s'] deleted due to >16MB size (%dMB)" %
                             (parent_key, child_key, int(psize) / 1048576))
                    del report[parent_key][child_key]
                    try:
                        self.db.analysis.save(report)
                        error_saved = False
                    except InvalidDocument as e:
                        parent_key, psize = self.debug_dict_size(report)[0]
                        child_key, csize = self.debug_dict_size(report[parent_key])[0]
                        log.error(str(e))
                        log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576))
                        log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576))

        self.conn.close()
    def run(self, results):
        self.noinject = self.options.get("noinject", False)
        filesdict = {}
        self.task_options_stack = []
        self.task_options = None
        self.task_custom = None
        report = dict(results)
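        # Child tasks created by this module carry "resubmitjob=true" in their
        # options, so reports for those tasks are skipped here and dropped
        # files are not resubmitted endlessly.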

        if report["info"].has_key(
                "options") and "resubmitjob=true" in report["info"]["options"]:
            return
        else:
            self.task_options_stack.append("resubmitjob=true")
        if self.noinject:
            self.task_options_stack.append("free=true")

        if self.task_options_stack:
            self.task_options = ','.join(self.task_options_stack)

        report = dict(results)
        for dropped in report["dropped"]:
            if os.path.isfile(dropped["path"]):
                if ("PE32" in dropped["type"] or "MS-DOS"
                        in dropped["type"]) and "DLL" not in dropped["type"]:
                    if not filesdict.has_key(dropped['sha256']):
                        filesdict[dropped['sha256']] = dropped['path']

        if report.has_key("suricata") and report["suricata"]:
            if report["suricata"].has_key(
                    "files") and report["suricata"]["files"]:
                for suricata_file_e in results["suricata"]["files"]:
                    if suricata_file_e.has_key("file_info"):
                        tmp_suricata_file_d = dict(suricata_file_e)
                        if os.path.isfile(
                                suricata_file_e["file_info"]["path"]):
                            ftype = suricata_file_e["file_info"]["type"]
                            if ("PE32" in ftype or "MS-DOS"
                                    in ftype) and "DLL" not in ftype:
                                if not filesdict.has_key(
                                        suricata_file_e["file_info"]
                                    ["sha256"]):
                                    filesdict[suricata_file_e["file_info"]
                                              ["sha256"]] = suricata_file_e[
                                                  "file_info"]["path"]

        db = Database()

        for e in filesdict:
            if not File(filesdict[e]).get_size():
                continue
            if not db.find_sample(sha256=e) is None:
                continue

            self.task_custom = "Parent_Task_ID:%s" % report["info"]["id"]
            if report["info"].has_key("custom") and report["info"]["custom"]:
                self.task_custom = "%s Parent_Custom:%s" % (
                    self.task_custom, report["info"]["custom"])
            task_id = db.add_path(file_path=filesdict[e],
                                  package='exe',
                                  timeout=200,
                                  options=self.task_options,
                                  priority=1,
                                  machine=None,
                                  platform=None,
                                  custom=self.task_custom,
                                  memory=False,
                                  enforce_timeout=False,
                                  clock=None,
                                  tags=None)

            if task_id:
                log.info(u"Resubmitexe file \"{0}\" added as task with ID {1}".
                         format(filesdict[e], task_id))
            else:
                log.warn("Error adding resubmitexe task to database")
Example #38
0
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []
        do_strings = self.options.get("strings", False)
        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = self.options.get("minchars", 5)

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                # When re-processing a task with zips enabled, the process dumps
                # are not reprocessed (for now this only matters for Yara).
                if not dmp.endswith(".dmp"):
                    continue

                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(os.path.splitext(os.path.basename(dmp_path))[0])
                if "behavior" in self.results and "processes" in self.results["behavior"]:
                    for process in self.results["behavior"]["processes"]:
                        if process_id == process["process_id"]:
                            process_name = process["process_name"]
                            process_path = process["module_path"]

                procdump = ProcDump(dmp_path, pretty=True)

                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")),
                    address_space=procdump.pretty_print(),
                )
                endlimit = ""
                if not HAVE_RE2:
                    endlimit = "8192"

                if do_strings:
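                    # Build ASCII and UTF-16LE patterns matching runs of at least
                    # `minchars` printable characters; without re2 the run length
                    # is capped at 8192 to keep the regex scan bounded.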
                    if nulltermonly:
                        apat = "([\x20-\x7e]{" + str(minchars) + "," + endlimit + "})\x00"
                        upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + "," + endlimit + "})\x00\x00"
                    else:
                        apat = "[\x20-\x7e]{" + str(minchars) + "," + endlimit + "}"
                        upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + "," + endlimit + "}"

                    matchdict = procdump.search(apat, all=True)
                    strings = matchdict["matches"]
                    matchdict = procdump.search(upat, all=True)
                    ustrings = matchdict["matches"]
                    for ws in ustrings:
                        strings.append(str(ws.decode("utf-16le")))

                    proc["strings_path"] = dmp_path + ".strings"
                    with open(proc["strings_path"], "w") as f:
                        f.write("\n".join(strings))

                procdump.close()

                # Deduplicate configs
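                # Some families (Dyre, DarkComet) dump the same config fragments
                # many times; keep only the unique blocks per Yara match.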
                if proc["yara"]:
                    for match in proc["yara"]:
                        # Dyre
                        if match["name"] == "DyreCfgInjectsList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<litem>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</litem>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique inject elements)" % len(output)

                        elif match["name"] == "DyreCfgRedirectList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<rpcgroup>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</rpcgroup>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique redirect elements)" % len(output)

                        # DarkComet
                        elif match["name"] == "DarkCometConfig":
                            output = list()
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("#BEGIN DARKCOMET"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("#EOF DARKCOMET"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]

                results.append(proc)
        return results
Example #39
0
class TestFile:
    def setUp(self):
        self.tmp = tempfile.mkstemp()
        self.file = File(self.tmp[1])
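        # mkstemp() creates an empty file, so the expected hashes below are the
        # well-known digests of empty input.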

    def test_get_name(self):
        assert_equal(self.tmp[1].split("/")[-1], self.file.get_name())

    def test_get_data(self):
        assert_equal("", self.file.get_data())

    def test_get_size(self):
        assert_equal(0, self.file.get_size())

    def test_get_crc32(self):
        assert_equal("00000000", self.file.get_crc32())

    def test_get_md5(self):
        assert_equal("d41d8cd98f00b204e9800998ecf8427e", self.file.get_md5())

    def test_get_sha1(self):
        assert_equal("da39a3ee5e6b4b0d3255bfef95601890afd80709", self.file.get_sha1())

    def test_get_sha256(self):
        assert_equal("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", self.file.get_sha256())

    def test_get_sha512(self):
        assert_equal("cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e", self.file.get_sha512())

    def test_get_ssdeep(self):
        try:
            import pydeep
            assert_not_equal(None, self.file.get_ssdeep())
        except ImportError:
            assert_equal(None, self.file.get_ssdeep())

    def test_get_type(self):
        assert_equal("empty", self.file.get_type())

    def test_get_all_type(self):
        assert isinstance(self.file.get_all(), dict)

    def test_get_all_keys(self):
        for key in ["name", "size", "crc32", "md5", "sha1", "sha256", "sha512", "ssdeep", "type"]:
            assert key in self.file.get_all()

    def tearDown(self):
        os.remove(self.tmp[1])
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(os.path.splitext(os.path.basename(dmp_path))[0])
                if "behavior" in self.results and "processes" in self.results["behavior"]:
                    for process in self.results["behavior"]["processes"]:
                        if process_id == process["process_id"]:
                            process_name = process["process_name"]
                            process_path = process["module_path"]
                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")),
                    address_space=self.parse_dump(dmp_path)
                )

                # Deduplicate configs
                if proc["yara"]:
                    for match in proc["yara"]:
                        # Dyre
                        if match["name"] == "DyreCfgInjectsList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<litem>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</litem>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique inject elements)" % len(output)

                        elif match["name"] == "DyreCfgRedirectList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<rpcgroup>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</rpcgroup>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique redirect elements)" % len(output)

                        # DarkComet
                        elif match["name"] == "DarkCometConfig":
                            output = list()
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("#BEGIN DARKCOMET"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("#EOF DARKCOMET"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]

                results.append(proc)

        return results
Example #41
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
                CuckooReportError("Mongo schema version not expected, check data migration tool")
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #    created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        self.db.fs.files.ensure_index("sha256", unique=True,
                                      sparse=True, name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)
        if "network" not in report:
            report["network"] = {}

        # Store the sample in GridFS.
        if results["info"]["category"] == "file" and "target" in results:
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"]["pcap_id"] = pcap_id

        sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
        spcap = File(sorted_pcap_path)
        if spcap.valid():
            spcap_id = self.store_file(spcap)
            report["network"]["sorted_pcap_id"] = spcap_id

        mitmproxy_path = os.path.join(self.analysis_path, "dump.mitm")
        mitmpr = File(mitmproxy_path)
        if mitmpr.valid():
            mitmpr_id = self.store_file(mitmpr)
            report["network"]["mitmproxy_id"] = mitmpr_id

        # Store the process memory dump file in GridFS and reference it back in the report.
        if "procmemory" in report and self.options.get("store_memdump", False):
            for idx, procmem in enumerate(report["procmemory"]):
                procmem_path = os.path.join(self.analysis_path, "memory", "{0}.dmp".format(procmem["pid"]))
                procmem_file = File(procmem_path)
                if procmem_file.valid():
                    procmem_id = self.store_file(procmem_file)
                    report["procmemory"][idx].update({"procmem_id": procmem_id})

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        if "dropped" in report:
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop, filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            for shot_file in sorted(os.listdir(shots_path)):
                if not shot_file.endswith(".jpg"):
                    continue

                shot_path = os.path.join(self.analysis_path,
                                         "shots", shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        paginate = self.options.get("paginate", 100)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is paginate or if the loop is
                    # completed then store the chunk in MongoDB.
                    if len(chunk) == paginate:
                        to_insert = {"pid": process["pid"], "calls": chunk}
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["pid"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.close()
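
The call-chunking loop above is what keeps individual MongoDB documents small: every `paginate` calls are flushed to the separate calls collection and only the returned ObjectIds remain in the report. A minimal sketch of the paging logic itself, with a plain list standing in for the collection (names are illustrative):

def chunk_calls(calls, paginate=100):
    """Split a call list into pages of at most `paginate` entries."""
    chunks, chunk = [], []
    for call in calls:
        if len(chunk) == paginate:
            chunks.append(chunk)
            chunk = []
        chunk.append(call)
    if chunk:
        chunks.append(chunk)
    return chunks

pages = chunk_calls(list(range(250)), paginate=100)
print([len(page) for page in pages])  # [100, 100, 50]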
Example #42
0
    def run(self):
        """Runs VirusTotal processing
        @return: full VirusTotal report.
        """
        self.key = "virustotal"
        virustotal = []

        key = self.options.get("key", None)
        timeout = self.options.get("timeout", 60)
        urlscrub = self.options.get("urlscrub", None)
        do_file_lookup = self.getbool(self.options.get("do_file_lookup", False))
        do_url_lookup = self.getbool(self.options.get("do_url_lookup", False))

        if not key:
            raise CuckooProcessingError("VirusTotal API key not "
                                        "configured, skip")

        if self.task["category"] == "file" and do_file_lookup:
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError("File {0} not found, skipping it".format(self.file_path))

            resource = File(self.file_path).get_sha256()
            url = VIRUSTOTAL_FILE_URL

        elif self.task["category"] == "url" and do_url_lookup:
            resource = self.task["target"]
            if urlscrub:
                urlscrub_compiled_re = None
                try:
                    urlscrub_compiled_re = re.compile(urlscrub)
                except Exception as e:
                    raise CuckooProcessingError("Failed to compile urlscrub regex" % (e))
                try:
                   resource = re.sub(urlscrub_compiled_re,"",resource)
                except Exception as e:
                    raise CuckooProcessingError("Failed to scrub url" % (e))

            # normalize the URL the way VT appears to
            if not resource.lower().startswith("http://") and not resource.lower().startswith("https://"):
                resource = "http://" + resource
            slashsplit = resource.split('/')
            slashsplit[0] = slashsplit[0].lower()
            slashsplit[2] = slashsplit[2].lower()
            if len(slashsplit) == 3:
                slashsplit.append("")
            resource = "/".join(slashsplit)

            resource = hashlib.sha256(resource).hexdigest()
            url = VIRUSTOTAL_URL_URL
        else:
            # Not supported type, exit.
            return virustotal

        data = {"resource": resource, "apikey": key}

        try:
            r = requests.get(url, params=data, verify=True, timeout=int(timeout))
            response_data = r.content
        except requests.exceptions.RequestException as e:
            raise CuckooProcessingError("Unable to complete connection "
                                        "to VirusTotal: {0}".format(e))

        try:
            virustotal = json.loads(response_data)
        except ValueError as e:
            raise CuckooProcessingError("Unable to convert response to "
                                        "JSON: {0}".format(e))

        # Work around VT brain-damage
        if isinstance(virustotal, list) and len(virustotal):
            virustotal = virustotal[0]

        if "scans" in virustotal:
            items = virustotal["scans"].items()
            virustotal["scans"] = dict((engine.replace(".", "_"), signature)
                                       for engine, signature in items)
            virustotal["resource"] = resource
            virustotal["results"]=list(({"vendor":engine.replace(".", "_"),"sig": signature["result"]}) 
                                            for engine, signature in items)
        return virustotal
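
The URL branch above mirrors how VirusTotal appears to canonicalize a URL before hashing it: guarantee a scheme, lowercase the scheme and host, and append a trailing slash to a bare host. A standalone sketch of that normalization (the function name is illustrative; the string is encoded before hashing so the snippet also runs on Python 3):

import hashlib

def vt_normalize_url(resource):
    """Normalize a URL roughly as the module above does, then hash it."""
    if not resource.lower().startswith(("http://", "https://")):
        resource = "http://" + resource
    parts = resource.split("/")
    parts[0] = parts[0].lower()   # scheme ("http:")
    parts[2] = parts[2].lower()   # host
    if len(parts) == 3:           # bare host, add a trailing slash
        parts.append("")
    resource = "/".join(parts)
    return hashlib.sha256(resource.encode()).hexdigest()

print(vt_normalize_url("EXAMPLE.com/Path"))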
Example #43
0
    def run(self):
        """Runs VirusTotal processing
        @return: full VirusTotal report.
        """
        self.key = "virustotal"
        virustotal = []

        key = self.options.get("key", None)
        timeout = self.options.get("timeout", 60)
        urlscrub = self.options.get("urlscrub", None)
        do_file_lookup = self.getbool(self.options.get("do_file_lookup",
                                                       False))
        do_url_lookup = self.getbool(self.options.get("do_url_lookup", False))

        if not key:
            raise CuckooProcessingError("VirusTotal API key not "
                                        "configured, skip")

        if self.task["category"] == "file" and do_file_lookup:
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError(
                    "File {0} not found, skipping it".format(self.file_path))

            resource = File(self.file_path).get_md5()
            url = VIRUSTOTAL_FILE_URL

        elif self.task["category"] == "url" and do_url_lookup:
            resource = self.task["target"]
            if urlscrub:
                urlscrub_compiled_re = None
                try:
                    urlscrub_compiled_re = re.compile(urlscrub)
                except Exception as e:
                    raise CuckooProcessingError(
                        "Failed to compile urlscrub regex: %s" % e)
                try:
                    resource = re.sub(urlscrub_compiled_re, "", resource)
                except Exception as e:
                    raise CuckooProcessingError("Failed to scrub url" % (e))
            url = VIRUSTOTAL_URL_URL
        else:
            # Not supported type, exit.
            return virustotal

        data = {"resource": resource, "apikey": key}

        try:
            r = requests.get(url,
                             params=data,
                             verify=True,
                             timeout=int(timeout))
            response_data = r.content
        except requests.exceptions.RequestException as e:
            raise CuckooProcessingError("Unable to complete connection "
                                        "to VirusTotal: {0}".format(e))

        try:
            virustotal = json.loads(response_data)
        except ValueError as e:
            raise CuckooProcessingError("Unable to convert response to "
                                        "JSON: {0}".format(e))
        if "scans" in virustotal:
            items = virustotal["scans"].items()
            virustotal["scans"] = dict((engine.replace(".", "_"), signature)
                                       for engine, signature in items)
            virustotal["results"] = list(({
                "vendor": engine.replace(".", "_"),
                "sig": signature["result"]
            }) for engine, signature in items)
        return virustotal
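
Both VirusTotal modules replace dots in engine names before storing the scan results; MongoDB rejects field names containing "." (at least with the key checking used elsewhere in these reporting modules), so a vendor key such as "Engine.Name" would otherwise fail to save. A small sketch of that reshaping with made-up scan data:

scans = {
    "Example.Engine": {"result": "Trojan.Generic"},  # made-up vendor/result
    "OtherEngine": {"result": None},
}

safe_scans = dict((engine.replace(".", "_"), sig) for engine, sig in scans.items())
results = [{"vendor": engine.replace(".", "_"), "sig": sig["result"]}
           for engine, sig in scans.items()]
print(sorted(safe_scans))  # ['Example_Engine', 'OtherEngine']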
Example #44
0
    def run(self, results):
        """Writes report.
        @param results: Cuckoo results dict.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        self._connect()

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually created. 
        #  Returns None if the index already exists.
        self._db.fs.files.ensure_index("md5", unique=True, name="md5_unique")

        # Add pcap file, check for dups and in case add only reference.
        pcap_file = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_file)
        if pcap.valid():
            pcap_id = self.store_file(pcap)

            # Preventive key check.
            if "network" in results and isinstance(results["network"], dict):
                results["network"]["pcap_id"] = pcap_id
            else:
                results["network"] = {"pcap_id": pcap_id}

        # Add dropped files, check for dups and in case add only reference.
        dropped_files = {}
        for dir_name, dir_names, file_names in os.walk(os.path.join(self.analysis_path, "files")):
            for file_name in file_names:
                file_path = os.path.join(dir_name, file_name)
                drop = File(file_path)
                dropped_files[drop.get_md5()] = drop

        result_files = dict((dropped.get("md5", None), dropped) for dropped in results["dropped"])

        # The MD5s in dropped_files and result_files should normally match.
        if set(dropped_files.keys()) - set(result_files.keys()):
            log.warning("Dropped files in result dict are different from those in storage.")

        # store files in gridfs
        for md5, fileobj in dropped_files.items():
            # only store in db if we have a filename for it in results (should be all)
            resultsdrop = result_files.get(md5, None)
            if resultsdrop and fileobj.valid():
                drop_id = self.store_file(fileobj, filename=resultsdrop["name"])
                resultsdrop["dropped_id"] = drop_id

        # Add screenshots.
        results["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [f for f in os.listdir(shots_path) if f.endswith(".jpg")]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots", shot_file)
                shot = File(shot_path)
                if shot.valid():
                    shot_id = self.store_file(shot)
                    results["shots"].append(shot_id)

        # Save all remaining results.
        try:
            self._db.analysis.save(results, manipulate=False)
        except InvalidDocument:
            # The document is too big, we need to shrink it and re-save it.
            results["behavior"]["processes"] = ""

            # Let's add an error message to the debug block.
            error = ("The analysis results were too big to be stored, " +
                     "the detailed behavioral analysis has been stripped out.")
            results["debug"]["errors"].append(error)

            # Try again to store, if it fails, just abort.
            try:
                self._db.analysis.save(results)
            except Exception as e:
                raise CuckooReportError("Failed to store the document into MongoDB: %s" % e)
Example #45
0
class TestFile:
    def setUp(self):
        self.tmp = tempfile.mkstemp()
        self.file = File(self.tmp[1])

    def test_get_name(self):
        assert_equal(self.tmp[1].split("/")[-1], self.file.get_name())

    def test_get_data(self):
        assert_equal("", self.file.get_data())

    def test_get_size(self):
        assert_equal(0, self.file.get_size())

    def test_get_crc32(self):
        assert_equal("00000000", self.file.get_crc32())

    def test_get_md5(self):
        assert_equal("d41d8cd98f00b204e9800998ecf8427e", self.file.get_md5())

    def test_get_sha1(self):
        assert_equal("da39a3ee5e6b4b0d3255bfef95601890afd80709",
                     self.file.get_sha1())

    def test_get_sha256(self):
        assert_equal(
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
            self.file.get_sha256())

    def test_get_sha512(self):
        assert_equal(
            "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e",
            self.file.get_sha512())

    def test_get_ssdeep(self):
        try:
            import pydeep
            assert_not_equal(None, self.file.get_ssdeep())
        except ImportError:
            assert_equal(None, self.file.get_ssdeep())

    def test_get_type(self):
        assert_equal("empty", self.file.get_type())

    def test_get_all_type(self):
        assert isinstance(self.file.get_all(), dict)

    def test_get_all_keys(self):
        for key in [
                "name", "size", "crc32", "md5", "sha1", "sha256", "sha512",
                "ssdeep", "type"
        ]:
            assert key in self.file.get_all()

    def tearDown(self):
        os.remove(self.tmp[1])
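
The expected digests asserted above are simply the hashes of empty input, since tempfile.mkstemp creates a zero-byte file; they can be reproduced directly with the standard library:

import hashlib
import zlib

empty = b""
print(format(zlib.crc32(empty) & 0xffffffff, "08x"))  # 00000000
print(hashlib.md5(empty).hexdigest())     # d41d8cd98f00b204e9800998ecf8427e
print(hashlib.sha1(empty).hexdigest())    # da39a3ee5e6b4b0d3255bfef95601890afd80709
print(hashlib.sha256(empty).hexdigest())  # e3b0c44298fc1c14...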
Example #46
0
    def run(self):
        """Run Suricata.
        @return: hash with alerts
        """
        self.key = "suricata"
        # General
        SURICATA_CONF = self.options.get("conf", None)
        SURICATA_EVE_LOG = self.options.get("evelog", None)
        SURICATA_ALERT_LOG = self.options.get("alertlog", None)
        SURICATA_TLS_LOG = self.options.get("tlslog", None)
        SURICATA_HTTP_LOG = self.options.get("httplog", None)
        SURICATA_SSH_LOG = self.options.get("sshlog", None)
        SURICATA_DNS_LOG = self.options.get("dnslog", None)
        SURICATA_FILE_LOG = self.options.get("fileslog", None)
        SURICATA_FILES_DIR = self.options.get("filesdir", None)
        SURICATA_RUNMODE = self.options.get("runmode", None)
        SURICATA_FILE_BUFFER = self.options.get("buffer", 8192)
        Z7_PATH = self.options.get("7zbin", None)
        FILES_ZIP_PASS = self.options.get("zippass", None)

        # Socket
        SURICATA_SOCKET_PATH = self.options.get("socket_file", None)

        # Command Line
        SURICATA_BIN = self.options.get("bin", None)

        suricata = dict()
        suricata["alerts"] = []
        suricata["tls"] = []
        suricata["perf"] = []
        suricata["files"] = []
        suricata["http"] = []
        suricata["dns"] = []
        suricata["ssh"] = []
        suricata["fileinfo"] = []

        suricata["eve_log_full_path"] = None
        suricata["alert_log_full_path"] = None
        suricata["tls_log_full_path"] = None
        suricata["http_log_full_path"] = None
        suricata["file_log_full_path"] = None
        suricata["ssh_log_full_path"] = None
        suricata["dns_log_full_path"] = None

        tls_items = [
            "fingerprint", "issuer", "version", "subject", "sni", "ja3",
            "serial"
        ]

        SURICATA_ALERT_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                  SURICATA_ALERT_LOG)
        SURICATA_TLS_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_TLS_LOG)
        SURICATA_HTTP_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                 SURICATA_HTTP_LOG)
        SURICATA_SSH_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_SSH_LOG)
        SURICATA_DNS_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_DNS_LOG)
        SURICATA_EVE_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_EVE_LOG)
        SURICATA_FILE_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                 SURICATA_FILE_LOG)
        SURICATA_FILES_DIR_FULL_PATH = "%s/%s" % (self.logs_path,
                                                  SURICATA_FILES_DIR)

        separate_log_paths = [
            ("alert_log_full_path", SURICATA_ALERT_LOG_FULL_PATH),
            ("tls_log_full_path", SURICATA_TLS_LOG_FULL_PATH),
            ("http_log_full_path", SURICATA_HTTP_LOG_FULL_PATH),
            ("ssh_log_full_path", SURICATA_SSH_LOG_FULL_PATH),
            ("dns_log_full_path", SURICATA_DNS_LOG_FULL_PATH)
        ]

        # handle reprocessing
        all_log_paths = [x[1] for x in separate_log_paths] + \
            [SURICATA_EVE_LOG_FULL_PATH, SURICATA_FILE_LOG_FULL_PATH]
        for log_path in all_log_paths:
            if os.path.exists(log_path):
                try:
                    os.unlink(log_path)
                except:
                    pass
        if os.path.isdir(SURICATA_FILES_DIR_FULL_PATH):
            try:
                shutil.rmtree(SURICATA_FILES_DIR_FULL_PATH, ignore_errors=True)
            except:
                pass

        if not os.path.exists(SURICATA_CONF):
            log.warning(
                "Unable to Run Suricata: Conf File {} Does Not Exist".format(
                    SURICATA_CONF))
            return suricata["alerts"]
        if not os.path.exists(self.pcap_path):
            log.warning(
                "Unable to Run Suricata: Pcap file {} Does Not Exist".format(
                    self.pcap_path))
            return suricata["alerts"]

        # Add to this if you wish to ignore any SIDs for the Suricata alert logs.
        # Useful for ignoring SIDs without disabling them. Ex: suppress an alert for
        # a SID which is a dependent of another. (Bad TCP data for HTTP(S) alert)
        sid_blacklist = [
            # SURICATA FRAG IPv6 Fragmentation overlap
            2200074,
            # ET INFO InetSim Response from External Source Possible SinkHole
            2017363,
            # SURICATA UDPv4 invalid checksum
            2200075,
            # ET POLICY SSLv3 outbound connection from client vulnerable to POODLE attack
            2019416,
        ]

        if SURICATA_RUNMODE == "socket":
            try:
                #from suricatasc import SuricataSC
                from lib.cuckoo.common.suricatasc import SuricataSC
            except Exception as e:
                log.warning("Failed to import suricatasc lib {}".format(e))
                return suricata

            loopcnt = 0
            maxloops = 24
            loopsleep = 5

            args = dict()
            args["filename"] = self.pcap_path
            args["output-dir"] = self.logs_path

            suris = SuricataSC(SURICATA_SOCKET_PATH)
            try:
                suris.connect()
                suris.send_command("pcap-file", args)
            except Exception as e:
                log.warning(
                    "Failed to connect to socket and send command {}: {}".
                    format(SURICATA_SOCKET_PATH, e))
                return suricata
            while loopcnt < maxloops:
                try:
                    pcap_flist = suris.send_command("pcap-file-list")
                    current_pcap = suris.send_command("pcap-current")
                    log.debug("pcapfile list: {} current pcap: {}".format(
                        pcap_flist, current_pcap))

                    if self.pcap_path not in pcap_flist["message"]["files"] and \
                            current_pcap["message"] != self.pcap_path:
                        log.debug(
                            "Pcap not in the list and not the current pcap; assuming it has been processed"
                        )
                        break
                    else:
                        loopcnt = loopcnt + 1
                        time.sleep(loopsleep)
                except Exception as e:
                    log.warning(
                        "Failed to get pcap status breaking out of loop {}".
                        format(e))
                    break

            if loopcnt == maxloops:
                logstr = "Loop timeout of {} sec occurred waiting for file {} to finish processing"
                log.warning(logstr.format(maxloops * loopsleep, current_pcap))
                return suricata
        elif SURICATA_RUNMODE == "cli":
            if not os.path.exists(SURICATA_BIN):
                log.warning(
                    "Unable to Run Suricata: Bin File {} Does Not Exist".
                    format(SURICATA_BIN))
                return suricata["alerts"]
            cmdstr = "{} -c {} -k none -l {} -r {}"
            cmd = cmdstr.format(SURICATA_BIN, SURICATA_CONF, self.logs_path,
                                self.pcap_path)
            ret, stdout, stderr = self.cmd_wrapper(cmd)
            if ret != 0:
                log.warning(
                    "Suricata returned an Exit Value Other than Zero {}".format(
                        stderr))
                return suricata

        else:
            log.warning("Unknown Suricata Runmode")
            return suricata

        datalist = []
        if os.path.exists(SURICATA_EVE_LOG_FULL_PATH):
            suricata["eve_log_full_path"] = SURICATA_EVE_LOG_FULL_PATH
            with open(SURICATA_EVE_LOG_FULL_PATH, "rb") as eve_log:
                datalist.append(eve_log.read())
        else:
            for path in separate_log_paths:
                if os.path.exists(path[1]):
                    suricata[path[0]] = path[1]
                    with open(path[1], "rb") as the_log:
                        datalist.append(the_log.read())

        if not datalist:
            log.warning("Suricata: Failed to find usable Suricata log file")

        parsed_files = []
        for data in datalist:
            for line in data.splitlines():
                try:
                    parsed = json.loads(line)
                except:
                    log.warning(
                        "Suricata: Failed to parse line {} as json".format(
                            line))
                    continue

                if 'event_type' in parsed:
                    if parsed["event_type"] == "alert":
                        if (parsed["alert"]["signature_id"]
                                not in sid_blacklist and not parsed["alert"]
                            ["signature"].startswith("SURICATA STREAM")):
                            alog = dict()
                            if parsed["alert"]["gid"] == '':
                                alog["gid"] = "None"
                            else:
                                alog["gid"] = parsed["alert"]["gid"]
                            if parsed["alert"]["rev"] == '':
                                alog["rev"] = "None"
                            else:
                                alog["rev"] = parsed["alert"]["rev"]
                            if parsed["alert"]["severity"] == '':
                                alog["severity"] = "None"
                            else:
                                alog["severity"] = parsed["alert"]["severity"]
                            alog["sid"] = parsed["alert"]["signature_id"]
                            try:
                                alog["srcport"] = parsed["src_port"]
                            except:
                                alog["srcport"] = "None"
                            alog["srcip"] = parsed["src_ip"]
                            try:
                                alog["dstport"] = parsed["dest_port"]
                            except:
                                alog["dstport"] = "None"
                            alog["dstip"] = parsed["dest_ip"]
                            alog["protocol"] = parsed["proto"]
                            alog["timestamp"] = parsed["timestamp"].replace(
                                "T", " ")
                            if parsed["alert"]["category"] == '':
                                alog["category"] = "None"
                            else:
                                alog["category"] = parsed["alert"]["category"]
                            alog["signature"] = parsed["alert"]["signature"]
                            suricata["alerts"].append(alog)

                    elif parsed["event_type"] == "http":
                        hlog = dict()
                        hlog["srcport"] = parsed["src_port"]
                        hlog["srcip"] = parsed["src_ip"]
                        hlog["dstport"] = parsed["dest_port"]
                        hlog["dstip"] = parsed["dest_ip"]
                        hlog["timestamp"] = parsed["timestamp"].replace(
                            "T", " ")
                        keyword = ("uri", "length", "hostname", "status",
                                   "http_method", "contenttype", "ua",
                                   "referrer")
                        keyword_suri = ("url", "length", "hostname", "status",
                                        "http_method", "http_content_type",
                                        "http_user_agent", "http_refer")
                        for key, key_s in zip(keyword, keyword_suri):
                            try:
                                hlog[key] = parsed["http"].get(key_s, "None")
                            except:
                                hlog[key] = "None"
                        suricata["http"].append(hlog)

                    elif parsed["event_type"] == "tls":
                        tlog = dict()
                        tlog["srcport"] = parsed["src_port"]
                        tlog["srcip"] = parsed["src_ip"]
                        tlog["dstport"] = parsed["dest_port"]
                        tlog["dstip"] = parsed["dest_ip"]
                        tlog["timestamp"] = parsed["timestamp"].replace(
                            "T", " ")
                        for key in tls_items:
                            if key in parsed["tls"]:
                                tlog[key] = parsed["tls"][key]
                        suricata["tls"].append(tlog)

                    elif parsed["event_type"] == "ssh":
                        suricata["ssh"].append(parsed)
                    elif parsed["event_type"] == "dns":
                        suricata["dns"].append(parsed)
                    elif parsed["event_type"] == "fileinfo":
                        flog = dict()
                        flog["http_host"] = parsed.get("http",
                                                       {}).get("hostname", "")
                        flog["http_uri"] = parsed.get("http",
                                                      {}).get("url", "")
                        flog["http_referer"] = parsed.get("http", {}).get(
                            "referer", "")
                        flog["http_user_agent"] = parsed.get("http", {}).get(
                            "http_user_agent", "")
                        flog["protocol"] = parsed.get("proto", "")
                        flog["magic"] = parsed.get("fileinfo",
                                                   {}).get("magic", "")
                        flog["size"] = parsed.get("fileinfo",
                                                  {}).get("size", "")
                        flog["stored"] = parsed.get("fileinfo",
                                                    {}).get("stored", "")
                        flog["sha256"] = parsed.get("fileinfo",
                                                    {}).get("sha256", "")
                        flog["md5"] = parsed.get("fileinfo", {}).get("md5", "")
                        flog["filename"] = parsed.get("fileinfo",
                                                      {}).get("filename", "")
                        flog["file_info"] = dict()
                        if "/" in flog["filename"]:
                            flog["filename"] = flog["filename"].split("/")[-1]
                        parsed_files.append(flog)

        if parsed_files:
            for sfile in parsed_files:
                if sfile.get("stored", False):
                    filename = sfile["sha256"]
                    src_file = "{}/{}/{}".format(SURICATA_FILES_DIR_FULL_PATH,
                                                 filename[0:2], filename)
                    dst_file = "{}/{}".format(SURICATA_FILES_DIR_FULL_PATH,
                                              filename)
                    if os.path.exists(src_file):
                        try:
                            shutil.move(src_file, dst_file)
                        except OSError as e:
                            log.warning(
                                "Unable to move suricata file: {}".format(e))
                            break
                        file_info = File(file_path=dst_file).get_all()
                        try:
                            with open(file_info["path"], "r") as drop_open:
                                filedata = drop_open.read(
                                    SURICATA_FILE_BUFFER + 1)
                            if len(filedata) > SURICATA_FILE_BUFFER:
                                file_info["data"] = convert_to_printable(
                                    filedata[:SURICATA_FILE_BUFFER] +
                                    " <truncated>")
                            else:
                                file_info["data"] = convert_to_printable(
                                    filedata)
                        except UnicodeDecodeError as e:
                            pass
                        if file_info:
                            sfile["file_info"] = file_info
                    suricata["files"].append(sfile)
            with open(SURICATA_FILE_LOG_FULL_PATH, "w") as drop_log:
                drop_log.write(json.dumps(suricata["files"], indent=4))

            # Cleanup file subdirectories left behind by messy Suricata
            for d in [
                    dirpath
                    for (dirpath, dirnames,
                         filenames) in os.walk(SURICATA_FILES_DIR_FULL_PATH)
                    if len(dirnames) == 0 and len(filenames) == 0
            ]:
                try:
                    shutil.rmtree(d)
                except OSError as e:
                    log.warning(
                        "Unable to delete suricata file subdirectories: {}".
                        format(e))

        if SURICATA_FILES_DIR_FULL_PATH and os.path.exists(SURICATA_FILES_DIR_FULL_PATH) and Z7_PATH \
                and os.path.exists(Z7_PATH):
            # /usr/bin/7z a -pinfected -y files.zip files-json.log files
            cmdstr = "cd {} && {} a -p{} -y files.zip {} {}"
            cmd = cmdstr.format(self.logs_path, Z7_PATH, FILES_ZIP_PASS,
                                SURICATA_FILE_LOG, SURICATA_FILES_DIR)
            ret, stdout, stderr = self.cmd_wrapper(cmd)
            if ret > 1:
                log.warning(
                    "Suricata: Failed to create {}/files.zip - Error {}".
                    format(self.logs_path, ret))

        suricata["alerts"] = self.sort_by_timestamp(suricata["alerts"])
        suricata["http"] = self.sort_by_timestamp(suricata["http"])
        suricata["tls"] = self.sort_by_timestamp(suricata["tls"])

        return suricata
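
The core of the module above is the walk over EVE JSON lines: each line is decoded independently, and alert events are kept only when the signature ID is not blacklisted and the signature text does not start with "SURICATA STREAM". A trimmed-down sketch of that filter (function name and sample line are illustrative):

import json

SID_BLACKLIST = {2200074, 2017363, 2200075, 2019416}

def filter_alerts(eve_lines, sid_blacklist=SID_BLACKLIST):
    """Yield alert events that pass the SID blacklist and stream-noise checks."""
    for line in eve_lines:
        try:
            event = json.loads(line)
        except ValueError:
            continue
        if event.get("event_type") != "alert":
            continue
        alert = event["alert"]
        if alert["signature_id"] in sid_blacklist:
            continue
        if alert["signature"].startswith("SURICATA STREAM"):
            continue
        yield event

sample = ['{"event_type": "alert", "alert": {"signature_id": 1, "signature": "ET TEST"}}']
print(len(list(filter_alerts(sample))))  # 1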
Example #47
0
 def setUp(self):
     self.tmp = tempfile.mkstemp()
     self.file = File(self.tmp[1])
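
tempfile.mkstemp returns an already-open file descriptor together with the path, so a matching tearDown would normally close that descriptor as well as unlink the file (the tearDown in Example #45 only removes the path); a minimal sketch, assuming the same setUp and an os import:

 def tearDown(self):
     os.close(self.tmp[0])   # close the descriptor returned by mkstemp
     os.remove(self.tmp[1])  # delete the temporary file itself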
Example #48
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to S3.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        self.s3_region = self.options.get("region", "us-west-2")
        self.s3_access_key = self.options.get("access_key", "")
        self.s3_secret_key = self.options.get("secret_key", "")
        s3_reports_bucket_name = self.options.get("reports_bucket", "")
        s3_shots_bucket_name = self.options.get("shots_bucket", "")
        s3_samples_bucket_name = self.options.get("samples_bucket", "")
        s3_files_bucket_name = self.options.get("files_bucket", "")
        s3_aux_bucket_name = self.options.get("aux_bucket", "")
        s3_logs_bucket_name = self.options.get("logs_bucket", "")
        s3_pcap_bucket_name = self.options.get("pcap_bucket", "")
        s3_md5_bucket_name = self.options.get("md5_bucket", "")
        cleanup = self.options.get("cleanup", False)

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        if not "network" in report:
            report["network"] = {}

        # Add screenshot paths
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [
                shot for shot in os.listdir(shots_path)
                if shot.endswith(".jpg")
            ]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                screenshot = File(shot_path)
                if screenshot.valid():
                    #report["shots"].append("{0}/{1}".format(results['info']['id'], shot_file))
                    report["shots"].append(shot_file.replace(".jpg", ""))

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report.
        # Also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                chunk_count = 0
                # Using this type of prefix is useful because you can always re-construct it from
                # the original results
                #chunk_prefix = str(results['info']['id']) + '/' + process['process_name']
                chunk_prefix = str(results['info']['id']) + '/' + str(
                    process['process_id'])
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed then
                    # store the chunk in S3.

                    if len(chunk) == 100:
                        chunk_name = "{0}.{1}".format(chunk_prefix,
                                                      chunk_count)
                        #log.debug("INFO TIME!")
                        #log.debug("%s %s %s" %(s3_reports_bucket_name, chunk_name, chunk_prefix))
                        #log.debug(chunk_prefix)
                        err = self.save_to_s3(s3_reports_bucket_name,
                                              chunk_name, json.dumps(chunk))
                        if err != '':
                            log.error(
                                "Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}"
                                .format(chunk_name, err))
                        else:
                            chunks_ids.append("{0}.{1}".format(
                                chunk_prefix, chunk_count))
                            chunk_count += 1
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count)
                    #log.debug("%s %s %s" %(s3_reports_bucket_name, chunk_name, chunk_prefix))
                    err = self.save_to_s3(s3_reports_bucket_name, chunk_name,
                                          json.dumps(chunk))
                    if err != '':
                        log.error(
                            "Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}"
                            .format(chunk_name, err))
                    else:
                        chunks_ids.append("{0}.{1}".format(
                            chunk_prefix, chunk_count))

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Other info we want quick access to from the web UI.
        if results.has_key("virustotal") and results["virustotal"] and results[
                "virustotal"].has_key(
                    "positives") and results["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (
                results["virustotal"]["positives"],
                results["virustotal"]["total"])
        if results.has_key("suricata") and results["suricata"]:
            if results["suricata"].has_key("tls") and len(
                    results["suricata"]["tls"]) > 0:
                report["suri_tls_cnt"] = len(results["suricata"]["tls"])
            if results["suricata"].has_key("alerts") and len(
                    results["suricata"]["alerts"]) > 0:
                report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
            if results["suricata"].has_key("files") and len(
                    results["suricata"]["files"]) > 0:
                report["suri_file_cnt"] = len(results["suricata"]["files"])
            if results["suricata"].has_key("http") and len(
                    results["suricata"]["http"]) > 0:
                report["suri_http_cnt"] = len(results["suricata"]["http"])
            if results["suricata"].has_key("ssh") and len(
                    results["suricata"]["ssh"]) > 0:
                report["suri_ssh_cnt"] = len(results["suricata"]["ssh"])
            if results["suricata"].has_key("dns") and len(
                    results["suricata"]["dns"]) > 0:
                report["suri_dns_cnt"] = len(results["suricata"]["dns"])

        # Store the report (its 'object id' is simply the analysis id).
        # First make sure it's not too big (5 GB limit).
        data = json.dumps(report)
        if len(data) < 5000000000:
            err = self.save_to_s3(s3_reports_bucket_name,
                                  results['info']['id'], data)
            if err != '':
                log.error(
                    "Non-size related issue saving analysis JSON to S3 for report {0} - {1}"
                    .format(results['info']['id'], err))
        else:
            log.error("JSON for analysis id {0} is greater than 5GB".format(
                results['info']['id']))

        # Process the rest of the analysis files and put them in S3.
        if s3_shots_bucket_name != '':
            shots_path = os.path.join(results['info']['id'],
                                      self.analysis_path, "shots")
            self.relocate_to_s3(results['info']['id'], shots_path,
                                s3_shots_bucket_name)
        if s3_pcap_bucket_name != '':
            if os.path.isfile(self.analysis_path + '/dump.pcap'):
                with open(self.analysis_path + '/dump.pcap', 'rb') as infile:
                    self.save_to_s3(
                        s3_pcap_bucket_name,
                        "{0}/dump.pcap".format(results['info']['id']),
                        infile.read())
            if os.path.isfile(self.analysis_path + '/dump_sorted.pcap'):
                with open(self.analysis_path + '/dump_sorted.pcap',
                          'rb') as infile:
                    self.save_to_s3(
                        s3_pcap_bucket_name,
                        "{0}/dump_sorted.pcap".format(results['info']['id']),
                        infile.read())
        if s3_aux_bucket_name != '':
            aux_path = os.path.join(results['info']['id'], self.analysis_path,
                                    "aux")
            self.relocate_to_s3(results['info']['id'], aux_path,
                                s3_aux_bucket_name)
        if s3_logs_bucket_name != '':
            logs_path = os.path.join(results['info']['id'], self.analysis_path,
                                     "logs")
            self.relocate_to_s3(results['info']['id'], logs_path,
                                s3_logs_bucket_name)
        if s3_samples_bucket_name != '':
            sample = os.path.realpath(self.analysis_path + '/binary')
            with open(sample, 'rb') as infile:
                self.save_to_s3(s3_samples_bucket_name,
                                results['target']['file']['sha256'],
                                infile.read())
        #log.debug(s3_files_bucket_name)
        if s3_files_bucket_name != '':
            #log.debug(self.analysis_path)
            for root, dirnames, filenames in os.walk(self.analysis_path +
                                                     '/files'):
                #log.debug(filenames)
                for filename in filenames:
                    key_name = str(results['info']['id']) + '/' + root.split(
                        os.sep)[-1] + '/' + filename
                    if not filename.endswith('_info.txt'):
                        key_name = str(
                            results['info']['id']) + '/' + root.split(
                                os.sep)[-1]
                    #log.debug(key_name)
                    with open(os.path.join(root, filename), 'rb') as infile:
                        self.save_to_s3(s3_files_bucket_name, key_name,
                                        infile.read())
        if s3_md5_bucket_name != '':
            info = {}
            info.update(report['info'])
            info.update(report['target'])
            self.save_to_s3(s3_md5_bucket_name,
                            results['target']['file']['md5'], json.dumps(info))

        if cleanup:
            shutil.rmtree(self.analysis_path)
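
The S3 variant reuses the same paging idea but stores each page under a key built from the task id and process id, "<task_id>/<pid>.<chunk_count>", so the call list can later be reassembled by listing that prefix. A sketch of the key layout with a dict standing in for the bucket (all names here are illustrative):

import json

def store_call_chunks(task_id, pid, calls, page_size=100, bucket=None):
    """Upload pages of calls under "<task_id>/<pid>.<n>" keys and return the keys."""
    bucket = {} if bucket is None else bucket
    prefix = "{0}/{1}".format(task_id, pid)
    keys, chunk, count = [], [], 0
    for call in calls:
        if len(chunk) == page_size:
            key = "{0}.{1}".format(prefix, count)
            bucket[key] = json.dumps(chunk)
            keys.append(key)
            count += 1
            chunk = []
        chunk.append(call)
    if chunk:
        key = "{0}.{1}".format(prefix, count)
        bucket[key] = json.dumps(chunk)
        keys.append(key)
    return keys

print(store_call_chunks(42, 1234, list(range(150))))  # ['42/1234.0', '42/1234.1']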
Example #49
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on every analysis. It only
        # needs to run once at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one(
            )["version"] != self.SCHEMA_VERSION:
                raise CuckooReportError(
                    "Mongo schema version not expected, check data migration tool"
                )
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        if "network" not in report:
            report["network"] = {}

        # Add screenshot paths
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [
                shot for shot in os.listdir(shots_path)
                if shot.endswith(".jpg")
            ]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                screenshot = File(shot_path)
                if screenshot.valid():
                    # Strip the extension as it's added later
                    # in the Django view
                    report["shots"].append(shot_file.replace(".jpg", ""))

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        new_processes = []

        for process in report.get("behavior", {}).get("processes", []) or []:
            new_process = dict(process)
            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []
                # Append call to the chunk.
                chunk.append(call)
            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)
        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes
        # Calculate the mlist_cnt for display if present to reduce db load
        if "signatures" in results:
            for entry in results["signatures"]:
                if entry["name"] == "ie_martian_children":
                    report["mlist_cnt"] = len(entry["data"])
                if entry["name"] == "office_martian_children":
                    report["f_mlist_cnt"] = len(entry["data"])

        # Other info we want quick access to from the web UI
        if results.has_key("virustotal") and results["virustotal"] and results[
                "virustotal"].has_key(
                    "positives") and results["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (
                results["virustotal"]["positives"],
                results["virustotal"]["total"])
        if results.get("suricata", False):

            keywords = ("tls", "alerts", "files", "http", "ssh", "dns")
            keywords_dict = ("suri_tls_cnt", "suri_alert_cnt", "suri_file_cnt",
                             "suri_http_cnt", "suri_ssh_cnt", "suri_dns_cnt")
            for keyword, keyword_value in zip(keywords, keywords_dict):
                if results["suricata"].get(keyword, 0):
                    report[keyword_value] = len(results["suricata"][keyword])

        # Create an index based on the info.id dict key. Increases overall scalability
        # with large amounts of data.
        # Note: Silently ignores the creation if the index already exists.
        self.db.analysis.create_index("info.id", background=True)

        # Trick for the distributed API.
        if results.get("info", {}).get("options", {}).get("main_task_id", ""):
            report["info"]["id"] = int(
                results["info"]["options"]["main_task_id"])

        analyses = self.db.analysis.find(
            {"info.id": int(report["info"]["id"])})
        if analyses.count() > 0:
            log.debug("Deleting analysis data for Task %s" %
                      report["info"]["id"])
            for analysis in analyses:
                for process in analysis["behavior"]["processes"]:
                    for call in process["calls"]:
                        self.db.calls.remove({"_id": ObjectId(call)})
                self.db.analysis.remove({"_id": ObjectId(analysis["_id"])})
            log.debug("Deleted previous MongoDB data for Task %s" %
                      report["info"]["id"])

        self.ensure_valid_utf8(report)

        # Store the report and retrieve its object id.
        try:
            self.db.analysis.save(report, check_keys=False)
        except InvalidDocument as e:
            parent_key, psize = self.debug_dict_size(report)[0]
            if not self.options.get("fix_large_docs", False):
                # Just log the error and problem keys
                log.error(str(e))
                log.error("Largest parent key: %s (%d MB)" %
                          (parent_key, int(psize) / MEGABYTE))
            else:
                # Delete the problem keys and check for more
                error_saved = True
                size_filter = MONGOSIZELIMIT
                while error_saved:
                    if type(report) == list:
                        report = report[0]
                    try:
                        if type(report[parent_key]) == list:
                            for j, parent_dict in enumerate(
                                    report[parent_key]):
                                child_key, csize = self.debug_dict_size(
                                    parent_dict, parent_key)[0]
                                if csize > size_filter:
                                    if parent_key == child_key:
                                        log.warn(
                                            "results['%s'] deleted due to size: %s"
                                            % (parent_key, csize))
                                        del report[parent_key]
                                        break
                                    else:
                                        log.warn(
                                            "results['%s']['%s'] deleted due to size: %s"
                                            % (parent_key, child_key, csize))
                                        del report[parent_key][j][child_key]
                        else:
                            child_key, csize = self.debug_dict_size(
                                report[parent_key], parent_key)[0]
                            if csize > size_filter:
                                log.warn(
                                    "else - results['%s']['%s'] deleted due to size: %s"
                                    % (parent_key, child_key, csize))
                                del report[parent_key][child_key]
                        try:
                            self.db.analysis.save(report, check_keys=False)
                            error_saved = False
                        except InvalidDocument as e:
                            parent_key, psize = self.debug_dict_size(report)[0]
                            log.error(str(e))
                            log.error("Largest parent key: %s (%d MB)" %
                                      (parent_key, int(psize) / MEGABYTE))
                            size_filter = size_filter - MEGABYTE
                    except Exception as e:
                        log.error("Failed to delete child key: %s" % str(e))
                        error_saved = False

        self.conn.close()
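
The recovery branch above leans on a debug_dict_size helper to locate the heaviest key before deleting it; its implementation is not shown here, but the idea can be approximated by measuring each top-level value's serialized size (JSON length is used below as a rough stand-in, which is an assumption rather than the module's actual method):

import json

def approx_dict_size(report):
    """Return (key, size) pairs sorted largest-first by JSON-encoded size."""
    sizes = []
    for key, value in report.items():
        try:
            sizes.append((key, len(json.dumps(value, default=str))))
        except (TypeError, ValueError):
            sizes.append((key, 0))
    return sorted(sizes, key=lambda item: item[1], reverse=True)

report = {"behavior": {"processes": ["x" * 1000]}, "info": {"id": 1}}
print(approx_dict_size(report)[0][0])  # behavior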
Example #50
0
 def setUp(self):
     self.tmp = tempfile.mkstemp()
     self.file = File(self.tmp[1])
Example #51
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("target",
                        type=str,
                        help="URL, path to the file or folder to analyze")
    parser.add_argument("-d",
                        "--debug",
                        action="store_true",
                        help="Enable debug logging")
    parser.add_argument(
        "--remote",
        type=str,
        action="store",
        default=None,
        help="Specify IP:port to a Cuckoo API server to submit remotely",
        required=False)
    parser.add_argument("--user",
                        type=str,
                        action="store",
                        default=None,
                        help="Username for Basic Auth",
                        required=False)
    parser.add_argument("--password",
                        type=str,
                        action="store",
                        default=None,
                        help="Password for Basic Auth",
                        required=False)
    parser.add_argument("--sslnoverify",
                        action="store_true",
                        default=False,
                        help="Do not validate SSL cert",
                        required=False)
    parser.add_argument("--ssl",
                        action="store_true",
                        default=False,
                        help="Use SSL/TLS for remote",
                        required=False)
    parser.add_argument("--url",
                        action="store_true",
                        default=False,
                        help="Specify whether the target is an URL",
                        required=False)
    parser.add_argument("--package",
                        type=str,
                        action="store",
                        default="",
                        help="Specify an analysis package",
                        required=False)
    parser.add_argument("--custom",
                        type=str,
                        action="store",
                        default="",
                        help="Specify any custom value",
                        required=False)
    parser.add_argument("--timeout",
                        type=int,
                        action="store",
                        default=0,
                        help="Specify an analysis timeout",
                        required=False)
    parser.add_argument(
        "--options",
        type=str,
        action="store",
        default="",
        help="Specify options for the analysis package (e.g. \"name=value,name2=value2\")",
        required=False)
    parser.add_argument(
        "--priority",
        type=int,
        action="store",
        default=1,
        help="Specify a priority for the analysis represented by an integer",
        required=False)
    parser.add_argument(
        "--machine",
        type=str,
        action="store",
        default="",
        help="Specify the identifier of a machine you want to use",
        required=False)
    parser.add_argument(
        "--platform",
        type=str,
        action="store",
        default="",
        help="Specify the operating system platform you want to use (windows/darwin/linux)",
        required=False)
    parser.add_argument(
        "--memory",
        action="store_true",
        default=False,
        help="Enable to take a memory dump of the analysis machine",
        required=False)
    parser.add_argument(
        "--enforce-timeout",
        action="store_true",
        default=False,
        help="Enable to force the analysis to run for the full timeout period",
        required=False)
    parser.add_argument("--clock",
                        type=str,
                        action="store",
                        default=None,
                        help="Set virtual machine clock",
                        required=False)
    parser.add_argument(
        "--tags",
        type=str,
        action="store",
        default=None,
        help="Specify tags identifier of a machine you want to use",
        required=False)
    parser.add_argument("--max",
                        type=int,
                        action="store",
                        default=None,
                        help="Maximum samples to add in a row",
                        required=False)
    parser.add_argument("--pattern",
                        type=str,
                        action="store",
                        default=None,
                        help="Pattern of files to submit",
                        required=False)
    parser.add_argument("--shuffle",
                        action="store_true",
                        default=False,
                        help="Shuffle samples before submitting them",
                        required=False)
    parser.add_argument("--unique",
                        action="store_true",
                        default=False,
                        help="Only submit new samples, ignore duplicates",
                        required=False)
    parser.add_argument("--quiet",
                        action="store_true",
                        default=False,
                        help="Only print text on failure",
                        required=False)
    parser.add_argument("--gateway",
                        type=str,
                        action="store",
                        default=None,
                        help="Set the default gateway for the task",
                        required=False)
    try:
        args = parser.parse_args()
    except IOError as e:
        parser.error(e)
        return False

    # If the quiet flag has been set, then we also disable the "warning"
    # level of the logging module. (E.g., when pydeep has not been installed,
    # there will be a warning message, because Cuckoo can't resolve the
    # ssdeep hash of this particular sample.)
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig()

    if args.quiet:
        logging.disable(logging.WARNING)

    db = Database()

    target = to_unicode(args.target)

    sane_timeout = min(args.timeout, 60 * 60 * 24)

    gateways = Config("auxiliary").gateways
    if args.gateway and args.gateway in gateways:
        if "," in gateways[args.gateway]:
            tgateway = random.choice(gateways[args.gateway].split(","))
            ngateway = gateways[tgateway]
        else:
            ngateway = gateways[args.gateway]
        if args.options:
            args.options += ","
        args.options += "setgw=%s" % (ngateway)

    if args.url:
        if args.remote:
            if not HAVE_REQUESTS:
                print(
                    bold(red("Error")) +
                    ": you need to install python-requests (`pip install requests`)"
                )
                return False

            if args.ssl:
                url = "https://{0}/tasks/create/url".format(args.remote)
            else:
                url = "http://{0}/tasks/create/url".format(args.remote)

            data = dict(url=target,
                        package=args.package,
                        timeout=sane_timeout,
                        options=args.options,
                        priority=args.priority,
                        machine=args.machine,
                        platform=args.platform,
                        memory=args.memory,
                        enforce_timeout=args.enforce_timeout,
                        custom=args.custom,
                        tags=args.tags)

            try:
                if args.user and args.password:
                    if args.ssl:
                        if args.sslnoverify:
                            verify = False
                        else:
                            verify = True
                        response = requests.post(url,
                                                 auth=(args.user,
                                                       args.password),
                                                 data=data,
                                                 verify=verify)
                    else:
                        response = requests.post(url,
                                                 auth=(args.user,
                                                       args.password),
                                                 data=data)
                else:
                    if args.ssl:
                        if args.sslnoverify:
                            verify = False
                        else:
                            verify = True
                        response = requests.post(url, data=data, verify=verify)
                    else:
                        response = requests.post(url, data=data)

            except Exception as e:
                print(
                    bold(red("Error")) + ": unable to send URL: {0}".format(e))
                return False

            resp = response.json()
            task_id = resp["task_id"]
        else:
            task_id = db.add_url(target,
                                 package=args.package,
                                 timeout=sane_timeout,
                                 options=args.options,
                                 priority=args.priority,
                                 machine=args.machine,
                                 platform=args.platform,
                                 custom=args.custom,
                                 memory=args.memory,
                                 enforce_timeout=args.enforce_timeout,
                                 clock=args.clock,
                                 tags=args.tags)

        if task_id:
            if not args.quiet:
                print(
                    bold(green("Success")) +
                    u": URL \"{0}\" added as task with ID {1}".format(
                        target, task_id))
        else:
            print(bold(red("Error")) + ": adding task to database")
    else:
        # Get absolute path to deal with relative.
        path = to_unicode(os.path.abspath(target))

        if not os.path.exists(path):
            print(
                bold(red("Error")) +
                u": the specified file/folder does not exist at path \"{0}\"".
                format(path))
            return False

        files = []
        if os.path.isdir(path):
            for dirname, dirnames, filenames in os.walk(path):
                for file_name in filenames:
                    file_path = os.path.join(dirname, file_name)

                    if os.path.isfile(file_path):
                        if args.pattern:
                            if fnmatch.fnmatch(file_name, args.pattern):
                                files.append(to_unicode(file_path))
                        else:
                            files.append(to_unicode(file_path))
        else:
            files.append(path)

        if args.shuffle:
            random.shuffle(files)
        else:
            files = sorted(files)

        for file_path in files:
            if not File(file_path).get_size():
                if not args.quiet:
                    print(
                        bold(
                            yellow("Empty") +
                            ": sample {0} (skipping file)".format(file_path)))

                continue

            if args.max is not None:
                # Break if the maximum number of samples has been reached.
                if not args.max:
                    break

                args.max -= 1

            if args.remote:
                if not HAVE_REQUESTS:
                    print(
                        bold(red("Error")) +
                        ": you need to install python-requests (`pip install requests`)"
                    )
                    return False
                if args.ssl:
                    url = "https://{0}/tasks/create/file".format(args.remote)
                else:
                    url = "http://{0}/tasks/create/file".format(args.remote)

                files = dict(file=open(file_path, "rb"),
                             filename=os.path.basename(file_path))

                data = dict(package=args.package,
                            timeout=sane_timeout,
                            options=args.options,
                            priority=args.priority,
                            machine=args.machine,
                            platform=args.platform,
                            memory=args.memory,
                            enforce_timeout=args.enforce_timeout,
                            custom=args.custom,
                            tags=args.tags)

                try:
                    if args.user and args.password:
                        if args.ssl:
                            if args.sslnoverify:
                                verify = False
                            else:
                                verify = True
                            response = requests.post(url,
                                                     auth=(args.user,
                                                           args.password),
                                                     files=files,
                                                     data=data,
                                                     verify=verify)
                        else:
                            response = requests.post(url,
                                                     auth=(args.user,
                                                           args.password),
                                                     files=files,
                                                     data=data)
                    else:
                        if args.ssl:
                            if args.sslnoverify:
                                verify = False
                            else:
                                verify = True
                            response = requests.post(url,
                                                     files=files,
                                                     data=data,
                                                     verify=verify)
                        else:
                            response = requests.post(url,
                                                     files=files,
                                                     data=data)

                except Exception as e:
                    print(
                        bold(red("Error")) +
                        ": unable to send file: {0}".format(e))
                    return False

                resp = response.json()
                task_ids = [resp.get("task_ids", None)]

            else:
                if args.unique:
                    sha256 = File(file_path).get_sha256()
                    if db.find_sample(sha256=sha256) is not None:
                        msg = ": Sample {0} (skipping file)".format(file_path)
                        if not args.quiet:
                            print(bold(yellow("Duplicate")) + msg)
                        continue

                task_ids = db.demux_sample_and_add_to_db(
                    file_path=file_path,
                    package=args.package,
                    timeout=sane_timeout,
                    options=args.options,
                    priority=args.priority,
                    machine=args.machine,
                    platform=args.platform,
                    memory=args.memory,
                    custom=args.custom,
                    enforce_timeout=args.enforce_timeout,
                    clock=args.clock,
                    tags=args.tags)

            tasks_count = len(task_ids)
            if tasks_count > 1:
                if not args.quiet:
                    print(
                        bold(green("Success")) +
                        u": File \"{0}\" added as task with IDs {1}".format(
                            file_path, task_ids))
            elif tasks_count > 0:
                if not args.quiet:
                    print(
                        bold(green("Success")) +
                        u": File \"{0}\" added as task with ID {1}".format(
                            file_path, task_ids[0]))
            else:
                print(bold(red("Error")) + ": adding task to database")
Example #52
0
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []
        zipdump = self.options.get("zipdump", False)
        zipstrings = self.options.get("zipstrings", False)
        do_strings = self.options.get("strings", False)
        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = self.options.get("minchars", 5)

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                # If we're re-processing this task with zips enabled, the process
                # dumps won't be re-processed (for now this only matters for Yara).
                if not dmp.endswith(".dmp"):
                    continue

                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(os.path.splitext(os.path.basename(dmp_path))[0])
                if "behavior" in self.results and "processes" in self.results["behavior"]:
                    for process in self.results["behavior"]["processes"]:
                        if process_id == process["process_id"]:
                            process_name = process["process_name"]
                            process_path = process["module_path"]
                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")),
                    address_space=self.parse_dump(dmp_path),
                    zipdump=zipdump,
                    zipstrings=zipstrings,
                )

                if do_strings:
                    try:
                        data = open(dmp_path, "r").read()
                    except (IOError, OSError) as e:
                        raise CuckooProcessingError("Error opening file %s" % e)

                    if nulltermonly:
                        apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
                        upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
                    else:
                        apat = "[\x20-\x7e]{" + str(minchars) + ",}"
                        upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"
                    strings = re.findall(apat, data)
                    strings += [str(ws.decode("utf-16le")) for ws in re.findall(upat, data)]
                    f = open(dmp_path + ".strings", "w")
                    f.write("\n".join(strings))
                    f.close()
                    proc["strings_path"] = dmp_path + ".strings"
                    if zipstrings:
                        try:
                            f = zipfile.ZipFile("%s.zip" % (proc["strings_path"]), "w")
                            f.write(proc["strings_path"], os.path.basename(proc["strings_path"]), zipfile.ZIP_DEFLATED)
                            f.close()
                            os.remove(proc["strings_path"])
                            proc["strings_path"] = "%s.zip" % (proc["strings_path"])
                        except Exception as e:
                            raise CuckooProcessingError("Error creating Process Memory Strings Zip File %s" % e)

                # Deduplicate configs
                if proc["yara"]:
                    for match in proc["yara"]:
                        # Dyre
                        if match["name"] == "DyreCfgInjectsList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<litem>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</litem>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique inject elements)" % len(output)

                        elif match["name"] == "DyreCfgRedirectList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<rpcgroup>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</rpcgroup>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique redirect elements)" % len(output)

                        # DarkComet
                        elif match["name"] == "DarkCometConfig":
                            output = list()
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("#BEGIN DARKCOMET"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("#EOF DARKCOMET"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]

                if zipdump:
                    try:
                        f = zipfile.ZipFile("%s.zip" % (dmp_path), "w")
                        f.write(dmp_path, os.path.basename(dmp_path), zipfile.ZIP_DEFLATED)
                        f.close()
                        os.remove(dmp_path)
                        proc["file"] = "%s.zip" % (dmp_path)
                    except Exception as e:
                        raise CuckooProcessingError("Error creating Process Memory Zip File %s" % e)

                results.append(proc)
        return results
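The ASCII and UTF-16LE patterns used above can also be exercised on their own. A standalone sketch of the same extraction logic, working on an in-memory bytes buffer (the function name and the byte-oriented regexes are illustrative assumptions):

import re

def extract_strings(data, minchars=5, nullterm_only=True):
    # data is the raw dump contents as bytes.
    if nullterm_only:
        apat = rb"([\x20-\x7e]{%d,})\x00" % minchars
        upat = rb"((?:[\x20-\x7e]\x00){%d,})\x00\x00" % minchars
    else:
        apat = rb"[\x20-\x7e]{%d,}" % minchars
        upat = rb"(?:[\x20-\x7e]\x00){%d,}" % minchars
    strings = re.findall(apat, data)
    # Decode UTF-16LE hits and re-encode so the result stays a list of bytes.
    strings += [ws.decode("utf-16le").encode() for ws in re.findall(upat, data)]
    return strings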
Example #53
0
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []
        do_strings = self.options.get("strings", False)
        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = str(self.options.get("minchars", 5)).encode()

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                # If we're re-processing this task with zips enabled, the process
                # dumps won't be re-processed (for now this only matters for Yara).
                if not dmp.endswith(".dmp"):
                    continue

                dmp_path = os.path.join(self.pmemory_path, dmp)
                if os.path.getsize(dmp_path) == 0:
                    continue

                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(
                    os.path.splitext(os.path.basename(dmp_path))[0])
                for process in self.results.get("behavior", {}).get(
                        "processes", []) or []:
                    if process_id == process["process_id"]:
                        process_name = process["process_name"]
                        process_path = process["module_path"]

                procdump = ProcDump(dmp_path, pretty=True)

                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(category="memory"),
                    cape_yara=dmp_file.get_yara(category="CAPE"),
                    address_space=procdump.pretty_print(),
                )

                for hit in proc["cape_yara"]:
                    hit["memblocks"] = {}
                    for item in hit["addresses"]:
                        memblock = self.get_yara_memblock(
                            proc["address_space"], hit["addresses"][item])
                        if memblock:
                            hit["memblocks"][item] = memblock

                # if self.options.get("extract_pe", False)
                extracted_pes = self.get_procmemory_pe(proc)

                endlimit = b""
                if not HAVE_RE2:
                    endlimit = b"8192"

                if do_strings:
                    if nulltermonly:
                        apat = b"([\x20-\x7e]{" + minchars + b"," + endlimit + b"})\x00"
                        upat = b"((?:[\x20-\x7e][\x00]){" + minchars + b"," + endlimit + b"})\x00\x00"
                    else:
                        apat = b"[\x20-\x7e]{" + minchars + b"," + endlimit + b"}"
                        upat = b"(?:[\x20-\x7e][\x00]){" + minchars + b"," + endlimit + b"}"

                    matchdict = procdump.search(apat, all=True)
                    strings = matchdict["matches"]
                    matchdict = procdump.search(upat, all=True)
                    ustrings = matchdict["matches"]
                    for ws in ustrings:
                        strings.append(ws.decode("utf-16le").encode())

                    proc["strings_path"] = f"{dmp_path}.strings"
                    proc["extracted_pe"] = extracted_pes
                    f = open(proc["strings_path"], "wb")
                    f.write(b"\n".join(strings))
                    f.close()

                procdump.close()
                results.append(proc)

                cape_name = cape_name_from_yara(proc, process_id, self.results)
                if cape_name and "detections" not in self.results:
                    self.results["detections"] = cape_name
        return results
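The get_yara_memblock() helper called above is not part of this fragment. Assuming the pretty-printed address space is a list of region dicts carrying hex "start"/"end" boundaries (an assumption about the layout, not a description of the real helper), a lookup along these lines would map a Yara hit address back to its memory region:

def get_yara_memblock(address_space, address):
    # Sketch under an assumed layout; the processing module's real helper may differ.
    try:
        addr = int(address, 16) if isinstance(address, str) else int(address)
    except (TypeError, ValueError):
        return None
    for region in address_space:
        try:
            start = int(region["start"], 16)
            end = int(region["end"], 16)
        except (KeyError, TypeError, ValueError):
            continue
        if start <= addr < end:
            # Return the region base so the hit can be tied to a mapped block.
            return region["start"]
    return None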