Example #1
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                if not dmp.endswith(".dmp"):
                    continue

                dump_path = os.path.join(self.pmemory_path, dmp)
                dump_file = File(dump_path)

                if "-" in os.path.basename(dump_path):
                    pid = int(os.path.basename(dump_path).split("-")[0])
                else:
                    pid = int(os.path.basename(dump_path).split(".")[0])

                proc = dict(
                    file=dump_path, pid=pid,
                    yara=dump_file.get_yara("memory"),
                    urls=list(dump_file.get_urls()),
                    regions=list(self.read_dump(dump_path)),
                )

                if self.options.get("idapro"):
                    self.create_idapy(proc)

                results.append(proc)

        return results
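
The two dump-name conventions handled above ("<pid>-<num>.dmp" and plain "<pid>.dmp") can be checked in isolation. A minimal sketch using only the standard library; the helper name is made up for illustration:

import os

def parse_pid_from_dump_name(dump_path):
    # Hypothetical helper: recover the process ID from a memory dump file name,
    # accepting both "1234-1.dmp" and plain "1234.dmp" style names.
    stem = os.path.splitext(os.path.basename(dump_path))[0]
    if "-" in stem:
        return int(stem.split("-")[0])
    return int(stem)

assert parse_pid_from_dump_name("/tmp/memory/1234-1.dmp") == 1234
assert parse_pid_from_dump_name("/tmp/memory/1234.dmp") == 1234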
Example #2
    def run(self):
        """Run analysis.
        @return: list of dropped files with related information.
        """
        self.key = "dropped"
        dropped_files, meta = [], {}

        if os.path.exists(self.dropped_meta_path):
            for line in open(self.dropped_meta_path, "rb"):
                entry = json.loads(line)
                filepath = os.path.join(self.analysis_path, entry["path"])
                meta[filepath] = {
                    "pids": entry["pids"],
                    "filepath": entry["filepath"],
                }

        for dir_name, dir_names, file_names in os.walk(self.dropped_path):
            for file_name in file_names:
                file_path = os.path.join(dir_name, file_name)
                file_info = File(file_path=file_path).get_all()
                file_info.update(meta.get(file_info["path"], {}))
                dropped_files.append(file_info)

        for dir_name, dir_names, file_names in os.walk(self.package_files):
            for file_name in file_names:
                file_path = os.path.join(dir_name, file_name)
                file_info = File(file_path=file_path).get_all()
                dropped_files.append(file_info)

        return dropped_files
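
The dropped-file metadata consumed above is a JSON-lines file mapping each stored path to the PIDs that wrote it. A minimal sketch of that lookup table, assuming only the standard library and the same two fields used in the example:

import json
import os

def load_dropped_meta(meta_path, analysis_path):
    # Hypothetical helper: build {absolute path: metadata} from a JSON-lines file,
    # mirroring how the module above enriches File(...).get_all() results.
    meta = {}
    if os.path.exists(meta_path):
        with open(meta_path, "rb") as fd:
            for line in fd:
                entry = json.loads(line)
                meta[os.path.join(analysis_path, entry["path"])] = {
                    "pids": entry["pids"],
                    "filepath": entry["filepath"],
                }
    return meta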
Example #3
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(os.path.splitext(os.path.basename(dmp_path))[0])
                if "behavior" in self.results and "processes" in self.results["behavior"]:
                    for process in self.results["behavior"]["processes"]:
                        if process_id == process["process_id"]:
                            process_name = process["process_name"]
                            process_path = process["module_path"]
                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")),
                    address_space=self.parse_dump(dmp_path)
                )

                results.append(proc)

        return results
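
Example #3 enriches each dump with the process name and module path taken from the behavior results. A small sketch of that lookup with a made-up behavior structure (the field names follow the example, the data is illustrative):

def lookup_process(behavior, process_id):
    # Return (process_name, module_path) for the matching PID, or empty strings.
    for process in behavior.get("processes", []):
        if process.get("process_id") == process_id:
            return process.get("process_name", ""), process.get("module_path", "")
    return "", ""

behavior = {"processes": [{"process_id": 1234,
                           "process_name": "calc.exe",
                           "module_path": "C:\\Windows\\System32\\calc.exe"}]}
print(lookup_process(behavior, 1234))  # ('calc.exe', 'C:\\Windows\\System32\\calc.exe')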
Example #4
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)

                # Let's hope the file is not too big.
                buf = open(dmp_path, "rb").read()
                urls = set()
                for url in re.findall(HTTP_REGEX, buf):
                    if not is_whitelisted_domain(url[1]):
                        urls.add("".join(url))

                proc = dict(
                    file=dmp_path,
                    pid=os.path.splitext(os.path.basename(dmp_path))[0],
                    yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")),
                    urls=list(urls),
                )

                results.append(proc)

        return results
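
Example #4 scans the raw dump for URLs with a project-defined HTTP_REGEX and an is_whitelisted_domain() check. A rough, self-contained approximation of that step; the pattern and whitelist below are deliberately simplified stand-ins, not the project's actual definitions:

import re

SIMPLE_HTTP_REGEX = re.compile(rb"(https?://)([\w.-]+(?::\d+)?[\w./?=&%-]*)")
WHITELISTED_DOMAINS = {b"example.com"}

def extract_urls(buf):
    # Collect non-whitelisted URLs found anywhere in a bytes buffer.
    urls = set()
    for scheme, rest in SIMPLE_HTTP_REGEX.findall(buf):
        domain = rest.split(b"/", 1)[0]
        if domain not in WHITELISTED_DOMAINS:
            urls.add((scheme + rest).decode("ascii", "replace"))
    return sorted(urls)

print(extract_urls(b"junk http://evil.test/payload junk https://example.com/ok"))
# ['http://evil.test/payload']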
Example #5
    def run(self):
        """Run androguard to extract static android information
                @return: list of static features
        """
        self.key = "apkinfo"
        apkinfo = {}

        if "file" not in self.task["category"] or not HAVE_ANDROGUARD:
            return

        f = File(self.task["target"])
        if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path)

            try:
                a = APK(self.file_path)
                if a.is_valid_APK():
                    manifest = {}
                    apkinfo["files"] = self._apk_files(a)
                    manifest["package"] = a.get_package()
                    # manifest["permissions"]=a.get_details_permissions_new()
                    manifest["main_activity"] = a.get_main_activity()
                    manifest["activities"] = a.get_activities()
                    manifest["services"] = a.get_services()
                    manifest["receivers"] = a.get_receivers()
                    # manifest["receivers_actions"]=a.get__extended_receivers()
                    manifest["providers"] = a.get_providers()
                    manifest["libraries"] = a.get_libraries()
                    apkinfo["manifest"] = manifest
                    # apkinfo["certificate"] = a.get_certificate()
                    static_calls = {}
                    if self.check_size(apkinfo["files"]):
                        vm = DalvikVMFormat(a.get_dex())
                        vmx = uVMAnalysis(vm)

                        static_calls["all_methods"] = self.get_methods(vmx)
                        static_calls["is_native_code"] = analysis.is_native_code(vmx)
                        static_calls["is_dynamic_code"] = analysis.is_dyn_code(vmx)
                        static_calls["is_reflection_code"] = analysis.is_reflection_code(vmx)

                        # static_calls["dynamic_method_calls"]= analysis.get_show_DynCode(vmx)
                        # static_calls["reflection_method_calls"]= analysis.get_show_ReflectionCode(vmx)
                        # static_calls["permissions_method_calls"]= analysis.get_show_Permissions(vmx)
                        # static_calls["crypto_method_calls"]= analysis.get_show_CryptoCode(vmx)
                        # static_calls["native_method_calls"]= analysis.get_show_NativeMethods(vmx)
                    else:
                        log.warning("Dex size bigger than: %s",
                                    self.options.decompilation_threshold)
                    apkinfo["static_method_calls"] = static_calls
            except (IOError, OSError, BadZipfile) as e:
                raise CuckooProcessingError("Error opening file %s" % e)

        return apkinfo
Example #6
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if self.options.get("extract_img") and not HAVE_PEFILE:
            log.warning(
                "In order to extract PE files from memory dumps it is "
                "required to have pefile installed (`pip install pefile`)."
            )

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                if not dmp.endswith(".dmp"):
                    continue

                dump_path = os.path.join(self.pmemory_path, dmp)
                dump_file = File(dump_path)

                pid, num = map(int, re.findall("(\\d+)", dmp))

                proc = dict(
                    file=dump_path, pid=pid, num=num,
                    yara=dump_file.get_yara("memory"),
                    urls=list(dump_file.get_urls()),
                    regions=list(self.read_dump(dump_path)),
                )

                if self.options.get("idapro"):
                    self.create_idapy(proc)

                if self.options.get("extract_img") and HAVE_PEFILE:
                    proc["extracted"] = list(self.dump_images(proc))

                if self.options.get("dump_delete"):
                    try:
                        os.remove(dump_path)
                    except OSError:
                        log.error("Unable to delete memory dump file at path \"%s\"", dump_path)

                results.append(proc)

        results.sort(key=lambda x: (x["pid"], x["num"]))
        return results
Example #7
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        for dmp in os.listdir(self.pmemory_path):
            dmp_path = os.path.join(self.pmemory_path, dmp)
            dmp_file = File(dmp_path)

            proc = dict(
                yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar"))
            )

            results.append(proc)

        return results
Example #8
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                if not dmp.endswith(".dmp"):
                    continue

                dump_path = os.path.join(self.pmemory_path, dmp)
                dump_file = File(dump_path)

                dump_name = os.path.basename(dump_path)
                pid = int(re.findall(r"(\d{2,5})", dump_name)[0])

                proc = dict(
                    file=dump_path, pid=pid,
                    yara=dump_file.get_yara("memory"),
                    urls=list(dump_file.get_urls()),
                    regions=list(self.read_dump(dump_path)),
                )

                if self.options.get("idapro"):
                    self.create_idapy(proc)
                    
                if self.options.get("dump_delete"):
                    try:
                        os.remove(dump_path)
                    except OSError:
                        log.error("Unable to delete memory dump file at path \"%s\"", dump_path)

                results.append(proc)

        return results
Example #9
def demux_sample(filename, package, options):
    """
    If the file is a ZIP, extract its included files and return their file paths.
    If the file is an email, extract its attachments and return their file paths (later we'll also extract URLs).
    If the file is a password-protected Office doc and a password is supplied, return the path to the decrypted doc.
    """

    magic = File(filename).get_type()

    # if file is an Office doc and password is supplied, try to decrypt the doc
    if "Microsoft" in magic or "Composite Document File" in magic or "CDFV2 Encrypted" in magic:
        password = None
        if "password="******",")
            for field in fields:
                try:
                    key, value = field.split("=", 1)
                    if key == "password":
                        password = value
                        break
                except:
                    pass
        if password:
            return demux_office(filename, password)
        else:
            return [filename]

    # if a package was specified, then don't do anything special
    # this will allow for the ZIP package to be used to analyze binaries with included DLL dependencies
    # do the same if file= is specified in the options
    if package or "file=" in options:
        return [filename]

    # don't try to extract from Java archives or executables
    if "Java Jar" in magic:
        return [filename]
    if "PE32" in magic or "MS-DOS executable" in magic:
        return [filename]

    retlist = demux_zip(filename, options)
    if not retlist:
        retlist = demux_rar(filename, options)
    if not retlist:
        retlist = demux_tar(filename, options)
    if not retlist:
        retlist = demux_email(filename, options)
    if not retlist:
        retlist = demux_msg(filename, options)
    if not retlist:
        retlist = demux_tnef(filename, options)
    # handle ZIPs/RARs inside extracted files
    if retlist:
        newretlist = []
        for item in retlist:
            zipext = demux_zip(item, options)
            if zipext:
                newretlist.extend(zipext)
            else:
                rarext = demux_rar(item, options)
                if rarext:
                    newretlist.extend(rarext)
                else:
                    tarext = demux_tar(item, options)
                    if tarext:
                        newretlist.extend(tarext)
                    else:
                        newretlist.append(item)
        retlist = newretlist

    # if it wasn't a ZIP or an email or we weren't able to obtain anything interesting from either, then just submit the
    # original file

    if not retlist:
        retlist.append(filename)

    return retlist
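
The demux logic above is a chain of format-specific extractors with a fall-through to the original file. The general shape of that pattern, sketched with stub extractors (the names and behaviour are illustrative, not the project's real demux_* helpers):

def demux_with_fallback(filename, extractors):
    # Try each extractor in order; the first one that yields files wins.
    # If none of them produce anything, fall back to the original file.
    for extract in extractors:
        extracted = extract(filename)
        if extracted:
            return extracted
    return [filename]

# Stub extractors for illustration only.
def fake_zip_extractor(path):
    return ["inner.exe", "config.dat"] if path.endswith(".zip") else []

def fake_rar_extractor(path):
    return []

print(demux_with_fallback("sample.zip", [fake_zip_extractor, fake_rar_extractor]))
print(demux_with_fallback("sample.exe", [fake_zip_extractor, fake_rar_extractor]))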
Example #10
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to S3.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        self.s3_region = self.options.get("region", "us-west-2")
        self.s3_access_key = self.options.get("access_key", "")
        self.s3_secret_key = self.options.get("secret_key", "")
        s3_reports_bucket_name = self.options.get("reports_bucket", "")
        s3_shots_bucket_name = self.options.get("shots_bucket", "")
        s3_samples_bucket_name = self.options.get("samples_bucket", "")
        s3_files_bucket_name = self.options.get("files_bucket", "")
        s3_aux_bucket_name = self.options.get("aux_bucket", "")
        s3_logs_bucket_name = self.options.get("logs_bucket", "")
        s3_pcap_bucket_name = self.options.get("pcap_bucket", "")
        s3_md5_bucket_name = self.options.get("md5_bucket", "")
        cleanup = self.options.get("cleanup", False)

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        if not "network" in report:
            report["network"] = {}

        # Add screenshot paths
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                screenshot = File(shot_path)
                if screenshot.valid():
                    #report["shots"].append("{0}/{1}".format(results['info']['id'], shot_file))
                    report["shots"].append(shot_file.replace(".jpg", ""))

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report.
        # Also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                chunk_count = 0
                # Using this type of prefix is useful because you can always re-construct it from
                # the original results
                #chunk_prefix = str(results['info']['id']) + '/' + process['process_name']
                chunk_prefix = str(results['info']['id']) + '/' + str(process['process_id'])
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed then
                    # store the chunk in S3.

                    if len(chunk) == 100:
                        chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count)
                        #log.debug("INFO TIME!")
                        #log.debug("%s %s %s" %(s3_reports_bucket_name, chunk_name, chunk_prefix))
                        #log.debug(chunk_prefix)
                        err = self.save_to_s3(s3_reports_bucket_name, chunk_name, json.dumps(chunk))
                        if err != '':
                            log.error("Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}".format(chunk_name, err))
                        else:
                            chunks_ids.append("{0}.{1}".format(chunk_prefix, chunk_count))
                            chunk_count += 1
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count)
                    #log.debug("%s %s %s" %(s3_reports_bucket_name, chunk_name, chunk_prefix))
                    err = self.save_to_s3(s3_reports_bucket_name, chunk_name, json.dumps(chunk))
                    if err != '':
                        log.error("Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}".format(chunk_name, err))
                    else:
                        chunks_ids.append("{0}.{1}".format(chunk_prefix, chunk_count))

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Other info we want quick access to from the web UI
        if results.has_key("virustotal") and results["virustotal"] and results["virustotal"].has_key("positives") and results["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"], results["virustotal"]["total"])
        if results.has_key("suricata") and results["suricata"]:
            if results["suricata"].has_key("tls") and len(results["suricata"]["tls"]) > 0:
                report["suri_tls_cnt"] = len(results["suricata"]["tls"])
            if results["suricata"].has_key("alerts") and len(results["suricata"]["alerts"]) > 0:
                report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
            if results["suricata"].has_key("files") and len(results["suricata"]["files"]) > 0:
                report["suri_file_cnt"] = len(results["suricata"]["files"])
            if results["suricata"].has_key("http") and len(results["suricata"]["http"]) > 0:
                report["suri_http_cnt"] = len(results["suricata"]["http"])
            if results["suricata"].has_key("ssh") and len(results["suricata"]["ssh"]) > 0:
                report["suri_ssh_cnt"] = len(results["suricata"]["ssh"])
            if results["suricata"].has_key("dns") and len(results["suricata"]["dns"]) > 0:
                report["suri_dns_cnt"] = len(results["suricata"]["dns"])

        # Store the report (its 'object id' is simply the analysis id).
        # First make sure it's not too big (5 GB limit).
        data = json.dumps(report)
        if len(data) < 5000000000:
            err = self.save_to_s3(s3_reports_bucket_name, results['info']['id'], data)
            if err != '':
                log.error("Non-size related issue saving analysis JSON to S3 for report {0} - {1}".format(results['info']['id'], err))
        else:
            log.error("JSON for analysis id {0} is greater than 5GB".format(results['info']['id']))

        # Process the rest of the analysis files and put them in S3.
        if s3_shots_bucket_name != '':
            shots_path = os.path.join(results['info']['id'], self.analysis_path, "shots")
            self.relocate_to_s3(results['info']['id'], shots_path, s3_shots_bucket_name)
        if s3_pcap_bucket_name != '':
            if os.path.isfile(self.analysis_path + '/dump.pcap'):
                with open(self.analysis_path + '/dump.pcap', 'rb') as infile:
                    self.save_to_s3(s3_pcap_bucket_name, "{0}/dump.pcap".format(results['info']['id']), infile.read())
            if os.path.isfile(self.analysis_path + '/dump_sorted.pcap'):
                with open(self.analysis_path + '/dump_sorted.pcap', 'rb') as infile:
                    self.save_to_s3(s3_pcap_bucket_name, "{0}/dump_sorted.pcap".format(results['info']['id']), infile.read())
        if s3_aux_bucket_name != '':
            aux_path = os.path.join(results['info']['id'], self.analysis_path, "aux")
            self.relocate_to_s3(results['info']['id'], aux_path, s3_aux_bucket_name)
        if s3_logs_bucket_name != '':
            logs_path = os.path.join(results['info']['id'], self.analysis_path, "logs")
            self.relocate_to_s3(results['info']['id'], logs_path, s3_logs_bucket_name)
        if s3_samples_bucket_name != '':
            sample = os.path.realpath(self.analysis_path + '/binary')
            with open(sample, 'rb') as infile:
                self.save_to_s3(s3_samples_bucket_name, results['target']['file']['sha256'], infile.read())
        #log.debug(s3_files_bucket_name)
        if s3_files_bucket_name != '':
            #log.debug(self.analysis_path)
            for root, dirnames, filenames in os.walk(self.analysis_path + '/files'):
                #log.debug(filenames)
                for filename in filenames:
                    key_name = str(results['info']['id']) + '/' + root.split(os.sep)[-1] + '/' + filename
                    if not filename.endswith('_info.txt'):
                        key_name = str(results['info']['id']) + '/' + root.split(os.sep)[-1]
                    #log.debug(key_name)
                    with open(os.path.join(root, filename), 'rb') as infile:
                        self.save_to_s3(s3_files_bucket_name, key_name, infile.read())
        if s3_md5_bucket_name != '':
            info = {}
            info.update(report['info'])
            info.update(report['target'])
            self.save_to_s3(s3_md5_bucket_name, results['target']['file']['md5'], json.dumps(info))

        if cleanup:
            shutil.rmtree(self.analysis_path)
Example #11
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #    created.
        #  Returns None if the index already exists.
        self.db.fs.files.ensure_index("sha256", unique=True,
                                      sparse=True, name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        # Store the sample in GridFS.
        if results["info"]["category"] == "file":
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"] = {"pcap_id": pcap_id}
            report["network"].update(results["network"])

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id

            new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]

            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path,
                                         "shots", shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"],
                                 "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.disconnect()
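
Both reporting modules above (Examples #10 and #11) split each process's call list into 100-call chunks before storing them, so that single documents stay small and reports can be paged. The chunking itself is independent of the storage backend; a minimal sketch:

def chunk_calls(calls, chunk_size=100):
    # Yield successive lists of at most chunk_size calls, preserving order.
    chunk = []
    for call in calls:
        chunk.append(call)
        if len(chunk) == chunk_size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk  # leftovers

print([len(c) for c in chunk_calls(range(250))])  # [100, 100, 50]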
Example #12
def index(request, resubmit_hash=False):
    if request.method == "POST":

        static, package, timeout, priority, options, machine, platform, tags, custom, memory, \
            clock, enforce_timeout, shrike_url, shrike_msg, shrike_sid, shrike_refer, unique, referrer, \
            tlp = parse_request_arguments(request)

        # This is done to remove spaces in options without breaking custom paths
        options = ",".join("=".join(value.strip()
                                    for value in option.split("=", 1))
                           for option in options.split(",")
                           if option and "=" in option)
        opt_filename = get_user_filename(options, custom)

        if priority and web_conf.public.enabled and web_conf.public.priority:
            priority = web_conf.public.priority

        if timeout and web_conf.public.enabled and web_conf.public.timeout:
            timeout = web_conf.public.timeout

        if options:
            options += ","

        if referrer:
            options += "referrer=%s," % (referrer)

        if request.POST.get("free"):
            options += "free=yes,"

        if request.POST.get("nohuman"):
            options += "nohuman=yes,"

        if request.POST.get("tor"):
            options += "tor=yes,"

        if request.POST.get("route", None):
            options += "route={0},".format(request.POST.get("route", None))

        if request.POST.get("process_dump"):
            options += "procdump=0,"

        if request.POST.get("process_memory"):
            options += "procmemdump=1,"

        if request.POST.get("import_reconstruction"):
            options += "import_reconstruction=1,"

        if request.POST.get("disable_cape"):
            options += "disable_cape=1,"

        if request.POST.get("kernel_analysis"):
            options += "kernel_analysis=yes,"

        if request.POST.get("norefer"):
            options += "norefer=1,"

        if request.POST.get("oldloader"):
            options += "loader=oldloader.exe,loader_64=oldloader_x64.exe,"

        if request.POST.get("unpack"):
            options += "unpack=yes,"

        options = options[:-1]

        task_machines = []
        opt_apikey = False
        opts = get_options(options)
        if opts:
            opt_apikey = opts.get("apikey", False)

        status = "ok"
        task_ids_tmp = list()

        details = {
            "errors": [],
            "content": False,
            "request": request,
            "task_ids": [],
            "url": False,
            "params": {},
            "headers": {},
            "service": "Local",
            "path": "",
            "fhash": False,
            "options": options,
            "only_extraction": False,
            "task_machines": task_machines,
        }

        if "hash" in request.POST and request.POST.get(
                "hash", False) and request.POST.get("hash")[0] != '':
            resubmission_hash = request.POST.get("hash").strip()
            paths = db.sample_path_by_hash(resubmission_hash)
            if paths:
                content = get_file_content(paths)
                if not content:
                    return render(
                        request, "error.html", {
                            "error":
                            "Can't find {} on disk, {}".format(
                                resubmission_hash, str(paths))
                        })
                folder = os.path.join(settings.TEMP_PATH, "cape-resubmit")
                if not os.path.exists(folder):
                    os.makedirs(folder)
                base_dir = tempfile.mkdtemp(prefix='resubmit_', dir=folder)
                if opt_filename:
                    filename = base_dir + "/" + opt_filename
                else:
                    filename = base_dir + "/" + sanitize_filename(
                        resubmission_hash)
                path = store_temp_file(content, filename)
                details["path"] = path
                details["content"] = content
                status, task_ids_tmp = download_file(**details)
                if status == "error":
                    details["errors"].append(
                        {os.path.basename(filename): task_ids_tmp})
                else:
                    details["task_ids"] = task_ids_tmp
            else:
                return render(
                    request, "error.html",
                    {"error": "File not found on hdd for resubmission"})

        elif "sample" in request.FILES:
            samples = request.FILES.getlist("sample")
            details["service"] = "WebGUI"
            for sample in samples:
                # Error if there was only one submitted sample and it's empty.
                # But if there are multiple and one was empty, just ignore it.
                if not sample.size:
                    details["errors"].append(
                        {sample.name: "You uploaded an empty file."})
                    continue
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    details["errors"].append({
                        sample.name:
                        "You uploaded a file that exceeds the maximum allowed upload size specified in web/web/local_settings.py."
                    })
                    continue

                if opt_filename:
                    filename = opt_filename
                else:
                    filename = sanitize_filename(sample.name)
                # Move the sample from the Django temporary file to CAPE temporary storage so it persists between reboots (if the user chooses to configure it that way).
                path = store_temp_file(sample.read(), filename)
                if unique and db.check_file_uniq(File(path).get_sha256()):
                    return render(
                        request, "error.html", {
                            "error":
                            "Duplicated file, disable unique option to force submission"
                        })

                if timeout and web_conf.public.enabled and web_conf.public.timeout and timeout > web_conf.public.timeout:
                    timeout = web_conf.public.timeout

                magic_type = get_magic_type(path)
                platform = get_platform(magic_type)
                if machine.lower() == "all":
                    details["task_machines"] = [
                        vm.name for vm in db.list_machines(platform=platform)
                    ]
                elif machine:
                    machine_details = db.view_machine(machine)
                    if hasattr(machine_details, "platform"
                               ) and not machine_details.platform == platform:
                        return render(
                            request,
                            "error.html",
                            {
                                "error":
                                "Wrong platform, {} VM selected for {} sample".
                                format(machine_details.platform, platform)
                            },
                        )
                    else:
                        details["task_machines"] = [machine]

                else:
                    details["task_machines"] = ["first"]
                details["path"] = path
                details["content"] = get_file_content(path)
                status, task_ids = download_file(**details)

        elif "quarantine" in request.FILES:
            samples = request.FILES.getlist("quarantine")
            for sample in samples:
                # Error if there was only one submitted sample and it's empty.
                # But if there are multiple and one was empty, just ignore it.
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(
                        request, "error.html",
                        {"error": "You uploaded an empty quarantine file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a quarantine file that exceeds the maximum allowed upload size specified in web/web/local_settings.py."
                        })

                # Move the sample from the Django temporary file to Cuckoo temporary storage
                # so it persists between reboots (if the user chooses to configure it that way).
                tmp_path = store_temp_file(sample.read(), sample.name)

                path = unquarantine(tmp_path)
                try:
                    os.remove(tmp_path)
                except Exception as e:
                    print(e)

                if not path:
                    return render(request, "error.html", {
                        "error":
                        "You uploaded an unsupported quarantine file."
                    })

                if machine.lower() == "all":
                    task_machines = [
                        vm.name for vm in db.list_machines(platform="windows")
                    ]
                elif machine:
                    machine_details = db.view_machine(machine)
                    if not machine_details.platform == "windows":
                        return render(
                            request, "error.html", {
                                "error":
                                "Wrong platform, linux VM selected for {} sample"
                                .format(machine_details.platform)
                            })
                    else:
                        task_machines = [machine]

                details["path"] = path
                details["content"] = get_file_content(path)
                status, task_ids_tmp = download_file(**details)
                if status == "error":
                    details["errors"].append({sample.name: task_ids_tmp})
                else:
                    details["task_ids"] = task_ids_tmp

        elif "static" in request.FILES:
            samples = request.FILES.getlist("static")
            for sample in samples:
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(request, "error.html",
                                  {"error": "You uploaded an empty file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a file that exceeds the maximum allowed upload size specified in web/web/local_settings.py."
                        })

                # Move the sample from the Django temporary file to Cuckoo temporary storage
                # so it persists between reboots (if the user chooses to configure it that way).
                path = store_temp_file(sample.read(), sample.name)

                task_id = db.add_static(file_path=path,
                                        priority=priority,
                                        tlp=tlp)
                if not task_id:
                    return render(
                        request, "error.html",
                        {"error": "We don't have static extractor for this"})
                task_ids.append(task_id)

        elif "pcap" in request.FILES:
            samples = request.FILES.getlist("pcap")
            for sample in samples:
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(
                        request, "error.html",
                        {"error": "You uploaded an empty PCAP file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a PCAP file that exceeds the maximum allowed upload size specified in web/web/local_settings.py."
                        })

                # Move the sample from the Django temporary file to Cuckoo temporary storage
                # so it persists between reboots (if the user chooses to configure it that way).
                path = store_temp_file(sample.read(), sample.name)

                if sample.name.lower().endswith(".saz"):
                    saz = saz_to_pcap(path)
                    if saz:
                        try:
                            os.remove(path)
                        except Exception as e:
                            pass
                        path = saz
                    else:
                        return render(
                            request, "error.html",
                            {"error": "Conversion from SAZ to PCAP failed."})

                task_id = db.add_pcap(file_path=path,
                                      priority=priority,
                                      tlp=tlp)
                if task_id:
                    task_ids.append(task_id)

        elif "url" in request.POST and request.POST.get("url").strip():
            url = request.POST.get("url").strip()
            if not url:
                return render(request, "error.html",
                              {"error": "You specified an invalid URL!"})

            url = url.replace("hxxps://", "https://").replace(
                "hxxp://", "http://").replace("[.]", ".")

            if machine.lower() == "all":
                details["task_machines"] = [
                    vm.name for vm in db.list_machines(platform="windows")
                ]
            elif machine:
                machine_details = db.view_machine(machine)
                if hasattr(machine_details, "platform"
                           ) and not machine_details.platform == "windows":
                    details["errors"].append({
                        os.path.basename(url):
                        "Wrong platform, linux VM selected for {} sample".
                        format(machine_details.platform)
                    })
                else:
                    details["task_machines"] = [machine]

            details["path"] = path
            details["content"] = get_file_content(path)
            status, task_ids_tmp = download_file(**details)
            if status == "error":
                details["errors"].append({sample.name: task_ids_tmp})
            else:
                details["task_ids"] = task_ids_tmp

        elif "dlnexec" in request.POST and request.POST.get("dlnexec").strip():
            url = request.POST.get("dlnexec").strip()
            if not url:
                return render(request, "error.html",
                              {"error": "You specified an invalid URL!"})

            url = url.replace("hxxps://", "https://").replace(
                "hxxp://", "http://").replace("[.]", ".")
            response = _download_file(request.POST.get("route", None), url,
                                      options)
            if not response:
                return render(request, "error.html",
                              {"error": "Was impossible to retrieve url"})

            name = os.path.basename(url)
            if not "." in name:
                name = get_user_filename(options,
                                         custom) or generate_fake_name()

            if machine.lower() == "all":
                details["task_machines"] = [
                    vm.name for vm in db.list_machines(platform=platform)
                ]
            elif machine:
                machine_details = db.view_machine(machine[0])
                if hasattr(machine_details, "platform"
                           ) and not machine_details.platform == platform:
                    return render(
                        request, "error.html", {
                            "error":
                            "Wrong platform, {} VM selected for {} sample".
                            format(machine_details.platform, platform)
                        })
                else:
                    details["task_machines"] = [machine]

            path = store_temp_file(response, name)
            details["path"] = path
            details["content"] = get_file_content(path)
            details["service"] = "DLnExec"
            status, task_ids_tmp = download_file(**details)
            if status == "error":
                details["errors"].append({name: task_ids_tmp})
            else:
                details["task_ids"] = task_ids_tmp
        elif settings.VTDL_ENABLED and "vtdl" in request.POST and request.POST.get(
                "vtdl", False) and request.POST.get("vtdl")[0] != "":
            if not settings.VTDL_KEY or not settings.VTDL_PATH:
                return render(
                    request, "error.html", {
                        "error":
                        "You specified VirusTotal but must edit the file and specify your VTDL_PRIV_KEY or VTDL_INTEL_KEY variable and VTDL_PATH base directory"
                    })
            else:
                if opt_apikey:
                    details["apikey"] = opt_apikey
                details = download_from_vt(
                    request.POST.get("vtdl").strip(), details, opt_filename,
                    settings)

        if details.get("task_ids"):
            tasks_count = len(details["task_ids"])
        else:
            tasks_count = 0
        if tasks_count > 0:
            data = {
                "tasks": details["task_ids"],
                "tasks_count": tasks_count,
                "errors": details["errors"]
            }
            return render(request, "submission/complete.html", data)
        else:
            return render(
                request, "error.html", {
                    "error": "Error adding task(s) to CAPE's database.",
                    "errors": details["errors"]
                })
    else:
        enabledconf = dict()
        enabledconf["vt"] = settings.VTDL_ENABLED
        enabledconf["kernel"] = settings.OPT_ZER0M0N
        enabledconf["memory"] = processing.memory.get("enabled")
        enabledconf["procmemory"] = processing.procmemory.get("enabled")
        enabledconf["dlnexec"] = settings.DLNEXEC
        enabledconf["url_analysis"] = settings.URL_ANALYSIS
        enabledconf["tags"] = False
        enabledconf[
            "dist_master_storage_only"] = repconf.distributed.master_storage_only
        enabledconf["linux_on_gui"] = web_conf.linux.enabled
        enabledconf["tlp"] = web_conf.tlp.enabled

        if all_vms_tags:
            enabledconf["tags"] = True

        if not enabledconf["tags"]:
            # load multi machinery tags:
            # Get enabled machinery
            machinery = cfg.cuckoo.get("machinery")
            if machinery == "multi":
                for mmachinery in Config(machinery).multi.get(
                        "machinery").split(","):
                    vms = [
                        x.strip() for x in getattr(Config(
                            mmachinery), mmachinery).get("machines").split(",")
                    ]
                    if any([
                            "tags"
                            in list(getattr(Config(mmachinery), vmtag).keys())
                            for vmtag in vms
                    ]):
                        enabledconf["tags"] = True
                        break
            else:
                # Get VM names for machinery config elements
                vms = [
                    x.strip() for x in getattr(Config(
                        machinery), machinery).get("machines").split(",")
                ]
                # Check each VM config element for tags
                if any([
                        "tags"
                        in list(getattr(Config(machinery), vmtag).keys())
                        for vmtag in vms
                ]):
                    enabledconf["tags"] = True

        packages, machines = get_form_data("windows")

        socks5s = _load_socks5_operational()
        socks5s_random = ""
        if socks5s:
            socks5s_random = random.choice(list(socks5s.values())).get(
                "description", False)

        return render(
            request,
            "submission/index.html",
            {
                "packages": sorted(packages),
                "machines": machines,
                "vpns": list(vpns.values()),
                "socks5s": list(socks5s.values()),
                "socks5s_random": socks5s_random,
                "route": routing.routing.route,
                "internet": routing.routing.internet,
                "inetsim": routing.inetsim.enabled,
                "tor": routing.tor.enabled,
                "config": enabledconf,
                "resubmit": resubmit_hash,
                "tags": sorted(list(set(all_vms_tags))),
            },
        )
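
Near the top of Examples #12 and #13 the submitted options string is normalised by splitting on commas, stripping whitespace around each key=value pair, and splitting on the first "=" only, so that values containing "=" (such as paths) survive intact. A standalone check of that expression:

def normalize_options(options):
    # Strip whitespace around each comma-separated key=value pair, splitting on
    # the first "=" only so values may themselves contain "=".
    return ",".join(
        "=".join(part.strip() for part in option.split("=", 1))
        for option in options.split(",")
        if option and "=" in option
    )

print(normalize_options(" free = yes, route=tor , path=C:\\a=b "))
# free=yes,route=tor,path=C:\a=b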
Example #13
def index(request, resubmit_hash=False):
    if request.method == "POST":
        package = request.POST.get("package", "")
        timeout = min(force_int(request.POST.get("timeout")), 60 * 60 * 24)
        options = request.POST.get("options", "")
        lin_options = request.POST.get("lin_options", "")
        priority = force_int(request.POST.get("priority"))
        machine = request.POST.get("machine", "")
        clock = request.POST.get(
            "clock",
            datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S"))
        if not clock:
            clock = datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S")
        if "1970" in clock:
            clock = datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S")
        custom = request.POST.get("custom", "")
        memory = bool(request.POST.get("memory", False))
        enforce_timeout = bool(request.POST.get("enforce_timeout", False))
        referrer = validate_referrer(request.POST.get("referrer", None))
        tags = request.POST.get("tags", None)
        static = bool(request.POST.get("static", False))
        all_tags = load_vms_tags()
        if tags and not all(
            [tag.strip() in all_tags for tag in tags.split(",")]):
            return render(request, "error.html", {
                "error":
                "Check Tags help, you have introduced incorrect tag(s)"
            })

        if lin_options:
            options = lin_options
        # This is done to remove spaces in options without breaking custom paths
        options = ','.join('='.join(value.strip()
                                    for value in option.split("=", 1))
                           for option in options.split(",")
                           if option and '=' in option)
        opt_filename = get_user_filename(options, custom)

        if referrer:
            if options:
                options += ","
            options += "referrer=%s" % (referrer)

        if request.POST.get("free"):
            if options:
                options += ","
            options += "free=yes"

        if request.POST.get("nohuman"):
            if options:
                options += ","
            options += "nohuman=yes"

        if request.POST.get("tor"):
            if options:
                options += ","
            options += "tor=yes"

        if request.POST.get("route", None):
            if options:
                options += ","
            options += "route={0}".format(request.POST.get("route", None))

        if request.POST.get("process_dump"):
            if options:
                options += ","
            options += "procmemdump=1,procdump=1"

        if request.POST.get("process_memory"):
            if options:
                options += ","
            options += "procmemdump=1,procdump=1"

        if request.POST.get("import_reconstruction"):
            if options:
                options += ","
            options += "import_reconstruction=1"

        if request.POST.get("disable_cape"):
            if options:
                options += ","
            options += "disable_cape=1"

        if request.POST.get("kernel_analysis"):
            if options:
                options += ","
            options += "kernel_analysis=yes"

        if request.POST.get("norefer"):
            if options:
                options += ","
            options += "norefer=1"

        if request.POST.get("oldloader"):
            if options:
                options += ","
            options += "loader=oldloader.exe,loader_64=oldloader_x64.exe"

        if request.POST.get("unpack"):
            if options:
                options += ","
            options += "unpack=yes"

        unique = request.POST.get("unique", False)

        orig_options = options
        task_ids = []
        task_machines = []

        status = "ok"
        failed_hashes = list()
        task_ids_tmp = list()
        if "hash" in request.POST and request.POST.get(
                "hash", False) and request.POST.get("hash")[0] != '':
            resubmission_hash = request.POST.get("hash").strip()
            paths = db.sample_path_by_hash(resubmission_hash)
            if paths:
                paths = [
                    _f for _f in [
                        path if os.path.exists(path) else False
                        for path in paths
                    ] if _f
                ]
                if not paths and FULL_DB:
                    tasks = results_db.analysis.find(
                        {"dropped.sha256": resubmission_hash})
                    if tasks:
                        for task in tasks:
                            # grab task id and replace in path if needed aka distributed hack
                            path = os.path.join(settings.CUCKOO_PATH,
                                                "storage", "analyses",
                                                str(task["info"]["id"]),
                                                "files", resubmission_hash)
                            if os.path.exists(path):
                                paths = [path]
                                break

            if paths:
                content = False
                content = get_file_content(paths)
                if not content:
                    return render(
                        request, "error.html", {
                            "error":
                            "Can't find {} on disk, {}".format(
                                resubmission_hash, str(paths))
                        })
                base_dir = tempfile.mkdtemp(prefix='resubmit_',
                                            dir=settings.TEMP_PATH)
                if opt_filename:
                    filename = base_dir + "/" + opt_filename
                else:
                    filename = base_dir + "/" + sanitize_filename(
                        resubmission_hash)
                path = store_temp_file(content, filename)
                headers = {}
                url = 'local'
                params = {}

                status, task_ids = download_file(
                    False, content, request, db, task_ids, url, params,
                    headers, "Local", path, package, timeout, options,
                    priority, machine, clock, custom, memory, enforce_timeout,
                    referrer, tags, orig_options, "", static)
            else:
                return render(
                    request, "error.html",
                    {"error": "File not found on hdd for resubmission"})

        elif "sample" in request.FILES:
            samples = request.FILES.getlist("sample")
            for sample in samples:
                # Error if there was only one submitted sample and it's empty.
                # But if there are multiple and one was empty, just ignore it.
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(request, "error.html",
                                  {"error": "You uploaded an empty file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a file that exceeds the maximum allowed upload size "
                            "specified in web/web/local_settings.py."
                        })

                if opt_filename:
                    filename = opt_filename
                else:
                    filename = sample.name
                # Moving sample from django temporary file to Cuckoo temporary storage to
                # let it persist between reboot (if user like to configure it in that way).
                path = store_temp_file(sample.read(), filename)

                if unique and db.check_file_uniq(File(path).get_sha256()):
                    return render(
                        request, "error.html", {
                            "error":
                            "Duplicated file, disable unique option to force submission"
                        })

                magic_type = get_magic_type(path)
                if disable_x64 is True:
                    if magic_type and ("x86-64" in magic_type
                                       or "PE32+" in magic_type):
                        if len(samples) == 1:
                            return render(
                                request, "error.html",
                                {"error": "Sorry no x64 support yet"})
                        else:
                            continue

                orig_options, timeout, enforce_timeout = recon(
                    path, orig_options, timeout, enforce_timeout)

                platform = get_platform(magic_type)
                if machine.lower() == "all":
                    task_machines = [
                        vm.name for vm in db.list_machines(platform=platform)
                    ]
                elif machine:
                    machine_details = db.view_machine(machine)
                    if machine_details.platform != platform:
                        return render(
                            request, "error.html", {
                                "error":
                                "Wrong platform, {} VM selected for {} sample".
                                format(machine_details.platform, platform)
                            })
                    else:
                        task_machines.append(machine)

                else:
                    task_machines.append("first")

                for entry in task_machines:
                    if entry == "first":
                        entry = None
                    try:
                        task_ids_new = db.demux_sample_and_add_to_db(
                            file_path=path,
                            package=package,
                            timeout=timeout,
                            options=options,
                            priority=priority,
                            machine=entry,
                            custom=custom,
                            memory=memory,
                            platform=platform,
                            enforce_timeout=enforce_timeout,
                            tags=tags,
                            clock=clock,
                            static=static)
                        task_ids.extend(task_ids_new)
                    except CuckooDemuxError as err:
                        return render(request, "error.html", {"error": err})

        elif "quarantine" in request.FILES:
            samples = request.FILES.getlist("quarantine")
            for sample in samples:
                # Error if there was only one submitted sample and it's empty.
                # But if there are multiple and one was empty, just ignore it.
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(
                        request, "error.html",
                        {"error": "You uploaded an empty quarantine file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a quarantine file that exceeds the maximum \
                                  allowed upload size specified in web/web/local_settings.py."
                        })

                # Moving sample from django temporary file to Cuckoo temporary storage to
                # let it persist between reboot (if user like to configure it in that way).
                tmp_path = store_temp_file(sample.read(), sample.name)

                path = unquarantine(tmp_path)
                try:
                    os.remove(tmp_path)
                except Exception as e:
                    pass

                if not path:
                    return render(request, "error.html", {
                        "error":
                        "You uploaded an unsupported quarantine file."
                    })

                if machine.lower() == "all":
                    task_machines = [
                        vm.name for vm in db.list_machines(platform="windows")
                    ]
                elif machine:
                    machine_details = db.view_machine(machine)
                    if not machine_details.platform == "windows":
                        return render(
                            request, "error.html", {
                                "error":
                                "Wrong platform, linux VM selected for {} sample"
                                .format(machine_details.platform)
                            })
                    else:
                        task_machines.append(machine)

                if not task_machines:
                    task_machines.append("first")

                for entry in task_machines:
                    if entry == "first":
                        entry = None
                    task_ids_new = db.demux_sample_and_add_to_db(
                        file_path=path,
                        package=package,
                        timeout=timeout,
                        options=options,
                        priority=priority,
                        machine=entry,
                        custom=custom,
                        memory=memory,
                        tags=tags,
                        enforce_timeout=enforce_timeout,
                        clock=clock)
                    if task_ids_new:
                        task_ids.extend(task_ids_new)

        elif "static" in request.FILES:
            samples = request.FILES.getlist("static")
            for sample in samples:
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(request, "error.html",
                                  {"error": "You uploaded an empty file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a file that exceeds the maximum \
                    allowed upload size specified in web/web/local_settings.py."
                        })

                # Moving sample from django temporary file to Cuckoo temporary storage to
                # let it persist between reboot (if user like to configure it in that way).
                path = store_temp_file(sample.read(), sample.name)

                task_id = db.add_static(file_path=path, priority=priority)
                if not task_id:
                    return render(
                        request, "error.html",
                        {"error": "No static extractor is available for this file"})
                task_ids.append(task_id)

        elif "pcap" in request.FILES:
            samples = request.FILES.getlist("pcap")
            for sample in samples:
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(
                        request, "error.html",
                        {"error": "You uploaded an empty PCAP file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a PCAP file that exceeds the maximum \
                     allowed upload size specified in web/web/local_settings.py."
                        })

                # Moving sample from django temporary file to Cuckoo temporary storage to
                # let it persist between reboot (if user like to configure it in that way).
                path = store_temp_file(sample.read(), sample.name)

                if sample.name.lower().endswith(".saz"):
                    saz = saz_to_pcap(path)
                    if saz:
                        try:
                            os.remove(path)
                        except Exception as e:
                            pass
                        path = saz
                    else:
                        return render(
                            request, "error.html",
                            {"error": "Conversion from SAZ to PCAP failed."})

                task_id = db.add_pcap(file_path=path, priority=priority)
                if task_id:
                    task_ids.append(task_id)

        elif "url" in request.POST and request.POST.get("url").strip():
            url = request.POST.get("url").strip()
            if not url:
                return render(request, "error.html",
                              {"error": "You specified an invalid URL!"})

            url = url.replace("hxxps://", "https://").replace(
                "hxxp://", "http://").replace("[.]", ".")

            if machine.lower() == "all":
                task_machines = [
                    vm.name for vm in db.list_machines(platform="windows")
                ]
            elif machine:
                machine_details = db.view_machine(machine)
                if not machine_details.platform == "windows":
                    return render(
                        request, "error.html", {
                            "error":
                            "Wrong platform, linux VM selected for {} sample".
                            format(machine_details.platform)
                        })
                else:
                    task_machines.append(machine)

            else:
                task_machines.append("first")

            for entry in task_machines:
                if entry == "first":
                    entry = None
                task_ids_new = db.add_url(url=url,
                                          package=package,
                                          timeout=timeout,
                                          options=options,
                                          priority=priority,
                                          machine=entry,
                                          custom=custom,
                                          memory=memory,
                                          enforce_timeout=enforce_timeout,
                                          tags=tags,
                                          clock=clock)
                if task_ids_new:
                    task_ids.extend(task_ids_new)

        elif "dlnexec" in request.POST and request.POST.get("dlnexec").strip():
            url = request.POST.get("dlnexec").strip()
            if not url:
                return render(request, "error.html",
                              {"error": "You specified an invalid URL!"})

            url = url.replace("hxxps://", "https://").replace(
                "hxxp://", "http://").replace("[.]", ".")
            response = _download_file(request.POST.get("route", None), url,
                                      options)
            if not response:
                return render(request, "error.html",
                              {"error": "Could not retrieve the provided URL"})

            name = os.path.basename(url)
            if not "." in name:
                name = get_user_filename(options,
                                         custom) or generate_fake_name()
            path = store_temp_file(response, name)

            magic_type = get_magic_type(path)
            platform = get_platform(magic_type)

            if machine.lower() == "all":
                task_machines = [
                    vm.name for vm in db.list_machines(platform=platform)
                ]
            elif machine:
                machine_details = db.view_machine(machine)
                if machine_details.platform != platform:
                    return render(
                        request, "error.html", {
                            "error":
                            "Wrong platform, {} VM selected for {} sample".
                            format(machine_details.platform, platform)
                        })
                else:
                    task_machines.append(machine)
            else:
                task_machines.append("first")
            for entry in task_machines:
                if entry == "first":
                    entry = None
                task_ids_new = db.demux_sample_and_add_to_db(
                    file_path=path,
                    package=package,
                    timeout=timeout,
                    options=options,
                    priority=priority,
                    machine=entry,
                    custom=custom,
                    memory=memory,
                    enforce_timeout=enforce_timeout,
                    tags=tags,
                    platform=platform,
                    clock=clock)
                if task_ids_new:
                    task_ids.extend(task_ids_new)

        elif settings.VTDL_ENABLED and "vtdl" in request.POST and request.POST.get("vtdl", False) \
                and request.POST.get("vtdl")[0] != '':
            vtdl = request.POST.get("vtdl").strip()
            if (not settings.VTDL_PRIV_KEY
                    and not settings.VTDL_INTEL_KEY) or not settings.VTDL_PATH:
                return render(
                    request, "error.html", {
                        "error":
                        "You requested a VirusTotal download, but the "
                        "VTDL_PRIV_KEY or VTDL_INTEL_KEY setting and the "
                        "VTDL_PATH base directory must be configured first"
                    })
            else:
                hashlist = []
                if "," in vtdl:
                    hashlist = [
                        _f for _f in vtdl.replace(" ", "").strip().split(",")
                        if _f
                    ]
                else:
                    hashlist.append(vtdl)

                for h in hashlist:
                    base_dir = tempfile.mkdtemp(prefix='cuckoovtdl',
                                                dir=settings.VTDL_PATH)
                    task_ids_tmp = list()
                    if opt_filename:
                        filename = os.path.join(base_dir, opt_filename)
                    else:
                        filename = os.path.join(base_dir, sanitize_filename(h))
                    headers = {}
                    paths = db.sample_path_by_hash(h)
                    content = False
                    if paths:
                        content = get_file_content(paths)
                    if settings.VTDL_PRIV_KEY:
                        headers = {'x-apikey': settings.VTDL_PRIV_KEY}
                    elif settings.VTDL_INTEL_KEY:
                        headers = {'x-apikey': settings.VTDL_INTEL_KEY}
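                    # VirusTotal API v3 file download endpoint; the API key is
                    # passed in the x-apikey header set above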
                    url = "https://www.virustotal.com/api/v3/files/{id}/download".format(
                        id=h)
                    params = {}

                    if not content:
                        status, task_ids_tmp = download_file(
                            False, content, request, db, task_ids, url, params,
                            headers, "VirusTotal", filename, package, timeout,
                            options, priority, machine, clock, custom, memory,
                            enforce_timeout, referrer, tags, orig_options, "",
                            static, h)
                    else:
                        status, task_ids_tmp = download_file(
                            False, content, request, db, task_ids, url, params,
                            headers, "Local", filename, package, timeout,
                            options, priority, machine, clock, custom, memory,
                            enforce_timeout, referrer, tags, orig_options, "",
                            static, h)
                    if status is "ok":
                        task_ids = task_ids_tmp
                    else:
                        failed_hashes.append(h)

        if not isinstance(task_ids, list) and status == "error":
            # task_ids holds an already rendered error response
            return task_ids
        if not isinstance(task_ids_tmp, list) and status == "error":
            # task_ids_tmp holds an already rendered error response
            return task_ids_tmp
        if isinstance(task_ids, list):
            tasks_count = len(task_ids)
        else:
            # ToDo improve error msg
            tasks_count = 0
        if tasks_count > 0:
            data = {"tasks": task_ids, "tasks_count": tasks_count}
            if failed_hashes:
                data["failed_hashes"] = failed_hashes
            return render(request, "submission/complete.html", data)

        else:
            return render(request, "error.html",
                          {"error": "Error adding task to Cuckoo's database."})
    else:
        enabledconf = dict()
        enabledconf["vt"] = settings.VTDL_ENABLED
        enabledconf["kernel"] = settings.OPT_ZER0M0N
        enabledconf["memory"] = processing.memory.get("enabled")
        enabledconf["procmemory"] = processing.procmemory.get("enabled")
        enabledconf["dlnexec"] = settings.DLNEXEC
        enabledconf["tags"] = False
        enabledconf[
            "dist_master_storage_only"] = repconf.distributed.master_storage_only

        all_tags = load_vms_tags()
        if all_tags:
            enabledconf["tags"] = True

        if not enabledconf["tags"]:
            # load multi machinery tags:
            # Get enabled machinery
            machinery = cfg.cuckoo.get("machinery")
            if machinery == "multi":
                for mmachinery in Config(machinery).multi.get(
                        "machinery").split(","):
                    vms = [
                        x.strip() for x in getattr(Config(
                            mmachinery), mmachinery).get("machines").split(",")
                    ]
                    if any([
                            "tags"
                            in list(getattr(Config(mmachinery), vmtag).keys())
                            for vmtag in vms
                    ]):
                        enabledconf["tags"] = True
                        break
            else:
                # Get VM names for machinery config elements
                vms = [
                    x.strip() for x in getattr(Config(
                        machinery), machinery).get("machines").split(",")
                ]
                # Check each VM config element for tags
                if any([
                        "tags"
                        in list(getattr(Config(machinery), vmtag).keys())
                        for vmtag in vms
                ]):
                    enabledconf["tags"] = True

        packages, machines = get_form_data("windows")

        socks5s = _load_socks5_operational()
        socks5s_random = ""
        if socks5s:
            socks5s_random = random.choice(list(socks5s.values())).get(
                "description", False)

        return render(
            request, "submission/index.html", {
                "packages": sorted(packages),
                "machines": machines,
                "vpns": list(vpns.values()),
                "socks5s": list(socks5s.values()),
                "socks5s_random": socks5s_random,
                "route": routing.routing.route,
                "internet": routing.routing.internet,
                "inetsim": routing.inetsim.enabled,
                "tor": routing.tor.enabled,
                "config": enabledconf,
                "resubmit": resubmit_hash,
                "tags": sorted(list(set(all_tags)))
            })
Example #14
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to Elasticsearch.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_ELASTICSEARCH:
            raise CuckooDependencyError("Unable to import elasticsearch "
                                        "(install with `pip install elasticsearch`)")

        self.connect()
        index_prefix  = self.options.get("index", "cuckoo")
        search_only   = self.options.get("searchonly", False)

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        idxdate = report["info"]["started"].split(" ")[0]
        self.index_name = '{0}-{1}'.format(index_prefix, idxdate)

        if not search_only:
            if not "network" in report:
                report["network"] = {}

            # Store API calls in chunks for pagination in Django
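            # Each chunk holds at most 100 calls and is indexed as its own
            # "calls" document; the parent process entry keeps only the list
            # of chunk _ids, so the web UI can fetch one chunk at a time.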
            if "behavior" in report and "processes" in report["behavior"]:
                new_processes = []
                for process in report["behavior"]["processes"]:
                    new_process = dict(process)
                    chunk = []
                    chunks_ids = []
                    # Loop on each process call.
                    for index, call in enumerate(process["calls"]):
                        # If the chunk size is 100 or if the loop is completed then
                        # store the chunk in Elasticsearch.
                        if len(chunk) == 100:
                            to_insert = {"pid": process["process_id"],
                                         "calls": chunk}
                            pchunk = self.es.index(index=self.index_name,
                                                   doc_type="calls", body=to_insert)
                            chunk_id = pchunk['_id']
                            chunks_ids.append(chunk_id)
                            # Reset the chunk.
                            chunk = []

                        # Append call to the chunk.
                        chunk.append(call)

                    # Store leftovers.
                    if chunk:
                        to_insert = {"pid": process["process_id"], "calls": chunk}
                        pchunk = self.es.index(index=self.index_name, 
                                               doc_type="calls", body=to_insert)
                        chunk_id = pchunk['_id']
                        chunks_ids.append(chunk_id)

                    # Add list of chunks.
                    new_process["calls"] = chunks_ids
                    new_processes.append(new_process)

                # Store the results in the report.
                report["behavior"] = dict(report["behavior"])
                report["behavior"]["processes"] = new_processes

            # Add screenshot paths
            report["shots"] = []
            shots_path = os.path.join(self.analysis_path, "shots")
            if os.path.exists(shots_path):
                shots = [shot for shot in os.listdir(shots_path)
                         if shot.endswith(".jpg")]
                for shot_file in sorted(shots):
                    shot_path = os.path.join(self.analysis_path, "shots",
                                             shot_file)
                    screenshot = File(shot_path)
                    if screenshot.valid():
                        # Strip the extension as it's added later 
                        # in the Django view
                        report["shots"].append(shot_file.replace(".jpg", ""))

            if results.has_key("suricata") and results["suricata"]:
                if results["suricata"].has_key("tls") and len(results["suricata"]["tls"]) > 0:
                    report["suri_tls_cnt"] = len(results["suricata"]["tls"])
                if results["suricata"] and results["suricata"].has_key("alerts") and len(results["suricata"]["alerts"]) > 0:
                    report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
                if results["suricata"].has_key("files") and len(results["suricata"]["files"]) > 0:
                    report["suri_file_cnt"] = len(results["suricata"]["files"])
                if results["suricata"].has_key("http") and len(results["suricata"]["http"]) > 0:
                    report["suri_http_cnt"] = len(results["suricata"]["http"])
        else:
            report = {}
            report["task_id"] = results["info"]["id"]
            report["info"]    = results.get("info")
            report["target"]  = results.get("target")
            report["summary"] = results.get("behavior", {}).get("summary")
            report["network"] = results.get("network")
            report["virustotal"] = results.get("virustotal")

        # Other info we want Quick access to from the web UI
        if results.has_key("virustotal") and results["virustotal"] and results["virustotal"].has_key("positives") and results["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],results["virustotal"]["total"])

        # Store the report and retrieve its object id.
        self.es.index(index=self.index_name, doc_type="analysis", id=results["info"]["id"], body=report)
Example #15
0
def vt_lookup(category: str, target: str, results: dict = {}, on_demand: bool = False):
    if not processing_conf.virustotal.enabled or (processing_conf.virustotal.get("on_demand", False) and not on_demand):
        return {}
    if category not in ("file", "url"):
        return {"error": True, "msg": "VT category isn't supported"}

    if category == "file":
        if not do_file_lookup:
            return {"error": True, "msg": "VT File lookup disabled in processing.conf"}
        if not os.path.exists(target) and len(target) != 64:
            return {"error": True, "msg": "File doesn't exist"}

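        # target may be either a path to a local file or a bare SHA-256 hash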
        sha256 = target if len(target) == 64 else File(target).get_sha256()
        url = VIRUSTOTAL_FILE_URL.format(id=sha256)

    elif category == "url":
        if not do_url_lookup:
            return {"error": True, "msg": "VT URL lookup disabled in processing.conf"}
        if urlscrub:
            urlscrub_compiled_re = None
            try:
                urlscrub_compiled_re = re.compile(urlscrub)
            except Exception as e:
                log.error(f"Failed to compile urlscrub regex: {e}")
                return {}
            try:
                target = re.sub(urlscrub_compiled_re, "", target)
            except Exception as e:
                return {"error": True, "msg": f"Failed to scrub url: {e}"}

        # normalize the URL the way VT appears to
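        # e.g. "Example.COM" -> "http://example.com/": scheme added if missing,
        # scheme and host lowercased, path case preserved, trailing slash
        # appended when there is no path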
        if not target.lower().startswith(("http://", "https://")):
            target = f"http://{target}"
        slashsplit = target.split("/")
        slashsplit[0] = slashsplit[0].lower()
        slashsplit[2] = slashsplit[2].lower()
        if len(slashsplit) == 3:
            slashsplit.append("")
        target = "/".join(slashsplit)

        sha256 = hashlib.sha256(target.encode()).hexdigest()
        url = VIRUSTOTAL_URL_URL.format(id=sha256)

    try:
        r = requests.get(url, headers=headers, verify=True, timeout=timeout)
        if not r.ok:
            return {"error": True, "msg": f"Unable to complete connection to VirusTotal. Status code: {r.status_code}"}
        vt_response = r.json()
        engines = vt_response.get("data", {}).get("attributes", {}).get("last_analysis_results", {})
        if not engines:
            return {}
        virustotal = {
            "names": vt_response.get("data", {}).get("attributes", {}).get("names"),
            "scan_id": vt_response.get("data", {}).get("id"),
            "md5": vt_response.get("data", {}).get("attributes", {}).get("md5"),
            "sha1": vt_response.get("data", {}).get("attributes", {}).get("sha1"),
            "sha256": vt_response.get("data", {}).get("attributes", {}).get("sha256"),
            "tlsh": vt_response.get("data", {}).get("attributes", {}).get("tlsh"),
            "positive": vt_response.get("data", {}).get("attributes", {}).get("last_analysis_stats", {}).get("malicious"),
            "total": len(engines.keys()),
            "permalink": vt_response.get("data", {}).get("links", {}).get("self"),
        }
        if remove_empty:
            virustotal["scans"] = {engine.replace(".", "_"): block for engine, block in engines.items() if block["result"]}
        else:
            virustotal["scans"] = {engine.replace(".", "_"): block for engine, block in engines.items()}

        virustotal["resource"] = sha256
        virustotal["results"] = []
        detectnames = []
        for engine, block in engines.items():
            virustotal["results"] += [{"vendor": engine.replace(".", "_"), "sig": block["result"]}]
            if block["result"] and "Trojan.Heur." not in block["result"]:
                # weight Microsoft's detection, they seem to be more accurate than the rest
                if engine == "Microsoft":
                    detectnames.append(block["result"])
                detectnames.append(block["result"])

        virustotal["detection"] = get_vt_consensus(detectnames)
        if virustotal.get("detection", False) and results:
            add_family_detection(results, virustotal["detection"], "VirusTotal", virustotal["sha256"])
        if virustotal.get("positives", False) and virustotal.get("total", False):
            virustotal["summary"] = f"{virustotal['positives']}/{virustotal['total']}"

        return virustotal
    except requests.exceptions.RequestException as e:
        return {
            "error": True,
            "msg": f"Unable to complete connection to VirusTotal: {e}",
        }

Example #16
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to Elasticsearch.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_ELASTICSEARCH:
            raise CuckooDependencyError(
                "Unable to import elasticsearch "
                "(install with `pip3 install elasticsearch`)")

        self.connect()
        index_prefix = self.options.get("index", "cuckoo")
        search_only = self.options.get("searchonly", False)

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        idxdate = report["info"]["started"].split(" ")[0]
        self.index_name = '{0}-{1}'.format(index_prefix, idxdate)

        if not search_only:
            if not "network" in report:
                report["network"] = {}

            # Store API calls in chunks for pagination in Django
            if "behavior" in report and "processes" in report["behavior"]:
                new_processes = []
                for process in report["behavior"]["processes"]:
                    new_process = dict(process)
                    chunk = []
                    chunks_ids = []
                    # Loop on each process call.
                    for index, call in enumerate(process["calls"]):
                        # If the chunk size is 100 or if the loop is completed then
                        # store the chunk in Elasticsearch.
                        if len(chunk) == 100:
                            to_insert = {
                                "pid": process["process_id"],
                                "calls": chunk
                            }
                            pchunk = self.es.index(index=self.index_name,
                                                   doc_type="calls",
                                                   body=to_insert)
                            chunk_id = pchunk['_id']
                            chunks_ids.append(chunk_id)
                            # Reset the chunk.
                            chunk = []

                        # Append call to the chunk.
                        chunk.append(call)

                    # Store leftovers.
                    if chunk:
                        to_insert = {
                            "pid": process["process_id"],
                            "calls": chunk
                        }
                        pchunk = self.es.index(index=self.index_name,
                                               doc_type="calls",
                                               body=to_insert)
                        chunk_id = pchunk['_id']
                        chunks_ids.append(chunk_id)

                    # Add list of chunks.
                    new_process["calls"] = chunks_ids
                    new_processes.append(new_process)

                # Store the results in the report.
                report["behavior"] = dict(report["behavior"])
                report["behavior"]["processes"] = new_processes

            # Add screenshot paths
            report["shots"] = []
            shots_path = os.path.join(self.analysis_path, "shots")
            if os.path.exists(shots_path):
                shots = [
                    shot for shot in os.listdir(shots_path)
                    if shot.endswith(".jpg")
                ]
                for shot_file in sorted(shots):
                    shot_path = os.path.join(self.analysis_path, "shots",
                                             shot_file)
                    screenshot = File(shot_path)
                    if screenshot.valid():
                        # Strip the extension as it's added later
                        # in the Django view
                        report["shots"].append(shot_file.replace(".jpg", ""))

            # Other info we want Quick access to from the web UI
            if "virustotal" in results and results[
                    "virustotal"] and "positives" in results[
                        "virustotal"] and "total" in results["virustotal"]:
                report["virustotal_summary"] = "%s/%s" % (
                    results["virustotal"]["positives"],
                    results["virustotal"]["total"])

            if "suricata" in results and results["suricata"]:
                if "tls" in results["suricata"] and len(
                        results["suricata"]["tls"]) > 0:
                    report["suri_tls_cnt"] = len(results["suricata"]["tls"])
                if results["suricata"] and "alerts" in results[
                        "suricata"] and len(results["suricata"]["alerts"]) > 0:
                    report["suri_alert_cnt"] = len(
                        results["suricata"]["alerts"])
                if "files" in results["suricata"] and len(
                        results["suricata"]["files"]) > 0:
                    report["suri_file_cnt"] = len(results["suricata"]["files"])
                if "http" in results["suricata"] and len(
                        results["suricata"]["http"]) > 0:
                    report["suri_http_cnt"] = len(results["suricata"]["http"])
        else:
            report = {}
            report["task_id"] = results["info"]["id"]
            report["info"] = results.get("info")
            report["target"] = results.get("target")
            report["summary"] = results.get("behavior", {}).get("summary")
            report["network"] = results.get("network")
            report["virustotal"] = results.get("virustotal")
            report["virustotal_summary"] = "%s/%s" % (
                results["virustotal"]["positives"],
                results["virustotal"]["total"])

        # Store the report and retrieve its object id.
        self.es.index(index=self.index_name,
                      doc_type="analysis",
                      id=results["info"]["id"],
                      body=report)
Example #17
0
    def run(self):
        """Run analysis.
        @return: list of process dumps with related information.
        """
        self.key = "procdump"
        procdump_files = []
        buf = self.options.get("buffer", 8192)
        if not hasattr(self, "procdump_path") or not os.path.exists(
                self.procdump_path):
            return None
        file_names = os.listdir(self.procdump_path)
        for file_name in file_names:
            file_path = os.path.join(self.procdump_path, file_name)
            if not os.path.isfile(file_path):
                continue
            if file_name.endswith("_info.txt"):
                continue
            with open(file_path + "_info.txt", 'r') as f:
                metastring = f.readline()
            file_info = File(file_path=file_path,
                             guest_paths=metastring,
                             file_name=file_name).get_all()
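            # The *_info.txt line is a comma-separated record; indices 1-3 are
            # assumed below to hold <pid>,<process path>,<module path>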
            metastrings = metastring.split(",")
            file_info["process_path"] = metastrings[2]
            file_info["module_path"] = metastrings[3]
            file_info["process_name"] = file_info["process_path"].split(
                "\\")[-1]
            file_info["pid"] = metastrings[1]
            file_info["cape_type"] = "PE image"
            type_strings = file_info["type"].split()
            if type_strings[0] == ("PE32+"):
                file_info["cape_type"] += ": 64-bit "
            elif type_strings[0] == ("PE32"):
                file_info["cape_type"] += ": 32-bit "
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"
            texttypes = [
                "ASCII",
                "Windows Registry text",
                "XML document text",
                "Unicode text",
            ]
            readit = False
            for texttype in texttypes:
                if texttype in file_info["type"]:
                    readit = True
                    break
            if readit:
                with open(file_info["path"], "r") as drop_open:
                    filedata = drop_open.read(buf + 1)
                if len(filedata) > buf:
                    file_info["data"] = convert_to_printable(filedata[:buf] +
                                                             " <truncated>")
                else:
                    file_info["data"] = convert_to_printable(filedata)

            procdump_files.append(file_info)

        return procdump_files
Example #18
0
    def process_file(self, file_path, CAPE_files, append_file):
        """Process file.
        @return: file_info
        """
        global cape_config
        cape_name = ""
        strings = []

        buf = self.options.get("buffer", BUFSIZE)

        if file_path.endswith("_info.txt"):
            return

        texttypes = [
            "ASCII",
            "Windows Registry text",
            "XML document text",
            "Unicode text",
        ]

        if os.path.exists(file_path + "_info.txt"):
            with open(file_path + "_info.txt", 'r') as f:
                metastring = f.readline()
        else:
            metastring = ""

        file_info = File(file_path, metastring).get_all()

        # Get the file data
        with open(file_info["path"], "r") as file_open:
            filedata = file_open.read(buf + 1)
        if len(filedata) > buf:
            file_info["data"] = binascii.b2a_hex(filedata[:buf] +
                                                 " <truncated>")
        else:
            file_info["data"] = binascii.b2a_hex(filedata)

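        # The metastring read from the *_info.txt file is comma-separated; the
        # indices used below assume the layout
        # <cape type code>,<pid>,<process path>,<module path>[,<type-specific fields>]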
        metastrings = metastring.split(",")
        if len(metastrings) > 1:
            file_info["pid"] = metastrings[1]
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[2]
            file_info["process_name"] = metastrings[2].split("\\")[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[3]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""

        if metastrings != "":
            try:
                file_info["cape_type_code"] = int(metastrings[0])
            except Exception as e:
                pass
            if file_info["cape_type_code"] == COMPRESSION:
                file_info["cape_type"] = "Decompressed PE Image"
            if file_info["cape_type_code"] == INJECTION_PE:
                file_info["cape_type"] = "Injected PE Image"
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[4]
                    file_info["target_process"] = metastrings[4].split(
                        "\\")[-1]
                    file_info["target_pid"] = metastrings[5]
            if file_info["cape_type_code"] == INJECTION_SHELLCODE:
                file_info["cape_type"] = "Injected Shellcode/Data"
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[4]
                    file_info["target_process"] = metastrings[4].split(
                        "\\")[-1]
                    file_info["target_pid"] = metastrings[5]
            if file_info["cape_type_code"] == EXTRACTION_PE:
                file_info["cape_type"] = "Extracted PE Image"
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[4]
            if file_info["cape_type_code"] == EXTRACTION_SHELLCODE:
                file_info["cape_type"] = "Extracted Shellcode"
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[4]
            type_strings = file_info["type"].split()
            if type_strings[0] == ("PE32+"):
                file_info["cape_type"] += ": 64-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            if type_strings[0] == ("PE32"):
                file_info["cape_type"] += ": 32-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            # PlugX
            if file_info["cape_type_code"] == PLUGX_CONFIG:
                file_info["cape_type"] = "PlugX Config"
                plugx_parser = plugx.PlugXConfig()
                plugx_config = plugx_parser.parse_config(
                    filedata, len(filedata))
                if not "cape_config" in cape_config and plugx_config:
                    cape_config["cape_config"] = {}
                    for key, value in plugx_config.items():
                        cape_config["cape_config"].update({key: [value]})
                    cape_name = "PlugX"
                append_file = False
            if file_info["cape_type_code"] == PLUGX_PAYLOAD:
                file_info["cape_type"] = "PlugX Payload"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
            # EvilGrab
            if file_info["cape_type_code"] == EVILGRAB_PAYLOAD:
                file_info["cape_type"] = "EvilGrab Payload"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
            if file_info["cape_type_code"] == EVILGRAB_DATA:
                cape_name = "EvilGrab"
                file_info["cape_type"] = "EvilGrab Data"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                if file_info["size"] == 256 or file_info["size"] == 260:
                    ConfigItem = "filepath"
                    ConfigData = format(filedata)
                    cape_config["cape_config"].update(
                        {ConfigItem: [ConfigData]})
                if file_info["size"] > 0x1000:
                    append_file = True
                else:
                    append_file = False
            # Azzy
            if file_info["cape_type_code"] == AZZY_DATA:
                cape_name = "Azzy"
                cape_config["cape_type"] = "Azzy Config"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                AzzyConfigIndex = metastrings[4] if len(
                    metastrings) > 4 else None
                if AzzyConfigIndex == '0x0':
                    ConfigItem = "Timer1"
                elif AzzyConfigIndex == '0x1':
                    ConfigItem = "Timer2"
                elif AzzyConfigIndex == '0x2':
                    ConfigItem = "Computer Name"
                elif AzzyConfigIndex == '0x3':
                    ConfigItem = "C&C1"
                elif AzzyConfigIndex == '0x4':
                    ConfigItem = "C&C2"
                elif AzzyConfigIndex == '0x5':
                    ConfigItem = "Operation Name"
                elif AzzyConfigIndex == '0x6':
                    ConfigItem = "Keylogger MaxBuffer"
                elif AzzyConfigIndex == '0x7':
                    ConfigItem = "Keylogger MaxTimeout"
                elif AzzyConfigIndex == '0x8':
                    ConfigItem = "Keylogger Flag"
                elif AzzyConfigIndex == '0x9':
                    ConfigItem = "C&C3"
                else:
                    ConfigItem = "Unknown"
                ConfigData = format(filedata)
                if ConfigData:
                    cape_config["cape_config"].update(
                        {ConfigItem: [ConfigData]})
                append_file = False
            # UPX
            if file_info["cape_type_code"] == UPX:
                file_info["cape_type"] = "Unpacked PE Image"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

        # Process CAPE Yara hits
        for hit in file_info["cape_yara"]:
            cape_name = hit["name"]
            try:
                file_info["cape_type"] = hit["meta"]["cape_type"]
            except (KeyError, TypeError):
                file_info["cape_type"] = cape_name + " Payload"
            type_strings = file_info["type"].split()
            if type_strings[0] == ("PE32+"):
                file_info["cape_type"] += ": 64-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            if type_strings[0] == ("PE32"):
                file_info["cape_type"] += ": 32-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            # UPX Check and unpack
            if cape_name == 'UPX':
                log.info(
                    "CAPE: Found UPX Packed sample - attempting to unpack")
                unpacked_file = upx_unpack(filedata)
                if unpacked_file and os.path.exists(unpacked_file):
                    unpacked_yara = File(unpacked_file).get_yara(
                        CAPE_YARA_RULEPATH)
                    for unpacked_hit in unpacked_yara:
                        unpacked_name = unpacked_hit["name"]
                        if unpacked_name == 'UPX':
                            # Failed to unpack
                            log.info("CAPE: Failed to unpack UPX")
                            os.unlink(unpacked_file)
                            break
                    if not os.path.exists(self.CAPE_path):
                        os.makedirs(self.CAPE_path)
                    newname = os.path.join(self.CAPE_path,
                                           os.path.basename(unpacked_file))
                    os.rename(unpacked_file, newname)
                    with open(newname + "_info.txt", "a") as infofd:
                        infofd.write(os.path.basename(unpacked_file) + "\n")

                    # Recursive process of unpacked file
                    upx_extract = self.process_file(newname, CAPE_files, True)
                    if upx_extract["type"]:
                        upx_extract["cape_type"] = "UPX-extracted "
                        type_strings = upx_extract["type"].split()
                        if type_strings[0] == ("PE32+"):
                            upx_extract["cape_type"] += " 64-bit "
                            if type_strings[2] == ("(DLL)"):
                                upx_extract["cape_type"] += "DLL"
                            else:
                                upx_extract["cape_type"] += "executable"
                        if type_strings[0] == ("PE32"):
                            upx_extract["cape_type"] += " 32-bit "
                            if type_strings[2] == ("(DLL)"):
                                upx_extract["cape_type"] += "DLL"
                            else:
                                upx_extract["cape_type"] += "executable"

            # Attempt to import a parser for the yara hit
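            # Two parser back-ends are tried in turn: a DC3-MWCP parser named
            # after the yara hit, then a malwareconfig.com module of the same
            # name; whatever config either returns is merged into cape_config.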
            # DC3-MWCP
            try:
                mwcp = malwareconfigreporter.malwareconfigreporter()
                kwargs = {}
                mwcp.run_parser(cape_name, data=filedata, **kwargs)
                if mwcp.errors == []:
                    log.info("CAPE: Imported DC3-MWCP parser %s", cape_name)
                    mwcp_loaded = True
                else:
                    error_lines = mwcp.errors[0].split("\n")
                    for line in error_lines:
                        if line.startswith('ImportError: '):
                            log.info("CAPE: DC3-MWCP parser: %s",
                                     line.split(': ')[1])
                    mwcp_loaded = False
            except ImportError:
                mwcp_loaded = False
            # malwareconfig
            try:
                malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules",
                                                     "processing", "parsers",
                                                     "malwareconfig")
                file, pathname, description = imp.find_module(
                    cape_name, [malwareconfig_parsers])
                module = imp.load_module(cape_name, file, pathname,
                                         description)
                malwareconfig_loaded = True
                log.info("CAPE: Imported malwareconfig.com parser %s",
                         cape_name)
            except ImportError:
                #log.info("CAPE: No malwareconfig.com parser for %s", cape_name)
                malwareconfig_loaded = False

            # Get config data
            if mwcp_loaded:
                try:
                    if not "cape_config" in cape_config:
                        cape_config["cape_config"] = {}
                        cape_config["cape_config"] = convert(mwcp.metadata)
                    else:
                        cape_config["cape_config"].update(
                            convert(mwcp.metadata))
                except Exception as e:
                    log.error(
                        "CAPE: DC3-MWCP config parsing error with %s: %s",
                        cape_name, e)
            elif malwareconfig_loaded:
                try:
                    if not "cape_config" in cape_config:
                        cape_config["cape_config"] = {}
                    malwareconfig_config = module.config(filedata)
                    if isinstance(malwareconfig_config, list):
                        for (key,
                             value) in module.config(filedata)[0].iteritems():
                            cape_config["cape_config"].update({key: [value]})
                    elif isinstance(malwareconfig_config, dict):
                        for (key,
                             value) in module.config(filedata).iteritems():
                            cape_config["cape_config"].update({key: [value]})
                except Exception as e:
                    log.error("CAPE: malwareconfig parsing error with %s: %s",
                              cape_name, e)

        if cape_name:
            cape_config["cape_name"] = format(cape_name)
            if not "cape" in self.results:
                #self.results["cape"] = []
                self.results["cape"] = cape_name
            #if cape_name not in self.results["cape"]:
            #    self.results["cape"].append(cape_name)

        if append_file:
            CAPE_files.append(file_info)
        return file_info
Example #19
0
    def run(self):
        """Runs VirusTotal processing
        @return: full VirusTotal report.
        """
        self.key = "virustotal"
        virustotal = []

        key = self.options.get("key", None)
        timeout = self.options.get("timeout", 60)
        urlscrub = self.options.get("urlscrub", None)
        do_file_lookup = self.getbool(self.options.get("do_file_lookup",
                                                       False))
        do_url_lookup = self.getbool(self.options.get("do_url_lookup", False))

        if not key:
            raise CuckooProcessingError("VirusTotal API key not "
                                        "configured, skip")

        if self.task["category"] == "file" and do_file_lookup:
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError(
                    "File {0} not found, skipping it".format(self.file_path))

            resource = File(self.file_path).get_sha256()
            url = VIRUSTOTAL_FILE_URL

        elif self.task["category"] == "url" and do_url_lookup:
            resource = self.task["target"]
            if urlscrub:
                urlscrub_compiled_re = None
                try:
                    urlscrub_compiled_re = re.compile(urlscrub)
                except Exception as e:
                    raise CuckooProcessingError(
                        "Failed to compile urlscrub regex: {0}".format(e))
                try:
                    resource = re.sub(urlscrub_compiled_re, "", resource)
                except Exception as e:
                    raise CuckooProcessingError("Failed to scrub url" % (e))

            # normalize the URL the way VT appears to
            if not resource.lower().startswith(
                    "http://") and not resource.lower().startswith("https://"):
                resource = "http://" + resource
            slashsplit = resource.split('/')
            slashsplit[0] = slashsplit[0].lower()
            slashsplit[2] = slashsplit[2].lower()
            if len(slashsplit) == 3:
                slashsplit.append("")
            resource = "/".join(slashsplit)

            resource = hashlib.sha256(resource.encode("utf-8")).hexdigest()
            url = VIRUSTOTAL_URL_URL
        else:
            # Not supported type, exit.
            return virustotal

        data = {"resource": resource, "apikey": key}

        try:
            r = requests.get(url,
                             params=data,
                             verify=True,
                             timeout=int(timeout))
            response_data = r.content
        except requests.exceptions.RequestException as e:
            raise CuckooProcessingError("Unable to complete connection "
                                        "to VirusTotal: {0}".format(e))

        try:
            virustotal = json.loads(response_data)
        except ValueError as e:
            raise CuckooProcessingError("Unable to convert response to "
                                        "JSON: {0}".format(e))

        # Work around VT brain-damage
        if isinstance(virustotal, list) and len(virustotal):
            virustotal = virustotal[0]

        if "scans" in virustotal:
            items = virustotal["scans"].items()
            virustotal["scans"] = dict((engine.replace(".", "_"), signature)
                                       for engine, signature in items)
            virustotal["resource"] = resource
            virustotal["results"] = list(({
                "vendor": engine.replace(".", "_"),
                "sig": signature["result"]
            }) for engine, signature in items)
        return virustotal
Example #20
0
    def process_file(self, file_path, append_file, metadata=None):
        """Process file.
        @return: file_info
        """

        if metadata is None:
            metadata = {}
        cape_name = ""
        type_string = ""

        if not os.path.exists(file_path):
            return

        file_info, pefile_object = File(file_path,
                                        metadata.get("metadata",
                                                     "")).get_all()
        cape_names = set()

        if pefile_object:
            self.results.setdefault("pefiles",
                                    {}).setdefault(file_info["sha256"],
                                                   pefile_object)

        if file_info.get("clamav") and processing_conf.detections.clamav:
            clamav_detection = get_clamav_consensus(file_info["clamav"])
            if clamav_detection:
                add_family_detection(self.results, clamav_detection, "ClamAV",
                                     file_info["sha256"])

        # should we use dropped path here?
        static_file_info(
            file_info,
            file_path,
            str(self.task["id"]),
            self.task.get("package", ""),
            self.task.get("options", ""),
            self.self_extracted,
            self.results,
        )

        # Get the file data
        with open(file_info["path"], "rb") as file_open:
            file_data = file_open.read()

        if metadata.get("pids", False):
            file_info["pid"] = metadata["pids"][0] if len(
                metadata["pids"]) == 1 else ",".join(metadata["pids"])

        metastrings = metadata.get("metadata", "").split(";?")
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[1]
            file_info["process_name"] = metastrings[1].rsplit("\\", 1)[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[2]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""
        if metastrings and metastrings[0] and metastrings[0].isdigit():
            file_info["cape_type_code"] = int(metastrings[0])

            if file_info["cape_type_code"] == TYPE_STRING:
                if len(metastrings) > 4:
                    type_string = metastrings[3]

            elif file_info["cape_type_code"] == COMPRESSION:
                file_info["cape_type"] = "Decompressed PE Image"

            elif file_info["cape_type_code"] in inject_map:
                file_info["cape_type"] = inject_map[
                    file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[3]
                    file_info["target_process"] = metastrings[3].rsplit(
                        "\\", 1)[-1]
                    file_info["target_pid"] = metastrings[4]

            elif file_info["cape_type_code"] in unpack_map:
                file_info["cape_type"] = unpack_map[
                    file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[3]

            type_strings = file_info["type"].split()

            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"

            if file_info["cape_type_code"] in code_mapping:
                file_info["cape_type"] = code_mapping[
                    file_info["cape_type_code"]]
                type_strings = file_info["type"].split()
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if file_info["cape_type_code"] in name_mapping:
                    cape_name = name_mapping[file_info["cape_type_code"]]
                append_file = True

            # PlugX
            elif file_info["cape_type_code"] == PLUGX_CONFIG:
                file_info["cape_type"] = "PlugX Config"
                if plugx_parser:
                    plugx_config = plugx_parser.parse_config(
                        file_data, len(file_data))
                    if plugx_config:
                        cape_name = "PlugX"
                        self.update_cape_configs(cape_name, plugx_config)
                        cape_names.add(cape_name)
                    else:
                        log.error(
                            "CAPE: PlugX config parsing failure - size many not be handled"
                        )
                    append_file = False

            # Attempt to decrypt script dump
            elif file_info["cape_type_code"] == SCRIPT_DUMP:
                data = file_data.decode("utf-16").replace("\x00", "")
                cape_name = "ScriptDump"
                malwareconfig_loaded = False
                try:
                    malwareconfig_parsers = os.path.join(
                        CUCKOO_ROOT, "modules", "processing", "parsers",
                        "CAPE")
                    file, pathname, description = imp.find_module(
                        cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname,
                                             description)
                    malwareconfig_loaded = True
                    log.debug("CAPE: Imported parser %s", cape_name)
                except ImportError:
                    log.debug("CAPE: parser: No module named %s", cape_name)
                if malwareconfig_loaded:
                    try:
                        script_data = module.config(self, data)
                        if script_data and "more_eggs" in script_data["type"]:
                            bindata = script_data["data"]
                            sha256 = hashlib.sha256(bindata).hexdigest()
                            filepath = os.path.join(self.CAPE_path, sha256)
                            if "text" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsJS"
                            elif "binary" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsBin"
                            with open(filepath, "w") as cfile:
                                cfile.write(bindata)
                                self.script_dump_files.append(filepath)
                        else:
                            file_info["cape_type"] = "Script Dump"
                            log.info(
                                "CAPE: Script Dump does not contain known encrypted payload"
                            )
                    except Exception as e:
                        log.error(
                            "CAPE: malwareconfig parsing error with %s: %s",
                            cape_name, e)
                append_file = True

            # More_Eggs
            elif file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
                file_info["cape_type"] = "More Eggs JS Payload"
                cape_name = "MoreEggs"
                append_file = True

        # Process CAPE Yara hits

        # Prefilter extracted data; readability beats a one-liner:
        all_files = []
        for extracted_file in file_info.get("extracted_files", []):
            yara_hits = extracted_file["cape_yara"]
            if not yara_hits:
                continue
            if extracted_file.get("data", b""):
                extracted_file_data = make_bytes(extracted_file["data"])
            else:
                with open(extracted_file["path"], "rb") as fil:
                    extracted_file_data = fil.read()
            for yara in yara_hits:
                all_files.append((
                    f"[{extracted_file.get('sha256', '')}]{file_info['path']}",
                    extracted_file_data,
                    yara,
                ))

        for yara in file_info["cape_yara"]:
            all_files.append((file_info["path"], file_data, yara))

        executed_config_parsers = collections.defaultdict(set)
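        # Track which config parsers have already been run for each payload
        # path so the same extractor is not invoked twice on one file.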
        for tmp_path, tmp_data, hit in all_files:
            # Check for a payload or config hit
            try:
                if File.yara_hit_provides_detection(hit):
                    file_info["cape_type"] = hit["meta"]["cape_type"]
                    cape_name = File.get_cape_name_from_yara_hit(hit)
                    cape_names.add(cape_name)
            except Exception as e:
                print(f"Cape type error: {e}")
            type_strings = file_info["type"].split()
            if "-bit" not in file_info["cape_type"]:
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    file_info["cape_type"] += "DLL" if type_strings[2] == (
                        "(DLL)") else "executable"

            if cape_name and cape_name not in executed_config_parsers[tmp_path]:
                tmp_config = static_config_parsers(cape_name, tmp_path,
                                                   tmp_data)
                self.update_cape_configs(cape_name, tmp_config)
                executed_config_parsers[tmp_path].add(cape_name)

        if type_string:
            log.info("CAPE: type_string: %s", type_string)
            tmp_cape_name = File.get_cape_name_from_cape_type(type_string)
            if tmp_cape_name and tmp_cape_name not in executed_config_parsers:
                tmp_config = static_config_parsers(tmp_cape_name,
                                                   file_info["path"],
                                                   file_data)
                if tmp_config:
                    cape_name = tmp_cape_name
                    cape_names.add(cape_name)
                    log.info("CAPE: config returned for: %s", cape_name)
                    self.update_cape_configs(cape_name, tmp_config)

        self.add_family_detections(file_info, cape_names)

        # Remove duplicate payloads from web ui
        for cape_file in self.cape["payloads"] or []:
            if file_info["size"] == cape_file["size"]:
                if HAVE_PYDEEP:
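                    # pydeep.compare returns a 0-100 similarity score; at or
                    # above the threshold the payload is treated as a duplicate.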
                    ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode(),
                                                  cape_file["ssdeep"].encode())
                    if ssdeep_grade >= ssdeep_threshold:
                        log.debug(
                            "CAPE duplicate output file skipped: ssdeep grade %d, threshold %d",
                            ssdeep_grade, ssdeep_threshold)
                        append_file = False
                if file_info.get("entrypoint") and file_info.get(
                        "ep_bytes") and cape_file.get("entrypoint"):
                    if (file_info["entrypoint"] == cape_file["entrypoint"]
                            and file_info["cape_type_code"]
                            == cape_file["cape_type_code"] and
                            file_info["ep_bytes"] == cape_file["ep_bytes"]):
                        log.debug(
                            "CAPE duplicate output file skipped: matching entrypoint"
                        )
                        append_file = False

        if append_file:
            if HAVE_FLARE_CAPA:
                pretime = timeit.default_timer()
                capa_details = flare_capa_details(file_path, "cape")
                if capa_details:
                    file_info["flare_capa"] = capa_details
                self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
            self.cape["payloads"].append(file_info)
Example #21
0
def tasks_create_file():
    response = {}

    data = request.files.file
    pcap = request.POST.get("pcap", "")
    package = request.forms.get("package", "")
    timeout = request.forms.get("timeout", "")
    priority = request.forms.get("priority", 1)
    options = request.forms.get("options", "")
    machine = request.forms.get("machine", "")
    platform = request.forms.get("platform", "")
    tags = request.forms.get("tags", None)
    custom = request.forms.get("custom", "")
    memory = request.forms.get("memory", "False")
    clock = request.forms.get("clock",
                              datetime.now().strftime("%m-%d-%Y %H:%M:%S"))
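    # Reject missing or obviously bogus clock values (such as an epoch-1970
    # timestamp) and fall back to the current time.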
    if clock is False or clock is None:
        clock = datetime.now().strftime("%m-%d-%Y %H:%M:%S")
    if "1970" in clock:
        clock = datetime.now().strftime("%m-%d-%Y %H:%M:%S")
    shrike_url = request.forms.get("shrike_url", None)
    shrike_msg = request.forms.get("shrike_msg", None)
    shrike_sid = request.forms.get("shrike_sid", None)
    shrike_refer = request.forms.get("shrike_refer", None)
    static = bool(request.POST.get("static", False))
    unique = bool(request.forms.get("unique", False))
    if memory.upper() == "FALSE" or memory == "0":
        memory = False
    else:
        memory = True

    enforce_timeout = request.forms.get("enforce_timeout", "False")
    if enforce_timeout.upper() == "FALSE" or enforce_timeout == "0":
        enforce_timeout = False
    else:
        enforce_timeout = True

    temp_file_path = store_temp_file(data.file.read(), data.filename)

    if unique and db.check_file_uniq(File(temp_file_path).get_sha256()):
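        # check_file_uniq matches on the sample's SHA-256; with the "unique"
        # option set, an already-known file is rejected instead of re-queued.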
        resp = {
            "error":
            True,
            "error_value":
            "Duplicated file, disable unique option to force submission"
        }
        return jsonize(resp)

    if pcap:
        if data.filename.lower().endswith(".saz"):
            saz = saz_to_pcap(temp_file_path)
            if saz:
                path = saz
                try:
                    os.remove(temp_file_path)
                except OSError:
                    pass
            else:
                resp = {
                    "error": True,
                    "error_value": "Failed to convert PCAP to SAZ"
                }
                return jsonize(resp)
        else:
            path = temp_file_path
        task_id = db.add_pcap(file_path=path)
        task_ids = [task_id]
    else:

        try:
            task_ids, extra_details = db.demux_sample_and_add_to_db(
                file_path=temp_file_path,
                package=package,
                timeout=timeout,
                options=options,
                priority=priority,
                machine=machine,
                platform=platform,
                custom=custom,
                memory=memory,
                enforce_timeout=enforce_timeout,
                tags=tags,
                clock=clock,
                shrike_url=shrike_url,
                shrike_msg=shrike_msg,
                shrike_sid=shrike_sid,
                shrike_refer=shrike_refer,
                static=static,
            )
        except CuckooDemuxError as e:
            return HTTPError(500, e)

    response["task_ids"] = task_ids
    return jsonize(response)
Example #22
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
                CuckooReportError("Mongo schema version not expected, check data migration tool")
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #    created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        self.db.fs.files.ensure_index("sha256", unique=True,
                                      sparse=True, name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)
        if not "network" in report:
            report["network"] = {}
        # Store the sample in GridFS.
        if results["info"]["category"] == "file" and "target" in results:
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"]["pcap_id"] = pcap_id

        sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
        spcap = File(sorted_pcap_path)
        if spcap.valid():
            spcap_id = self.store_file(spcap)
            report["network"]["sorted_pcap_id"] = spcap_id

        if "procmemory" in report:
            # Store the process memory dump file in GridFS and reference it back in the report.
            for idx, procmem in enumerate(report['procmemory']):
                procmem_path = os.path.join(self.analysis_path, "memory", "{0}.dmp".format(procmem['pid']))
                procmem_file = File(procmem_path)
                if procmem_file.valid():
                    procmem_id = self.store_file(procmem_file)
                    report["procmemory"][idx].update({"procmem_id": procmem_id})

        # Store the suri extracted files in GridFS and reference it back in the report.
        suri_extracted_zip_path = os.path.join(self.analysis_path, "logs/files.zip")
        suri_extracted_zip = File(suri_extracted_zip_path)
        if suri_extracted_zip.valid():
            suri_extracted_zip_id = self.store_file(suri_extracted_zip)
            report["suricata"] = {"suri_extracted_zip": suri_extracted_zip_id}
            report["suricata"].update(results["suricata"])

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        if "dropped" in report:
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop, filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Store the Zipped Droppings file in GridFS and reference it back in the report.
        #cuckoo_dropped_zip_path = os.path.join(self.analysis_path, "cuckoodroppings.zip")
        #cuckoo_dropped_zip = File(cuckoo_dropped_zip_path)
        #if cuckoo_dropped_zip.valid():
        #    cuckoo_droppings_id = self.store_file(cuckoo_dropped_zip)
        #    report["zippeddroppings"] = {"cuckoo_droppings_id": cuckoo_droppings_id}
        #    report["zippeddroppings"].update(results["zippeddroppings"])


        # Walk through the suricata extracted files, store them in GridFS and update the
        # report with the ObjectIds.
        new_suricata_files = []
        if results.has_key("suricata") and results["suricata"]:
            if results["suricata"].has_key("files") and results["suricata"]["files"]:
                for suricata_file_e in results["suricata"]["files"]:
                    if suricata_file_e.has_key("file_info"):
                        tmp_suricata_file_d = dict(suricata_file_e)
                        suricata_file = File(suricata_file_e["file_info"]["path"])
                        if suricata_file.valid():
                            suricata_file_id = self.store_file(suricata_file, filename=suricata_file_e["file_info"]["name"])
                            tmp_suricata_file_d["object_id"] = suricata_file_id
                            new_suricata_files.append(tmp_suricata_file_d)

                report["suricata"]["files"] = new_suricata_files

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]

            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path,
                                         "shots", shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
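        # For example, a process with 250 calls yields three chunk documents
        # (100, 100 and 50 calls) whose ObjectIds replace the inline list.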
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed then
                    # store the chunk in MongoDB.
                    if len(chunk) == 100:
                        to_insert = {"pid": process["process_id"],
                                     "calls": chunk}
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Other info we want quick access to from the web UI
        if results.has_key("virustotal") and results["virustotal"] and results["virustotal"].has_key("positives") and results["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],results["virustotal"]["total"])
        if results.has_key("suricata") and results["suricata"]:
            if results["suricata"].has_key("tls") and len(results["suricata"]["tls"]) > 0:
                report["suri_tls_cnt"] = len(results["suricata"]["tls"])
            if results["suricata"] and results["suricata"].has_key("alerts") and len(results["suricata"]["alerts"]) > 0:
                report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
            if results["suricata"].has_key("files") and len(results["suricata"]["files"]) > 0:
                report["suri_file_cnt"] = len(results["suricata"]["files"])
            if results["suricata"].has_key("http") and len(results["suricata"]["http"]) > 0:
                report["suri_http_cnt"] = len(results["suricata"]["http"])
        # Store the report and retrieve its object id.
        try:
            self.db.analysis.save(report)
        except InvalidDocument as e:
            parent_key, psize = self.debug_dict_size(report)[0]
            child_key, csize = self.debug_dict_size(report[parent_key])[0]
            if not self.options.get("fix_large_docs", False):
                # Just log the error and problem keys
                log.error(str(e))
                log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576))
                log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576))
            else:
                # Delete the problem keys and check for more
                error_saved = True
                while error_saved:
                    log.warn("results['%s']['%s'] deleted due to >16MB size (%dMB)" %
                             (parent_key, child_key, int(psize) / 1048576))
                    del report[parent_key][child_key]
                    try:
                        self.db.analysis.save(report)
                        error_saved = False
                    except InvalidDocument as e:
                        parent_key, psize = self.debug_dict_size(report)[0]
                        child_key, csize = self.debug_dict_size(report[parent_key])[0]
                        log.error(str(e))
                        log.error("Largest parent key: %s (%d bytes)" % (parent_key, int(psize) / 1048576))
                        log.error("Largest child key: %s (%d bytes)" % (child_key, int(csize) / 1048576))

        self.conn.close()
Example #23
0
    def process_file(self, file_path, append_file, metadata=None):
        """Process file.
        @return: file_info
        """

        if metadata is None:
            metadata = {}
        config = {}
        cape_name = ""

        if not os.path.exists(file_path):
            return

        buf = self.options.get("buffer", BUFSIZE)
        file_info, pefile_object = File(file_path,
                                        metadata.get("metadata",
                                                     "")).get_all()
        if pefile_object:
            self.results.setdefault("pefiles", {})
            self.results["pefiles"].setdefault(file_info["sha256"],
                                               pefile_object)

        # Get the file data
        with open(file_info["path"], "rb") as file_open:
            file_data = file_open.read()

        if metadata.get("pids", False):
            if len(metadata["pids"]) == 1:
                file_info["pid"] = metadata["pids"][0]
            else:
                file_info["pid"] = ",".join(metadata["pids"])

        metastrings = metadata.get("metadata", "").split(";?")
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[1]
            file_info["process_name"] = metastrings[1].split("\\")[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[2]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""
        if metastrings != "":
            try:
                file_info["cape_type_code"] = int(metastrings[0])
            except Exception as e:
                pass
            if file_info["cape_type_code"] == COMPRESSION:
                file_info["cape_type"] = "Decompressed PE Image"

            if file_info["cape_type_code"] in inject_map:
                file_info["cape_type"] = inject_map[
                    file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[3]
                    file_info["target_process"] = metastrings[3].split(
                        "\\")[-1]
                    file_info["target_pid"] = metastrings[4]

            if file_info["cape_type_code"] == INJECTION_SECTION:
                file_info["cape_type"] = "Injected Section"
                if len(metastrings) > 4:
                    file_info["section_handle"] = metastrings[4]

            simple_cape_type_map = {
                UNPACKED_PE: "Unpacked PE Image",
                UNPACKED_SHELLCODE: "Unpacked Shellcode",
            }
            if file_info["cape_type_code"] in simple_cape_type_map:
                file_info["cape_type"] = simple_cape_type_map[
                    file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[3]

            type_strings = file_info["type"].split()
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            # PlugX
            if file_info["cape_type_code"] == PLUGX_CONFIG:
                file_info["cape_type"] = "PlugX Config"
                if plugx_parser:
                    plugx_config = plugx_parser.parse_config(
                        file_data, len(file_data))
                    if plugx_config:
                        cape_name = "PlugX"
                        config[cape_name] = dict()
                        for key, value in plugx_config.items():
                            config[cape_name].update({key: [value]})
                    else:
                        log.error(
                            "CAPE: PlugX config parsing failure - size many not be handled."
                        )
                    append_file = False
            if file_info["cape_type_code"] in code_mapping:
                file_info["cape_type"] = code_mapping[
                    file_info["cape_type_code"]]
                if file_info["cape_type_code"] in config_mapping:
                    file_info["cape_type"] = code_mapping[
                        file_info["cape_type_code"]]

                type_strings = file_info["type"].split()
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

                if file_info["cape_type_code"] in name_mapping:
                    cape_name = name_mapping[file_info["cape_type_code"]]

                append_file = True

            if file_info["cape_type_code"] == EVILGRAB_DATA:
                cape_name = "EvilGrab"
                file_info["cape_type"] = "EvilGrab Data"
                if file_info["size"] == 256 or file_info["size"] == 260:
                    config[cape_name].update({"filepath": [format(file_data)]})
                if file_info["size"] > 0x1000:
                    append_file = True
                else:
                    append_file = False
            if file_info["cape_type_code"] == SEDRECO_DATA:
                cape_name = "Sedreco"
                config[cape_name] = dict()
                config[cape_name]["cape_type"] = "Sedreco Config"
                ConfigItem = "Unknown"
                if len(metastrings) > 4:
                    SedrecoConfigIndex = metastrings[4]
                    if SedrecoConfigIndex in sedreco_map:
                        ConfigItem = sedreco_map[SedrecoConfigIndex]

                ConfigData = format(file_data)
                if ConfigData:
                    config[cape_name].update({ConfigItem: [ConfigData]})
                append_file = False

            if file_info["cape_type_code"] == CERBER_CONFIG:
                file_info["cape_type"] = "Cerber Config"
                cape_name = "Cerber"
                config[cape_name] = dict()
                config["cape_type"] = "Cerber Config"

                parsed = json.loads(file_data.rstrip(b"\0"))
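                # Cerber embeds its configuration as NUL-padded JSON; the
                # decoded blob is pretty-printed below for the report.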
                config[cape_name].update({
                    "JSON Data":
                    [json.dumps(parsed, indent=4, sort_keys=True)]
                })
                append_file = True

            if file_info["cape_type_code"] == URSNIF_PAYLOAD:
                cape_name = "Ursnif"
                config[cape_name] = dict()
                config[cape_name]["cape_type"] = "Ursnif Payload"

                file_info["cape_type"] = "Ursnif Payload"
            if file_info["cape_type_code"] == URSNIF_CONFIG:
                file_info["cape_type"] = "Ursnif Config"
                cape_name = "Ursnif"
                malwareconfig_loaded = False
                try:
                    malwareconfig_parsers = os.path.join(
                        CUCKOO_ROOT, "modules", "processing", "parsers",
                        "CAPE")
                    file, pathname, description = imp.find_module(
                        cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname,
                                             description)
                    malwareconfig_loaded = True
                    log.debug("CAPE: Imported malwareconfig.com parser %s",
                              cape_name)
                except ImportError:
                    log.debug(
                        "CAPE: malwareconfig.com parser: No module named %s",
                        cape_name)
                if malwareconfig_loaded:
                    try:
                        malwareconfig_config = module.config(file_data)
                        if malwareconfig_config:
                            config[cape_name] = dict()
                            config[cape_name]["cape_type"] = "Ursnif Config"
                            if isinstance(malwareconfig_config, list):
                                for (key,
                                     value) in malwareconfig_config[0].items():
                                    config[cape_name].update({key: [value]})
                            elif isinstance(malwareconfig_config, dict):
                                for (key,
                                     value) in malwareconfig_config.items():
                                    config[cape_name].update({key: [value]})
                    except Exception as e:
                        log.error(
                            "CAPE: malwareconfig parsing error with %s: %s",
                            cape_name, e)
                append_file = False
            # Hancitor
            if file_info["cape_type_code"] == HANCITOR_PAYLOAD:
                cape_name = "Hancitor"
                config[cape_name] = dict()
                config[cape_name]["cape_type"] = "Hancitor Payload"
                file_info["cape_type"] = "Hancitor Payload"
            if file_info["cape_type_code"] == HANCITOR_CONFIG:
                cape_name = "Hancitor"
                file_info["cape_type"] = "Hancitor Config"
                ConfigStrings = file_data.split(b"\0")
                ConfigStrings = [_f for _f in ConfigStrings if _f]
                ConfigItem = "Campaign Code"
                config[cape_name] = dict()
                config[cape_name]["cape_type"] = "Hancitor Config"
                config[cape_name].update({ConfigItem: [ConfigStrings[0]]})
                GateURLs = ConfigStrings[1].split(b"|")
                for index, value in enumerate(GateURLs):
                    ConfigItem = "Gate URL " + str(index + 1)
                    config[cape_name].update({ConfigItem: [value]})
                append_file = False
            # QakBot
            if file_info["cape_type_code"] == QAKBOT_CONFIG:
                file_info["cape_type"] = "QakBot Config"
                cape_name = "QakBot"
                config[cape_name] = dict()
                config[cape_name]["cape_type"] = "QakBot Config"
                config_tmp = static_config_parsers(cape_name, file_data)
                if config_tmp and config_tmp[cape_name]:
                    config.update(config_tmp)
                append_file = False
            # Attempt to decrypt script dump
            if file_info["cape_type_code"] == SCRIPT_DUMP:
                data = file_data.decode("utf-16").replace("\x00", "")
                cape_name = "ScriptDump"
                malwareconfig_loaded = False
                try:
                    malwareconfig_parsers = os.path.join(
                        CUCKOO_ROOT, "modules", "processing", "parsers",
                        "CAPE")
                    file, pathname, description = imp.find_module(
                        cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname,
                                             description)
                    malwareconfig_loaded = True
                    log.debug("CAPE: Imported malwareconfig.com parser %s",
                              cape_name)
                except ImportError:
                    log.debug(
                        "CAPE: malwareconfig.com parser: No module named %s",
                        cape_name)
                if malwareconfig_loaded:
                    try:
                        script_data = module.config(self, data)
                        if script_data and "more_eggs" in script_data["type"]:
                            bindata = script_data["data"]
                            sha256 = hashlib.sha256(bindata).hexdigest()
                            filepath = os.path.join(self.CAPE_path, sha256)
                            tmpstr = file_info["pid"]
                            tmpstr += "," + file_info["process_path"]
                            tmpstr += "," + file_info["module_path"]
                            if "text" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsJS"
                                outstr = str(
                                    MOREEGGSJS_PAYLOAD) + "," + tmpstr + "\n"
                                with open(filepath + "_info.txt",
                                          "w") as infofd:
                                    infofd.write(outstr)
                                with open(filepath, "w") as cfile:
                                    cfile.write(bindata)
                            elif "binary" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsBin"
                                outstr = str(
                                    MOREEGGSBIN_PAYLOAD) + "," + tmpstr + "\n"
                                with open(filepath + "_info.txt",
                                          "w") as infofd:
                                    infofd.write(outstr)
                                with open(filepath, "wb") as cfile:
                                    cfile.write(bindata)
                            if os.path.exists(filepath):
                                self.script_dump_files.append(filepath)
                        else:
                            file_info["cape_type"] = "Script Dump"
                            log.info(
                                "CAPE: Script Dump does not contain known encrypted payload."
                            )
                    except Exception as e:
                        log.error(
                            "CAPE: malwareconfig parsing error with %s: %s",
                            cape_name, e)
                append_file = True

            # More_Eggs
            if file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
                file_info["cape_type"] = "More Eggs JS Payload"
                cape_name = "MoreEggs"
                append_file = True

        # Process CAPE Yara hits
        for hit in file_info["cape_yara"]:
            # Check to see if file is packed with UPX
            if hit["name"] == "UPX":
                log.info(
                    "CAPE: Found UPX Packed sample - attempting to unpack")
                self.upx_unpack(file_data)

            # Check for a payload or config hit
            extraction_types = ["payload", "config", "loader"]
            try:
                for type in extraction_types:
                    if type in hit["meta"].get("cape_type", "").lower():
                        file_info["cape_type"] = hit["meta"]["cape_type"]
                        cape_name = hit["name"].replace("_", " ")
            except Exception as e:
                print("Cape type error: {}".format(e))
            type_strings = file_info["type"].split()
            if "-bit" not in file_info["cape_type"]:
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

            suppress_parsing_list = ["Cerber", "Ursnif"]

            if hit["name"] in suppress_parsing_list:
                continue

            tmp_config = static_config_parsers(hit["name"].replace("_", " "),
                                               file_data)
            if tmp_config and tmp_config[hit["name"].replace("_", " ")]:
                config.update(tmp_config)

        if cape_name:
            if not "detections" in self.results:
                if cape_name != "UPX":
                    #ToDo list of keys
                    self.results["detections"] = cape_name

        # Remove duplicate payloads from web ui
        for cape_file in self.cape["payloads"] or []:
            if file_info["size"] == cape_file["size"]:
                if HAVE_PYDEEP:
                    ssdeep_grade = pydeep.compare(
                        file_info["ssdeep"].encode("utf-8"),
                        cape_file["ssdeep"].encode("utf-8"))
                    if ssdeep_grade >= ssdeep_threshold:
                        append_file = False
                if file_info.get("entrypoint") and file_info.get(
                        "ep_bytes") and cape_file.get("entrypoint"):
                    if (file_info.get("entrypoint") and file_info["entrypoint"]
                            == cape_file["entrypoint"]
                            and file_info["cape_type_code"]
                            == cape_file["cape_type_code"] and
                            file_info["ep_bytes"] == cape_file["ep_bytes"]):
                        log.debug("CAPE duplicate output file skipped")
                        append_file = False

        if append_file:
            pretime = datetime.now()
            capa_details = flare_capa_details(file_path, "CAPE")
            if capa_details:
                file_info["flare_capa"] = capa_details
            self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
            self.cape["payloads"].append(file_info)

        if config and config not in self.cape["configs"]:
            self.cape["configs"].append(config)
Example #24
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
                CuckooReportError("Mongo schema version not expected, check data migration tool")
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #    created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        self.db.fs.files.ensure_index("sha256", unique=True,
                                      sparse=True, name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)
        # Store the sample in GridFS.
        if results["info"]["category"] == "file":
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"] = {"pcap_id": pcap_id}
            report["network"].update(results["network"])

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id

            new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Store the Zipped Droppings file in GridFS and reference it back in the report.
        #cuckoo_dropped_zip_path = os.path.join(self.analysis_path, "cuckoodroppings.zip")
        #cuckoo_dropped_zip = File(cuckoo_dropped_zip_path)
        #if cuckoo_dropped_zip.valid():
        #    cuckoo_droppings_id = self.store_file(cuckoo_dropped_zip)
        #    report["zippeddroppings"] = {"cuckoo_droppings_id": cuckoo_droppings_id}
        #    report["zippeddroppings"].update(results["zippeddroppings"])


        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]

            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path,
                                         "shots", shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"],
                                 "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

        # Other info we want quick access to from the web UI
        if report.has_key("virustotal") and report["virustotal"] and report["virustotal"].has_key("positives") and report["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (report["virustotal"]["positives"], report["virustotal"]["total"])

        new_suricata_files = []
        if report.has_key("suricata") and report["suricata"]:
            suricata = {}
            suricata["info"] = {}
            suricata["info"]["id"] = report["info"]["id"]
            # Walk through the suricata extracted files, store them in GridFS and update the
            # report with the ObjectIds
            # Store the suri extracted files in GridFS and reference it back in the report.
            suri_extracted_zip_path = os.path.join(self.analysis_path, "logs/files.zip")
            suri_extracted_zip = File(suri_extracted_zip_path)
            if suri_extracted_zip.valid():
                suri_extracted_zip_id = self.store_file(suri_extracted_zip)
                suricata["suri_extracted_zip"]=suri_extracted_zip_id

            if report["suricata"].has_key("files") and len(report["suricata"]["files"]) > 0:
                suricata["file_cnt"] = len(report["suricata"]["files"])
                for suricata_file_e in report["suricata"]["files"]:
                    if suricata_file_e.has_key("file_info"):
                        tmp_suricata_file_d = dict(suricata_file_e)
                        suricata_file = File(suricata_file_e["file_info"]["path"])
                        if suricata_file.valid():
                            suricata_file_id = self.store_file(suricata_file, filename=suricata_file_e["file_info"]["name"])
                            tmp_suricata_file_d["object_id"] = suricata_file_id
                            new_suricata_files.append(tmp_suricata_file_d)
                suricata["files"] = new_suricata_files
            if report["suricata"].has_key("alert_log_full_path") and report["suricata"]["alert_log_full_path"]:
                suricata_alert_log = File(report["suricata"]["alert_log_full_path"])
                if suricata_alert_log.valid():
                    suricata_alert_log_id = self.store_file(suricata_alert_log)
                    suricata["alert_log_id"] = suricata_alert_log_id

            if report["suricata"].has_key("tls_log_full_path") and report["suricata"]["tls_log_full_path"]:
                tls_log = File(report["suricata"]["tls_log_full_path"])
                if tls_log.valid():
                    tls_log_id = self.store_file(tls_log)
                    suricata["tls_log_id"] = tls_log_id

            if report["suricata"].has_key("http_log_full_path") and report["suricata"]["http_log_full_path"]:
                http_log = File(report["suricata"]["http_log_full_path"])
                if http_log.valid():
                    http_log_id = self.store_file(http_log)
                    suricata["http_log_id"] = http_log_id

            if report["suricata"].has_key("file_log_full_path") and report["suricata"]["file_log_full_path"]:
                file_log = File(report["suricata"]["file_log_full_path"])
                if file_log.valid():
                    file_log_id = self.store_file(file_log)
                    suricata["file_log_id"] = file_log_id
            if report["suricata"].has_key("dns_log_full_path") and report["suricata"]["dns_log_full_path"]:
                dns_log = File(report["suricata"]["dns_log_full_path"])
                if dns_log.valid():
                    dns_log_id = self.store_file(dns_log)
                    suricata["dns_log_id"] = dns_log_id
            if report["suricata"].has_key("ssh_log_full_path") and report["suricata"]["ssh_log_full_path"]:
                ssh_log = File(report["suricata"]["ssh_log_full_path"])
                if ssh_log.valid():
                    ssh_log_id = self.store_file(ssh_log)
                    suricata["ssh_log_id"] = ssh_log_id

            if report["suricata"].has_key("tls") and len(report["suricata"]["tls"]) > 0:
                suricata["tls_cnt"] = len(report["suricata"]["tls"])
                suricata["tls"]=report["suricata"]["tls"]
            if report["suricata"] and report["suricata"].has_key("alerts") and len(report["suricata"]["alerts"]) > 0:
                suricata["alert_cnt"] = len(report["suricata"]["alerts"])
                suricata["alerts"]=report["suricata"]["alerts"]
            if results["suricata"].has_key("http") and len(report["suricata"]["http"]) > 0:
                suricata["http_cnt"] = len(report["suricata"]["http"])
                suricata["http"]=report["suricata"]["http"]
            self.db.suricata.save(suricata)
            #do not store this in analysis collection
            del report["suricata"]
        if results.has_key("behavior") and results["behavior"].has_key("martianlist") and results["behavior"]["martianlist"] and len(results["behavior"]["martianlist"]) > 0:
            report["mlist_cnt"] = len(results["behavior"]["martianlist"])

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.disconnect()
Example #25
0
    def run(self):
        """Run Suricata.
        @return: hash with alerts
        """
        self.key = "suricata"
        # General
        SURICATA_CONF = self.options.get("conf", None)
        SURICATA_EVE_LOG = self.options.get("evelog", None)
        SURICATA_ALERT_LOG = self.options.get("alertlog", None)
        SURICATA_TLS_LOG = self.options.get("tlslog", None)
        SURICATA_HTTP_LOG = self.options.get("httplog", None)
        SURICATA_SSH_LOG = self.options.get("sshlog", None)
        SURICATA_DNS_LOG = self.options.get("dnslog", None)
        SURICATA_FILE_LOG = self.options.get("fileslog", None)
        SURICATA_FILES_DIR = self.options.get("filesdir", None)
        SURICATA_RUNMODE = self.options.get("runmode", None)
        SURICATA_FILE_BUFFER = self.options.get("buffer", 8192)
        Z7_PATH = self.options.get("7zbin", None)
        FILES_ZIP_PASS = self.options.get("zippass", None)
        SURICATA_FILE_COPY_DST_DIR = self.options.get("file_copy_dest_dir",
                                                      None)
        SURICATA_FILE_COPY_MAGIC_RE = self.options.get("file_magic_re", None)
        if SURICATA_FILE_COPY_MAGIC_RE:
            try:
                SURICATA_FILE_COPY_MAGIC_RE = re.compile(
                    SURICATA_FILE_COPY_MAGIC_RE)
            except:
                log.warning("Failed to compile suricata copy magic RE" %
                            (SURICATA_FILE_COPY_MAGIC_RE))
                SURICATA_FILE_COPY_MAGIC_RE = None
        # Socket
        SURICATA_SOCKET_PATH = self.options.get("socket_file", None)
        SURICATA_SOCKET_PYLIB = self.options.get("pylib_dir", None)

        # Command Line
        SURICATA_BIN = self.options.get("bin", None)

        suricata = {}
        suricata["alerts"] = []
        suricata["tls"] = []
        suricata["perf"] = []
        suricata["files"] = []
        suricata["http"] = []
        suricata["dns"] = []
        suricata["ssh"] = []
        suricata["file_info"] = []

        suricata["eve_log_full_path"] = None
        suricata["alert_log_full_path"] = None
        suricata["tls_log_full_path"] = None
        suricata["http_log_full_path"] = None
        suricata["file_log_full_path"] = None
        suricata["ssh_log_full_path"] = None
        suricata["dns_log_full_path"] = None

        SURICATA_ALERT_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                  SURICATA_ALERT_LOG)
        SURICATA_TLS_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_TLS_LOG)
        SURICATA_HTTP_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                 SURICATA_HTTP_LOG)
        SURICATA_SSH_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_SSH_LOG)
        SURICATA_DNS_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_DNS_LOG)
        SURICATA_EVE_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_EVE_LOG)
        SURICATA_FILE_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                 SURICATA_FILE_LOG)
        SURICATA_FILES_DIR_FULL_PATH = "%s/%s" % (self.logs_path,
                                                  SURICATA_FILES_DIR)

        if not os.path.exists(SURICATA_CONF):
            log.warning("Unable to Run Suricata: Conf File %s Does Not Exist" %
                        (SURICATA_CONF))
            return suricata["alerts"]
        if not os.path.exists(self.pcap_path):
            log.warning("Unable to Run Suricata: Pcap file %s Does Not Exist" %
                        (self.pcap_path))
            return suricata["alerts"]

        # Add to this if you wish to ignore any SIDs for the suricata alert logs
        # Useful for ignoring SIDs without disabling them. Ex: suppress an alert for
        # a SID which is a dependent of another. (Bad TCP data for HTTP(S) alert)
        sid_blacklist = [
            # SURICATA FRAG IPv6 Fragmentation overlap
            2200074,
            # ET INFO InetSim Response from External Source Possible SinkHole
            2017363,
            # SURICATA UDPv4 invalid checksum
            2200075,
            # ET POLICY SSLv3 outbound connection from client vulnerable to POODLE attack
            2019416,
        ]
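        # As a purely hypothetical example, another rule could be suppressed in
        # the same way by appending its SID here, e.g.:
        #     sid_blacklist.append(1000001)  # hypothetical local rule SID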

        if SURICATA_RUNMODE == "socket":
            if SURICATA_SOCKET_PYLIB is not None:
                sys.path.append(SURICATA_SOCKET_PYLIB)
            try:
                from suricatasc import SuricataSC
            except Exception as e:
                log.warning("Failed to import suricatasc lib %s" % (e))
                return suricata

            loopcnt = 0
            maxloops = 24
            loopsleep = 5

            args = {}
            args["filename"] = self.pcap_path
            args["output-dir"] = self.logs_path

            suris = SuricataSC(SURICATA_SOCKET_PATH)
            try:
                suris.connect()
                suris.send_command("pcap-file", args)
            except Exception as e:
                log.warning(
                    "Failed to connect to socket and send command %s: %s" %
                    (SURICATA_SOCKET_PATH, e))
                return suricata
            while loopcnt < maxloops:
                try:
                    pcap_flist = suris.send_command("pcap-file-list")
                    current_pcap = suris.send_command("pcap-current")
                    log.debug("pcapfile list: %s current pcap: %s" %
                              (pcap_flist, current_pcap))

                    if self.pcap_path not in pcap_flist["message"][
                            "files"] and current_pcap[
                                "message"] != self.pcap_path:
                        log.debug(
                            "Pcap not in list and not the current pcap; assuming it has been processed"
                        )
                        break
                    else:
                        loopcnt = loopcnt + 1
                        time.sleep(loopsleep)
                except Exception as e:
                    log.warning(
                        "Failed to get pcap status breaking out of loop %s" %
                        (e))
                    break

            if loopcnt == maxloops:
                log.warning(
                    "Loop timeout of %ssec occurred waiting for file %s to finish processing"
                    % (maxloops * loopsleep, self.pcap_path))
                return suricata
        elif SURICATA_RUNMODE == "cli":
            if not os.path.exists(SURICATA_BIN):
                log.warning(
                    "Unable to Run Suricata: Bin File %s Does Not Exist" %
                    (SURICATA_BIN))
                return suricata["alerts"]
            cmd = "%s -c %s -k none -l %s -r %s" % (
                SURICATA_BIN, SURICATA_CONF, self.logs_path, self.pcap_path)
            ret, stdout, stderr = self.cmd_wrapper(cmd)
            if ret != 0:
                log.warning(
                    "Suricata returned an Exit Value Other than Zero %s" %
                    (stderr))
                return suricata

        else:
            log.warning("Unknown Suricata Runmode")
            return suricata

        datalist = []
        if os.path.exists(SURICATA_EVE_LOG_FULL_PATH):
            suricata["eve_log_full_path"] = SURICATA_EVE_LOG_FULL_PATH
            with open(SURICATA_EVE_LOG_FULL_PATH, "rb") as eve_log:
                datalist.append(eve_log.read())
        else:
            paths = [("alert_log_full_path", SURICATA_ALERT_LOG_FULL_PATH),
                     ("tls_log_full_path", SURICATA_TLS_LOG_FULL_PATH),
                     ("http_log_full_path", SURICATA_HTTP_LOG_FULL_PATH),
                     ("ssh_log_full_path", SURICATA_SSH_LOG_FULL_PATH),
                     ("dns_log_full_path", SURICATA_DNS_LOG_FULL_PATH)]
            for path in paths:
                if os.path.exists(path[1]):
                    suricata[path[0]] = path[1]
                    with open(path[1], "rb") as the_log:
                        datalist.append(the_log.read())

        if not datalist:
            log.warning("Suricata: Failed to find usable Suricata log file")

        for data in datalist:
            for line in data.splitlines():
                try:
                    parsed = json.loads(line)
                except:
                    log.warning("Suricata: Failed to parse line as json" %
                                (line))
                    continue

                if parsed["event_type"] == "alert":
                    if (parsed["alert"]["signature_id"] not in sid_blacklist
                            and not parsed["alert"]["signature"].startswith(
                                "SURICATA STREAM")):
                        alog = dict()
                        if parsed["alert"]["gid"] == '':
                            alog["gid"] = "None"
                        else:
                            alog["gid"] = parsed["alert"]["gid"]
                        if parsed["alert"]["rev"] == '':
                            alog["rev"] = "None"
                        else:
                            alog["rev"] = parsed["alert"]["rev"]
                        if parsed["alert"]["severity"] == '':
                            alog["severity"] = "None"
                        else:
                            alog["severity"] = parsed["alert"]["severity"]
                        alog["sid"] = parsed["alert"]["signature_id"]
                        alog["srcport"] = parsed["src_port"]
                        alog["srcip"] = parsed["src_ip"]
                        alog["dstport"] = parsed["dest_port"]
                        alog["dstip"] = parsed["dest_ip"]
                        alog["protocol"] = parsed["proto"]
                        alog["timestamp"] = parsed["timestamp"].replace(
                            "T", " ")
                        if parsed["alert"]["category"] == '':
                            alog["category"] = "None"
                        else:
                            alog["category"] = parsed["alert"]["category"]
                        alog["signature"] = parsed["alert"]["signature"]
                        suricata["alerts"].append(alog)

                elif parsed["event_type"] == "http":
                    hlog = dict()
                    hlog["srcport"] = parsed["src_port"]
                    hlog["srcip"] = parsed["src_ip"]
                    hlog["dstport"] = parsed["dest_port"]
                    hlog["dstip"] = parsed["dest_ip"]
                    hlog["timestamp"] = parsed["timestamp"].replace("T", " ")
                    try:
                        hlog["uri"] = parsed["http"]["url"]
                    except:
                        hlog["uri"] = "None"
                    hlog["length"] = parsed["http"]["length"]
                    try:
                        hlog["hostname"] = parsed["http"]["hostname"]
                    except:
                        hlog["hostname"] = "None"
                    try:
                        hlog["status"] = str(parsed["http"]["status"])
                    except:
                        hlog["status"] = "None"
                    try:
                        hlog["method"] = parsed["http"]["http_method"]
                    except:
                        hlog["method"] = "None"
                    try:
                        hlog["contenttype"] = parsed["http"][
                            "http_content_type"]
                    except:
                        hlog["contenttype"] = "None"
                    try:
                        hlog["ua"] = parsed["http"]["http_user_agent"]
                    except:
                        hlog["ua"] = "None"
                    try:
                        hlog["referrer"] = parsed["http"]["http_refer"]
                    except:
                        hlog["referrer"] = "None"
                    suricata["http"].append(hlog)

                elif parsed["event_type"] == "tls":
                    tlog = dict()
                    tlog["srcport"] = parsed["src_port"]
                    tlog["srcip"] = parsed["src_ip"]
                    tlog["dstport"] = parsed["dest_port"]
                    tlog["dstip"] = parsed["dest_ip"]
                    tlog["timestamp"] = parsed["timestamp"].replace("T", " ")
                    tlog["fingerprint"] = parsed["tls"]["fingerprint"]
                    tlog["issuer"] = parsed["tls"]["issuerdn"]
                    tlog["version"] = parsed["tls"]["version"]
                    tlog["subject"] = parsed["tls"]["subject"]
                    suricata["tls"].append(tlog)

                elif parsed["event_type"] == "ssh":
                    suricata["ssh"].append(parsed)
                elif parsed["event_type"] == "dns":
                    suricata["dns"].append(parsed)

        if os.path.exists(SURICATA_FILE_LOG_FULL_PATH):
            suricata["file_log_full_path"] = SURICATA_FILE_LOG_FULL_PATH
            with open(SURICATA_FILE_LOG_FULL_PATH, "rb") as file_log_fh:
                file_log_lines = file_log_fh.readlines()
            for l in file_log_lines:
                try:
                    d = json.loads(l)
                except:
                    log.warning("failed to load JSON from file log")
                    continue
                # Some log entries do not have an id
                if "id" not in d:
                    continue
                src_file = "%s/file.%s" % (SURICATA_FILES_DIR_FULL_PATH,
                                           d["id"])
                if os.path.exists(src_file):
                    if SURICATA_FILE_COPY_MAGIC_RE and SURICATA_FILE_COPY_DST_DIR and os.path.exists(
                            SURICATA_FILE_COPY_DST_DIR):
                        try:
                            m = re.search(SURICATA_FILE_COPY_MAGIC_RE,
                                          d["magic"])
                            if m:
                                dst_file = "%s/%s" % (
                                    SURICATA_FILE_COPY_DST_DIR, d["md5"])
                                shutil.copy2(src_file, dst_file)
                                log.warning("copied %s to %s" %
                                            (src_file, dst_file))
                        except Exception as e:
                            log.warning("Unable to copy suricata file: %s" % e)
                    file_info = File(file_path=src_file).get_all()
                    texttypes = [
                        "ASCII",
                        "Windows Registry text",
                        "XML document text",
                        "Unicode text",
                    ]
                    readit = False
                    for texttype in texttypes:
                        if texttype in file_info["type"]:
                            readit = True
                            break
                    if readit:
                        with open(file_info["path"], "rb") as drop_open:
                            filedata = drop_open.read(SURICATA_FILE_BUFFER + 1)
                        if len(filedata) > SURICATA_FILE_BUFFER:
                            file_info["data"] = convert_to_printable(
                                filedata[:SURICATA_FILE_BUFFER] +
                                " <truncated>")
                        else:
                            file_info["data"] = convert_to_printable(filedata)
                    d["file_info"] = file_info
                if "/" in d["filename"]:
                    d["filename"] = d["filename"].split("/")[-1]
                suricata["files"].append(d)
Example #26
0
    def process_file(self, file_path, append_file, metadata={}):
        """Process file.
        @return: file_info
        """

        config = {}
        cape_name = ""
        type_string = ""

        if not os.path.exists(file_path):
            return

        buf = self.options.get("buffer", BUFSIZE)

        file_info, pefile_object = File(file_path, metadata.get("metadata", "")).get_all()
        if pefile_object:
            self.results.setdefault("pefiles", {})
            self.results["pefiles"].setdefault(file_info["sha256"], pefile_object)

        # Run any applicable file extractors/unpackers
        generic_file_extractors(file_path, self.dropped_path, file_info.get("type", ""), file_info)

        # Get the file data
        with open(file_info["path"], "rb") as file_open:
            file_data = file_open.read()

        is_text_file(file_info, self.CAPE_path, buf, file_data)

        if metadata.get("pids", False):
            if len(metadata["pids"]) == 1:
                file_info["pid"] = metadata["pids"][0]
            else:
                file_info["pid"] = ",".join(metadata["pids"])

        metastrings = metadata.get("metadata", "").split(";?")
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[1]
            file_info["process_name"] = metastrings[1].rsplit("\\", 1)[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[2]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""
        if metastrings and metastrings[0] and metastrings[0].isdigit():
            file_info["cape_type_code"] = int(metastrings[0])

            if file_info["cape_type_code"] == TYPE_STRING:
                if len(metastrings) > 4:
                    type_string = metastrings[3]

            if file_info["cape_type_code"] == COMPRESSION:
                file_info["cape_type"] = "Decompressed PE Image"

            if file_info["cape_type_code"] in inject_map:
                file_info["cape_type"] = inject_map[file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[3]
                    file_info["target_process"] = metastrings[3].rsplit("\\", 1)[-1]
                    file_info["target_pid"] = metastrings[4]

            if file_info["cape_type_code"] in unpack_map:
                file_info["cape_type"] = unpack_map[file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[3]

            type_strings = file_info["type"].split()

            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"

            if file_info["cape_type_code"] in code_mapping:
                file_info["cape_type"] = code_mapping[file_info["cape_type_code"]]
                type_strings = file_info["type"].split()
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if file_info["cape_type_code"] in name_mapping:
                    cape_name = name_mapping[file_info["cape_type_code"]]
                append_file = True

                """
                ConfigData = format(file_data)
                if ConfigData:
                    config[cape_name].update({ConfigItem: [ConfigData]})
                """

            # PlugX
            if file_info["cape_type_code"] == PLUGX_CONFIG:
                file_info["cape_type"] = "PlugX Config"
                if plugx_parser:
                    plugx_config = plugx_parser.parse_config(file_data, len(file_data))
                    if plugx_config:
                        cape_name = "PlugX"
                        config[cape_name] = {}
                        for key, value in plugx_config.items():
                            config[cape_name].update({key: [value]})
                    else:
                        log.error("CAPE: PlugX config parsing failure - size many not be handled")
                    append_file = False

            # Attempt to decrypt script dump
            if file_info["cape_type_code"] == SCRIPT_DUMP:
                data = file_data.decode("utf-16").replace("\x00", "")
                cape_name = "ScriptDump"
                malwareconfig_loaded = False
                try:
                    malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "CAPE")
                    file, pathname, description = imp.find_module(cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname, description)
                    malwareconfig_loaded = True
                    log.debug("CAPE: Imported parser %s", cape_name)
                except ImportError:
                    log.debug("CAPE: parser: No module named %s", cape_name)
                if malwareconfig_loaded:
                    try:
                        script_data = module.config(self, data)
                        if script_data and "more_eggs" in script_data["type"]:
                            bindata = script_data["data"]
                            sha256 = hashlib.sha256(bindata).hexdigest()
                            filepath = os.path.join(self.CAPE_path, sha256)
                            tmpstr = file_info["pid"]
                            tmpstr += f",{file_info['process_path']}"
                            tmpstr += f",{file_info['module_path']}"
                            if "text" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsJS"
                                outstr = f"{MOREEGGSJS_PAYLOAD},{tmpstr}\n"
                                # with open(f"{filepath}_info.txt", "w") as infofd:
                                #    infofd.write(outstr)
                                with open(filepath, "w") as cfile:
                                    cfile.write(bindata)
                            elif "binary" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsBin"
                                outstr = f"{MOREEGGSBIN_PAYLOAD},{tmpstr}\n"
                                # with open(f"{filepath}_info.txt", "w") as infofd:
                                #    infofd.write(outstr)
                                with open(filepath, "wb") as cfile:
                                    cfile.write(bindata)
                            if os.path.exists(filepath):
                                self.script_dump_files.append(filepath)
                        else:
                            file_info["cape_type"] = "Script Dump"
                            log.info("CAPE: Script Dump does not contain known encrypted payload")
                    except Exception as e:
                        log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e)
                append_file = True

            # More_Eggs
            if file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
                file_info["cape_type"] = "More Eggs JS Payload"
                cape_name = "MoreEggs"
                append_file = True

        # Process CAPE Yara hits
        for hit in file_info["cape_yara"]:
            # Check to see if file is packed with UPX
            if hit["name"] == "UPX":
                log.info("CAPE: Found UPX Packed sample - attempting to unpack")
                self.upx_unpack(file_data)

            # Check for a payload or config hit
            extraction_types = ("payload", "config", "loader")

            try:
                if any([file_type in hit["meta"].get("cape_type", "").lower() for file_type in extraction_types]):
                    file_info["cape_type"] = hit["meta"]["cape_type"]
                    cape_name = hit["name"].replace("_", " ")
            except Exception as e:
                print(f"Cape type error: {e}")
            type_strings = file_info["type"].split()
            if "-bit" not in file_info["cape_type"]:
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

            if hit["name"] == "GuLoader":
                self.detect2pid(file_info["pid"], "GuLoader")

            cape_name = hit["name"].replace("_", " ")
            tmp_config = static_config_parsers(hit["name"], file_data)
            if tmp_config and tmp_config.get(cape_name):
                config.update(tmp_config[cape_name])

        if type_string:
            log.info("CAPE: type_string: %s", type_string)
            tmp_config = static_config_parsers(type_string.split(" ", 1)[0], file_data)
            if tmp_config:
                cape_name = type_string.split(" ", 1)[0]
                log.info("CAPE: config returned for: %s", cape_name)
                config.update(tmp_config)

        if cape_name:
            if "detections" not in self.results:
                if cape_name != "UPX":
                    # ToDo list of keys
                    self.results["detections"] = cape_name
            if file_info.get("pid"):
                self.detect2pid(file_info["pid"], cape_name)

        # Remove duplicate payloads from web ui
        for cape_file in self.cape["payloads"] or []:
            if file_info["size"] == cape_file["size"]:
                if HAVE_PYDEEP:
                    ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode(), cape_file["ssdeep"].encode())
                    if ssdeep_grade >= ssdeep_threshold:
                        append_file = False
                if file_info.get("entrypoint") and file_info.get("ep_bytes") and cape_file.get("entrypoint"):
                    if (
                        file_info.get("entrypoint")
                        and file_info["entrypoint"] == cape_file["entrypoint"]
                        and file_info["cape_type_code"] == cape_file["cape_type_code"]
                        and file_info["ep_bytes"] == cape_file["ep_bytes"]
                    ):
                        log.debug("CAPE duplicate output file skipped")
                        append_file = False

        if append_file:
            if HAVE_FLARE_CAPA:
                pretime = datetime.now()
                capa_details = flare_capa_details(file_path, "cape")
                if capa_details:
                    file_info["flare_capa"] = capa_details
                self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
            self.cape["payloads"].append(file_info)

        if config and config not in self.cape["configs"]:
            if cape_name in multi_block_config and self.cape["configs"]:
                for conf in self.cape["configs"]:
                    if cape_name in conf:
                        conf[cape_name].update(config)
            else:
                # if the malware name is missing it will break config visualization
                if cape_name not in config:
                    config = {cape_name: config}
                if config not in self.cape["configs"]:
                    self.cape["configs"].append(config)
Example #27
0
    def launch_analysis(self):
        """Start analysis."""
        succeeded = False
        dead_machine = False
        self.socks5s = _load_socks5_operational()

        log.info("Task #{0}: Starting analysis of {1} '{2}'".format(
            self.task.id, self.task.category.upper(),
            convert_to_printable(self.task.target)))

        # Initialize the analysis folders.
        if not self.init_storage():
            log.debug("Failed to initialize the analysis folder")
            return False

        sha256 = File(self.task.target).get_sha256()

        if self.task.category in ["file", "pcap", "static"]:
            # Check whether the file has been changed for some unknown reason.
            # And fail this analysis if it has been modified.
            if not self.check_file(sha256):
                return False

            # Store a copy of the original file.
            if not self.store_file(sha256):
                return False

        if self.task.category in ("pcap", "static"):
            if self.task.category == "pcap":
                if hasattr(os, "symlink"):
                    os.symlink(self.binary,
                               os.path.join(self.storage, "dump.pcap"))
                else:
                    shutil.copy(self.binary,
                                os.path.join(self.storage, "dump.pcap"))
            # create the logs/files directories as
            # normally the resultserver would do it
            dirnames = ["logs", "files", "aux"]
            for dirname in dirnames:
                try:
                    os.makedirs(os.path.join(self.storage, dirname))
                except:
                    pass
            return True

        # Acquire analysis machine.
        try:
            self.acquire_machine()
            self.db.set_task_vm(self.task.id, self.machine.label,
                                self.machine.id)
        # At this point we can tell the ResultServer about it.
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Task #{0}: Cannot acquire machine: {1}".format(
                self.task.id, e))
            return False

        # Generate the analysis configuration file.
        options = self.build_options()

        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            log.exception(e)
            self.errors.put(e)

        aux = RunAuxiliary(task=self.task, machine=self.machine)

        try:
            unlocked = False

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id, self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            # Start the machine.
            machinery.start(self.machine.label)

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            aux.start()

            # Initialize the guest manager.
            guest = GuestManager(self.machine.name, self.machine.ip,
                                 self.machine.platform, self.task.id, self)

            options["clock"] = self.db.update_clock(self.task.id)
            self.db.guest_set_status(self.task.id, "starting")
            # Start the analysis.
            guest.start_analysis(options)
            if self.db.guest_get_status(self.task.id) == "starting":
                self.db.guest_set_status(self.task.id, "running")
                guest.wait_for_completion()

            self.db.guest_set_status(self.task.id, "stopping")
            succeeded = True
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error(str(e), extra={"task_id": self.task.id})
            dead_machine = True
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error(str(e), extra={"task_id": self.task.id})
        finally:
            # Stop Auxiliary modules.
            aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                try:
                    dump_path = get_memdump_path(self.task.id)
                    need_space, space_available = free_space_monitor(
                        os.path.dirname(dump_path), return_value=True)
                    if need_space:
                        log.error(
                            "Not enough free disk space! Could not dump ram (Only %d MB!)",
                            space_available)
                    else:
                        machinery.dump_memory(self.machine.label, dump_path)
                except NotImplementedError:
                    log.error("The memory dump functionality is not available "
                              "for the current machine manager.")

                except CuckooMachineError as e:
                    log.error(e)

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)

            except CuckooMachineError as e:
                log.warning(
                    "Task #{0}: Unable to stop machine {1}: {2}".format(
                        self.task.id, self.machine.label, e))

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            self.unroute_network()

            if dead_machine:
                # Remove the guest from the database, so that we can assign a
                # new guest when the task is being analyzed with another
                # machine.
                self.db.guest_remove(guest_log)

                # Remove the analysis directory that has been created so
                # far, as launch_analysis() is going to be doing that again.
                shutil.rmtree(self.storage)

                # This machine has turned dead, so we throw an exception here
                # which informs the AnalysisManager that it should analyze
                # this task again with another available machine.
                raise CuckooDeadMachine()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)

            except CuckooMachineError as e:
                log.error("Task #{0}: Unable to release machine {1}, reason "
                          "{2}. You might need to restore it manually.".format(
                              self.task.id, self.machine.label, e))

        return succeeded
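
launch_analysis() signals an unusable guest by raising CuckooDeadMachine so that the caller can retry on another machine. A minimal sketch of that retry loop, assuming the same exception class, logger, and task object, could be:

    def run_with_retries(self, max_attempts=3):
        # Hypothetical wrapper: re-run the analysis on a fresh machine whenever
        # the previous one is reported dead via CuckooDeadMachine.
        for attempt in range(1, max_attempts + 1):
            try:
                return self.launch_analysis()
            except CuckooDeadMachine:
                log.warning(
                    "Task #{0}: machine died (attempt {1} of {2}), retrying "
                    "on another machine".format(self.task.id, attempt, max_attempts))
        return False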
Example #28
0
    def run(self, results):
        """Writes report.
        @param results: Cuckoo results dict.
        @raise CuckooReportError: if fails to write report.
        """
        if not HAVE_JINJA2:
            raise CuckooReportError(
                "Failed to generate HTML report: Jinja2 library is not "
                "installed (install it with `pip install jinja2`)")

        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = []
            counter = 1
            for shot_name in os.listdir(shots_path):
                if not shot_name.endswith(".jpg"):
                    continue

                shot_path = os.path.join(shots_path, shot_name)
                if not os.path.getsize(shot_path):
                    continue

                shot = {}
                shot["id"] = os.path.splitext(File(shot_path).get_name())[0]
                shot["data"] = base64.b64encode(open(shot_path, "rb").read())
                shots.append(shot)

                counter += 1

            shots.sort(key=lambda shot: shot["id"])
            results["screenshots"] = shots
        else:
            results["screenshots"] = []

        env = Environment(autoescape=True)
        env.loader = FileSystemLoader(os.path.join(CUCKOO_ROOT, "data",
                                                   "html"))

        processed = None
        mapping = [
            ("file_read", "File", "Read"),
            ("file_written", "File", "Written"),
            ("file_deleted", "File", "Deleted"),
            ("file_opened", "File", "Opened"),
            ("file_copied", "File", "Copied"),
            ("file_moved", "File", "Moved"),
            ("connects_ip", "Network", "Connects IP"),
            ("resolves_url", "Network", "Resolves URL"),
            ("fetches_url", "Network", "Fetches URL"),
            ("connects_host", "Network", "Connects Host"),
            ("downloads_file_url", "Network", "Downloads File URL"),
            ("directory_created", "Directory", "Created"),
            ("directory_removed", "Directory", "Removed"),
            ("directory_enumerated", "Directory", "Enumerated"),
            ("regkey_opened", "Registry Key", "Opened"),
            ("regkey_deleted", "Registry Key", "Deleted"),
            ("regkey_read", "Registry Key", "Read"),
            ("regkey_written", "Registry Key", "Written"),
            ("mutex", "Mutex", "Accessed"),
        ]

        processed = {}
        for proc in results.get("behavior", {}).get("generic", []):
            for orig, cat, subcat in mapping:
                if cat not in processed:
                    processed[cat] = {}

                if subcat not in processed[cat]:
                    processed[cat][subcat] = []

                # Special handling required for file moved/copied.
                if orig == "file_moved" or orig == "file_copied":
                    for src, dst in proc.get("summary", {}).get(orig, []):
                        entry = "%s -> %s" % (src, dst)
                        processed[cat][subcat].append(entry)
                    continue

                if "summary" in proc and orig in proc["summary"]:
                    for content in proc["summary"][orig]:
                        processed[cat][subcat].append(content)

        try:
            tpl = env.get_template("report.html")
            html = tpl.render({
                "results": results,
                "processed": processed,
                "mapping": mapping
            })
        except Exception as e:
            raise CuckooReportError("Failed to generate HTML report: %s" % e)

        try:
            report_path = os.path.join(self.reports_path, "report.html")
            with codecs.open(report_path, "w", encoding="utf-8") as report:
                report.write(html)
        except (TypeError, IOError) as e:
            raise CuckooReportError("Failed to write HTML report: %s" % e)

        return True
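
At its core the report generation above just renders a Jinja2 template against the results dict. A self-contained sketch using an in-memory template instead of the report.html shipped in data/html:

from jinja2 import DictLoader, Environment

env = Environment(
    autoescape=True,
    loader=DictLoader({"mini_report.html": "<h1>{{ results.target }}</h1>"}),
)
html = env.get_template("mini_report.html").render({"results": {"target": "sample.exe"}})
print(html)  # -> <h1>sample.exe</h1>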
Example #29
0
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []
        do_strings = self.options.get("strings", False)
        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = self.options.get("minchars", 5)

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                # If we are re-processing this task and zips are enabled, the
                # process dumps will not be re-processed (for now this only matters for Yara)
                if not dmp.endswith(".dmp"):
                    continue

                dmp_path = os.path.join(self.pmemory_path, dmp)
                if os.path.getsize(dmp_path) == 0:
                    continue

                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(
                    os.path.splitext(os.path.basename(dmp_path))[0])
                for process in self.results.get("behavior", {}).get(
                        "processes", []) or []:
                    if process_id == process["process_id"]:
                        process_name = process["process_name"]
                        process_path = process["module_path"]

                procdump = ProcDump(dmp_path, pretty=True)

                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(
                        os.path.join(CUCKOO_ROOT, "data", "yara",
                                     "index_memory.yar")),
                    address_space=procdump.pretty_print(),
                )
                endlimit = ""
                if not HAVE_RE2:
                    endlimit = "8192"

                if do_strings:
                    if nulltermonly:
                        apat = "([\x20-\x7e]{" + str(
                            minchars) + "," + endlimit + "})\x00"
                        upat = "((?:[\x20-\x7e][\x00]){" + str(
                            minchars) + "," + endlimit + "})\x00\x00"
                    else:
                        apat = "[\x20-\x7e]{" + str(
                            minchars) + "," + endlimit + "}"
                        upat = "(?:[\x20-\x7e][\x00]){" + str(
                            minchars) + "," + endlimit + "}"

                    matchdict = procdump.search(apat, all=True)
                    strings = matchdict["matches"]
                    matchdict = procdump.search(upat, all=True)
                    ustrings = matchdict["matches"]
                    for ws in ustrings:
                        strings.append(str(ws.decode("utf-16le")))

                    proc["strings_path"] = dmp_path + ".strings"
                    f = open(proc["strings_path"], "w")
                    f.write("\n".join(strings))
                    f.close()

                procdump.close()
                results.append(proc)
        return results
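
The apat/upat expressions above pull printable ASCII and UTF-16LE strings out of the dump. A standalone sketch of the same null-terminated patterns using the standard re module on a bytes buffer:

import re

def extract_strings(buf, minchars=5):
    """Hypothetical helper mirroring the null-terminated string patterns above."""
    apat = rb"([\x20-\x7e]{" + str(minchars).encode() + rb",})\x00"
    upat = rb"((?:[\x20-\x7e]\x00){" + str(minchars).encode() + rb",})\x00\x00"
    strings = [m.decode("ascii") for m in re.findall(apat, buf)]
    strings += [m.decode("utf-16le") for m in re.findall(upat, buf)]
    return strings

print(extract_strings(b"junk\x01hello world\x00\xffh\x00i\x00 \x00t\x00h\x00e\x00r\x00e\x00\x00\x00"))
# -> ['hello world', 'hi there']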
Example #30
0
    def process_file(self, file_path, CAPE_output, append_file):
        """Process file.
        @return: file_info
        """
        global cape_config
        cape_name = ""
        strings = []

        buf = self.options.get("buffer", BUFSIZE)

        if file_path.endswith("_info.txt"):
            return

        texttypes = [
            "ASCII",
            "Windows Registry text",
            "XML document text",
            "Unicode text",
        ]

        if os.path.exists(file_path + "_info.txt"):
            with open(file_path + "_info.txt", 'r') as f:
                metastring = f.readline()
        else:
            metastring = ""

        file_info = File(file_path, metastring).get_all()

        # Get the file data
        with open(file_info["path"], "r") as file_open:
            file_data = file_open.read(buf + 1)
        if len(file_data) > buf:
            file_info["data"] = binascii.b2a_hex(file_data[:buf] +
                                                 " <truncated>")
        else:
            file_info["data"] = binascii.b2a_hex(file_data)

        metastrings = metastring.split(",")
        if len(metastrings) > 1:
            file_info["pid"] = metastrings[1]
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[2]
            file_info["process_name"] = metastrings[2].split("\\")[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[3]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""

        if metastrings != "":
            try:
                file_info["cape_type_code"] = int(metastrings[0])
            except Exception as e:
                pass
            if file_info["cape_type_code"] == COMPRESSION:
                file_info["cape_type"] = "Decompressed PE Image"
            if file_info["cape_type_code"] == INJECTION_PE:
                file_info["cape_type"] = "Injected PE Image"
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[4]
                    file_info["target_process"] = metastrings[4].split(
                        "\\")[-1]
                    file_info["target_pid"] = metastrings[5]
            if file_info["cape_type_code"] == INJECTION_SHELLCODE:
                file_info["cape_type"] = "Injected Shellcode/Data"
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[4]
                    file_info["target_process"] = metastrings[4].split(
                        "\\")[-1]
                    file_info["target_pid"] = metastrings[5]
            if file_info["cape_type_code"] == INJECTION_SECTION:
                file_info["cape_type"] = "Injected Section"
                if len(metastrings) > 4:
                    file_info["section_handle"] = metastrings[4]
            if file_info["cape_type_code"] == EXTRACTION_PE:
                file_info["cape_type"] = "Extracted PE Image"
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[4]
            if file_info["cape_type_code"] == EXTRACTION_SHELLCODE:
                file_info["cape_type"] = "Extracted Shellcode"
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[4]
            type_strings = file_info["type"].split()
            if type_strings[0] == ("PE32+"):
                file_info["cape_type"] += ": 64-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            if type_strings[0] == ("PE32"):
                file_info["cape_type"] += ": 32-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            # PlugX
            if file_info["cape_type_code"] == PLUGX_CONFIG:
                file_info["cape_type"] = "PlugX Config"
                plugx_parser = plugx.PlugXConfig()
                plugx_config = plugx_parser.parse_config(
                    file_data, len(file_data))
                if not "cape_config" in cape_config and plugx_config:
                    cape_config["cape_config"] = {}
                    for key, value in plugx_config.items():
                        cape_config["cape_config"].update({key: [value]})
                    cape_name = "PlugX"
                else:
                    log.error(
                        "CAPE: PlugX config parsing failure - size many not be handled."
                    )
                append_file = False
            if file_info["cape_type_code"] == PLUGX_PAYLOAD:
                file_info["cape_type"] = "PlugX Payload"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
            # EvilGrab
            if file_info["cape_type_code"] == EVILGRAB_PAYLOAD:
                file_info["cape_type"] = "EvilGrab Payload"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
            if file_info["cape_type_code"] == EVILGRAB_DATA:
                cape_name = "EvilGrab"
                file_info["cape_type"] = "EvilGrab Data"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                if file_info["size"] == 256 or file_info["size"] == 260:
                    ConfigItem = "filepath"
                    ConfigData = format(file_data)
                    cape_config["cape_config"].update(
                        {ConfigItem: [ConfigData]})
                if file_info["size"] > 0x1000:
                    append_file = True
                else:
                    append_file = False
            # Sedreco
            if file_info["cape_type_code"] == SEDRECO_DATA:
                cape_name = "Sedreco"
                cape_config["cape_type"] = "Sedreco Config"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                SedrecoConfigIndex = None
                if len(metastrings) > 4:
                    SedrecoConfigIndex = metastrings[4]
                if SedrecoConfigIndex == '0x0':
                    ConfigItem = "Timer1"
                elif SedrecoConfigIndex == '0x1':
                    ConfigItem = "Timer2"
                elif SedrecoConfigIndex == '0x2':
                    ConfigItem = "Computer Name"
                elif SedrecoConfigIndex == '0x3':
                    ConfigItem = "C&C1"
                elif SedrecoConfigIndex == '0x4':
                    ConfigItem = "C&C2"
                elif SedrecoConfigIndex == '0x5':
                    ConfigItem = "Operation Name"
                elif SedrecoConfigIndex == '0x6':
                    ConfigItem = "Keylogger MaxBuffer"
                elif SedrecoConfigIndex == '0x7':
                    ConfigItem = "Keylogger MaxTimeout"
                elif SedrecoConfigIndex == '0x8':
                    ConfigItem = "Keylogger Flag"
                elif SedrecoConfigIndex == '0x9':
                    ConfigItem = "C&C3"
                else:
                    ConfigItem = "Unknown"
                ConfigData = format(file_data)
                if ConfigData:
                    cape_config["cape_config"].update(
                        {ConfigItem: [ConfigData]})
                append_file = False
            # Cerber
            if file_info["cape_type_code"] == CERBER_CONFIG:
                file_info["cape_type"] = "Cerber Config"
                cape_config["cape_type"] = "Cerber Config"
                cape_name = "Cerber"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                ConfigItem = "JSON Data"
                parsed = json.loads(file_data.rstrip(b'\0'))
                ConfigData = json.dumps(parsed, indent=4, sort_keys=True)
                cape_config["cape_config"].update({ConfigItem: [ConfigData]})
                append_file = True
            if file_info["cape_type_code"] == CERBER_PAYLOAD:
                file_info["cape_type"] = "Cerber Payload"
                cape_config["cape_type"] = "Cerber Payload"
                cape_name = "Cerber"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                append_file = True
            # Ursnif
            if file_info["cape_type_code"] == URSNIF_CONFIG:
                file_info["cape_type"] = "Ursnif Config"
                cape_config["cape_type"] = "Ursnif Config"
                cape_name = "Ursnif"
                malwareconfig_loaded = False
                try:
                    malwareconfig_parsers = os.path.join(
                        CUCKOO_ROOT, "modules", "processing", "parsers",
                        "malwareconfig")
                    file, pathname, description = imp.find_module(
                        cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname,
                                             description)
                    malwareconfig_loaded = True
                    log.info("CAPE: Imported malwareconfig.com parser %s",
                             cape_name)
                except ImportError:
                    log.info(
                        "CAPE: malwareconfig.com parser: No module named %s",
                        cape_name)
                if malwareconfig_loaded:
                    try:
                        if not "cape_config" in cape_config:
                            cape_config["cape_config"] = {}
                        malwareconfig_config = module.config(file_data)
                        if isinstance(malwareconfig_config, list):
                            for (key,
                                 value) in malwareconfig_config[0].iteritems():
                                cape_config["cape_config"].update(
                                    {key: [value]})
                        elif isinstance(malwareconfig_config, dict):
                            for (key,
                                 value) in malwareconfig_config.iteritems():
                                cape_config["cape_config"].update(
                                    {key: [value]})
                    except Exception as e:
                        log.error(
                            "CAPE: malwareconfig parsing error with %s: %s",
                            cape_name, e)
                append_file = False
            # Hancitor
            if file_info["cape_type_code"] == HANCITOR_PAYLOAD:
                cape_name = "Hancitor"
                cape_config["cape_type"] = "Hancitor Payload"
                file_info["cape_type"] = "Hancitor Payload"
            if file_info["cape_type_code"] == HANCITOR_CONFIG:
                cape_name = "Hancitor"
                cape_config["cape_type"] = "Hancitor Config"
                file_info["cape_type"] = "Hancitor Config"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                ConfigStrings = file_data.split('\0')
                ConfigStrings = filter(None, ConfigStrings)
                ConfigItem = "Campaign Code"
                cape_config["cape_config"].update(
                    {ConfigItem: [ConfigStrings[0]]})
                GateURLs = ConfigStrings[1].split('|')
                for index, value in enumerate(GateURLs):
                    ConfigItem = "Gate URL " + str(index + 1)
                    cape_config["cape_config"].update({ConfigItem: [value]})
                append_file = False
            # QakBot
            if file_info["cape_type_code"] == QAKBOT_CONFIG:
                file_info["cape_type"] = "QakBot Config"
                cape_config["cape_type"] = "QakBot Config"
                cape_name = "QakBot"
                if not "cape_config" in cape_config:
                    cape_config["cape_config"] = {}
                index = data = ""
                for line in file_data.splitlines():
                    if '=' in line:
                        index = line.split('=')[0]
                        data = line.split('=')[1]
                    if index == '10':
                        ConfigItem = "Botnet name"
                        ConfigData = data
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '11':
                        ConfigItem = "Number of C2 servers"
                        ConfigData = data
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '47':
                        ConfigItem = "Bot ID"
                        ConfigData = data
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '3':
                        ConfigItem = "Config timestamp"
                        ConfigData = datetime.datetime.fromtimestamp(
                            int(data)).strftime('%H:%M:%S %d-%m-%Y')
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '22':
                        values = data.split(':')
                        ConfigItem = "Password #1"
                        ConfigData = values[2]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "Username #1"
                        ConfigData = values[1]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "C2 #1"
                        ConfigData = values[0]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '23':
                        values = data.split(':')
                        ConfigItem = "Password #2"
                        ConfigData = values[2]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "Username #2"
                        ConfigData = values[1]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "C2 #2"
                        ConfigData = values[0]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '24':
                        values = data.split(':')
                        ConfigItem = "Password #3"
                        ConfigData = values[2]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "Username #3"
                        ConfigData = values[1]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "C2 #3"
                        ConfigData = values[0]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '25':
                        values = data.split(':')
                        ConfigItem = "Password #4"
                        ConfigData = values[2]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "Username #4"
                        ConfigData = values[1]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "C2 #4"
                        ConfigData = values[0]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                    if index == '26':
                        values = data.split(':')
                        ConfigItem = "Password #5"
                        ConfigData = values[2]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "Username #5"
                        ConfigData = values[1]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                        ConfigItem = "C2 #5"
                        ConfigData = values[0]
                        if ConfigData:
                            cape_config["cape_config"].update(
                                {ConfigItem: [ConfigData]})
                append_file = False
            if file_info["cape_type_code"] == QAKBOT_PAYLOAD:
                file_info["cape_type"] = "QakBot Payload"
                cape_config["cape_type"] = "QakBot Payload"
                cape_name = "QakBot"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                append_file = True
            # UPX package output
            if file_info["cape_type_code"] == UPX:
                file_info["cape_type"] = "Unpacked PE Image"
                type_strings = file_info["type"].split()
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

        # Process CAPE Yara hits
        for hit in file_info["cape_yara"]:
            # Check to see if file is packed with UPX
            if hit["name"] == "UPX":
                log.info(
                    "CAPE: Found UPX Packed sample - attempting to unpack")
                self.upx_unpack(file_data, CAPE_output)

            # Check for a payload or config hit
            try:
                cape_type = hit["meta"]["cape_type"].lower()
                if "payload" in cape_type or "config" in cape_type:
                    file_info["cape_type"] = hit["meta"]["cape_type"]
                    cape_name = hit["name"]
            except Exception:
                pass
            type_strings = file_info["type"].split()
            if "-bit" not in file_info["cape_type"]:
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

            suppress_parsing_list = ["Cerber", "Ursnif", "QakBot"]

            if hit["name"] in suppress_parsing_list:
                continue

            # Attempt to import a parser for the hit
            # DC3-MWCP
            mwcp_loaded = False
            if cape_name:
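                # Try the DC3-MWCP parser named after the detected family; an
                # empty error list means the parser imported and ran cleanly.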
                try:
                    mwcp_parsers = os.path.join(CUCKOO_ROOT, "modules",
                                                "processing", "parsers",
                                                "mwcp", "parsers")
                    mwcp = reporter.Reporter(parserdir=mwcp_parsers)
                    kwargs = {}
                    mwcp.run_parser(cape_name, data=file_data, **kwargs)
                    if mwcp.errors == []:
                        log.info("CAPE: Imported DC3-MWCP parser %s",
                                 cape_name)
                        mwcp_loaded = True
                    else:
                        error_lines = mwcp.errors[0].split("\n")
                        for line in error_lines:
                            if line.startswith('ImportError: '):
                                log.info("CAPE: DC3-MWCP parser: %s",
                                         line.split(': ')[1])
                except ImportError:
                    pass

            # malwareconfig
            malwareconfig_loaded = False
            if cape_name and not mwcp_loaded:
                try:
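                    # Fall back to a malwareconfig.com-style parser: import a
                    # module named after the family from the malwareconfig
                    # parsers directory.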
                    malwareconfig_parsers = os.path.join(
                        CUCKOO_ROOT, "modules", "processing", "parsers",
                        "malwareconfig")
                    file, pathname, description = imp.find_module(
                        cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname,
                                             description)
                    malwareconfig_loaded = True
                    log.info("CAPE: Imported malwareconfig.com parser %s",
                             cape_name)
                except ImportError:
                    log.info(
                        "CAPE: malwareconfig.com parser: No module named %s",
                        cape_name)

            # Get config data
            if mwcp_loaded:
                try:
                    if not "cape_config" in cape_config:
                        cape_config["cape_config"] = {}
                        cape_config["cape_config"] = convert(mwcp.metadata)
                    else:
                        cape_config["cape_config"].update(
                            convert(mwcp.metadata))
                except Exception as e:
                    log.error(
                        "CAPE: DC3-MWCP config parsing error with %s: %s",
                        cape_name, e)
            elif malwareconfig_loaded:
                try:
                    if not "cape_config" in cape_config:
                        cape_config["cape_config"] = {}
                    malwareconfig_config = module.config(file_data)
                    if isinstance(malwareconfig_config, list):
                        for (key,
                             value) in malwareconfig_config[0].iteritems():
                            cape_config["cape_config"].update({key: [value]})
                    elif isinstance(malwareconfig_config, dict):
                        for (key, value) in malwareconfig_config.iteritems():
                            cape_config["cape_config"].update({key: [value]})
                except Exception as e:
                    log.error("CAPE: malwareconfig parsing error with %s: %s",
                              cape_name, e)

            if "cape_config" in cape_config:
                if cape_config["cape_config"] == {}:
                    del cape_config["cape_config"]

        if cape_name:
            if "cape_config" in cape_config:
                cape_config["cape_name"] = format(cape_name)
            if not "cape" in self.results:
                if cape_name != "UPX":
                    self.results["cape"] = cape_name

        # Remove duplicate payloads from web ui
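        # Two payloads are treated as duplicates when they have the same size
        # and either an ssdeep similarity at or above the threshold or an
        # identical entry point and entry-point bytes.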
        for cape_file in CAPE_output:
            if file_info["size"] == cape_file["size"]:
                if HAVE_PYDEEP:
                    ssdeep_grade = pydeep.compare(file_info["ssdeep"],
                                                  cape_file["ssdeep"])
                    if ssdeep_grade >= ssdeep_threshold:
                        append_file = False
                if file_info["entrypoint"] and file_info["entrypoint"] == cape_file["entrypoint"] \
                    and file_info["ep_bytes"] == cape_file["ep_bytes"]:
                    append_file = False

        if append_file:
            CAPE_output.append(file_info)
        return file_info
Example #31
0
    def run(self):
        """Run analysis.
        @return: list of process dumps with related information.
        """
        self.key = "procdump"
        procdump_files = []
        buf = self.options.get("buffer", 8192)
        if not os.path.exists(self.procdump_path):
            return None

        meta = dict()
        if os.path.exists(self.files_metadata):
            for line in open(self.files_metadata, "rb"):
                entry = json.loads(line)
                filepath = os.path.join(self.analysis_path, entry["path"])
                meta[filepath] = {
                    "pids": entry["pids"],
                    "filepath": entry["filepath"],
                    "metadata": entry["metadata"],
                }

        file_names = os.listdir(self.procdump_path)
        for file_name in file_names:
            file_path = os.path.join(self.procdump_path, file_name)

            file_info = File(file_path=file_path,
                             guest_paths=meta[file_path]["metadata"],
                             file_name=file_name).get_all()
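            # The ";?"-delimited metadata string recorded by the guest carries
            # the process path (second field) and the module path (third field).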
            metastrings = meta[file_path].get("metadata", "").split(";?")
            if len(metastrings) < 3:
                continue
            file_info["process_path"] = metastrings[1]
            file_info["module_path"] = metastrings[2]
            file_info["process_name"] = file_info["process_path"].split(
                "\\")[-1]
            file_info["pid"] = meta[file_path]["pids"][0]
            type_strings = file_info["type"].split()
            if len(type_strings) < 3:
                continue
            if type_strings[0] == "MS-DOS":
                file_info["cape_type"] = "DOS MZ image: executable"
            else:
                file_info["cape_type"] = "PE image"
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                elif type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            texttypes = [
                "ASCII",
                "Windows Registry text",
                "XML document text",
                "Unicode text",
            ]
            readit = False
            for texttype in texttypes:
                if texttype in file_info["type"]:
                    readit = True
                    break
            if readit:
                with open(file_info["path"], "r") as drop_open:
                    filedata = drop_open.read(buf + 1)
                if len(filedata) > buf:
                    file_info["data"] = convert_to_printable(filedata[:buf] +
                                                             " <truncated>")
                else:
                    file_info["data"] = convert_to_printable(filedata)

            procdump_files.append(file_info)

        return procdump_files
Example #32
0
def reversing_labs_lookup(target: str, is_hash: bool = False):
    _headers = {
        "User-Agent": "CAPE Sandbox",
        "Content-Type": "application/json",
        "Authorization": f"Token {processing_conf.reversinglabs.key}",
    }

    report_fields = [
        "id",
        "sha1",
        "sha256",
        "sha512",
        "md5",
        "category",
        "file_type",
        "file_subtype",
        "identification_name",
        "identification_version",
        "file_size",
        "extracted_file_count",
        "local_first_seen",
        "local_last_seen",
        "classification_origin",
        "classification_reason",
        "classification_source",
        "classification",
        "riskscore",
        "classification_result",
        "ticore",
        "tags",
        "summary",
        "discussion",
        "ticloud",
        "aliases",
    ]
    if not is_hash:
        sha256 = target if len(target) == 64 else File(target).get_sha256()
    else:
        sha256 = target
    full_report_lookup = {
        "hash_values": [sha256],
        "report_fields": report_fields
    }
    try:
        r = requests.post(
            url=processing_conf.reversinglabs.url +
            "/api/samples/v2/list/details/",
            headers=_headers,
            data=json.dumps(full_report_lookup),
        )
    except requests.exceptions.RequestException as e:
        return {
            "error": True,
            "msg": f"Unable to complete connection to Reversing Labs: {e}",
        }

    if r.status_code != 200:
        return {
            "error": True,
            "msg": f"Unable to complete lookup to Reversing Labs: {r.json().get('message')}",
        }
    reversing_labs_response = r.json()
    if not reversing_labs_response.get("results"):
        return {"error": True, "msg": "No results found."}
    results = reversing_labs_response["results"][0]
    # most_recent_scan_engines = results["av_scanners"][-1]

    scanner_summary = results["av_scanners_summary"]
    sample_summary = results["sample_summary"]
    ticloud = results["ticloud"]
    ticore = results["ticore"]

    scanner_total = scanner_summary["scanner_count"]
    scanner_evil = scanner_summary["scanner_match"]
    classification = sample_summary["classification"]
    classification_result = sample_summary["classification_result"]
    file = ticore["info"]["file"]
    malicious = (classification in ["malicious", "suspicious"]
                 and sample_summary["goodware_override"] is False)
    md5 = sample_summary["md5"]
    sha1 = sample_summary["sha1"]
    sha256 = sample_summary["sha256"]
    riskscore = ticloud["riskscore"]
    name = file["proposed_filename"]
    entropy = file["entropy"]
    story = ticore["story"]

    reversing_labs = {
        "name": name,
        "md5": md5,
        "sha1": sha1,
        "sha256": sha256,
        "malicious": malicious,
        "classification": classification,
        "classification_result": classification_result,
        "riskscore": riskscore,
        "detected": scanner_evil,
        "total": scanner_total,
        "story": story,
        "permalink": os.path.join(processing_conf.reversinglabs.url, sha256),
    }

    return reversing_labs
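# Hypothetical usage sketch, not part of the module above: assuming
# processing_conf.reversinglabs points at a reachable ReversingLabs service
# with a valid API token, a lookup by SHA-256 could be driven like this.
if __name__ == "__main__":
    lookup = reversing_labs_lookup(
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
        is_hash=True,
    )
    if lookup.get("error"):
        print(lookup["msg"])
    else:
        print(lookup["classification"], lookup["riskscore"], lookup["permalink"])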
Example #33
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one(
            )["version"] != self.SCHEMA_VERSION:
                raise CuckooReportError(
                    "Mongo schema version not expected, check data migration tool"
                )
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #    created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        self.db.fs.files.ensure_index("sha256",
                                      unique=True,
                                      sparse=True,
                                      name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        # Store the sample in GridFS.
        if results["info"]["category"] == "file":
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"] = {"pcap_id": pcap_id}
            report["network"].update(results["network"])

        # Store the process memory dump file in GridFS and reference it back in the report.
        if "procmemory" in report and self.options.get("store_memdump", False):
            for idx, procmem in enumerate(report["procmemory"]):
                procmem_path = os.path.join(self.analysis_path, "memory",
                                            "{0}.dmp".format(procmem["pid"]))
                procmem_file = File(procmem_path)
                if procmem_file.valid():
                    procmem_id = self.store_file(procmem_file)
                    report["procmemory"][idx].update(
                        {"procmem_id": procmem_id})

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        if "dropped" in report:
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop,
                                                 filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [
                shot for shot in os.listdir(shots_path)
                if shot.endswith(".jpg")
            ]

            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.disconnect()
Example #34
0
    def run(self):
        """Run Google play unofficial python api the get the google play information
        @return: list of google play features
        """
        self.key = "googleplay"
        googleplay = {}

        if not HAVE_GOOGLEPLAY:
            log.error("Unable to import the GooglePlay library, has it been "
                      "installed properly?")
            return

        if not HAVE_ANDROGUARD:
            log.error("Could not find the Androguard library, please install "
                      "it. (`pip install androguard`)")
            return

        if ("file" not in self.task["category"]):
            return

        f = File(self.task["target"])
        if f.get_name().endswith((".zip", ".apk")) or "zip" in f.get_type():
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path)

            android_id = self.options.get("android_id")
            google_login = self.options.get("google_login")
            google_password = self.options.get("google_password")
            # auth_token = self.options.get("auth_token", None)

            if not android_id and not google_login and not google_password:
                raise CuckooProcessingError("Google Play Credentials not configured, skip")

            try:
                a = APK(self.file_path)
                if a.is_valid_APK():
                    package = a.get_package()
                    # Connect
                    api = GooglePlayAPI(android_id)
                    api.login(google_login, google_password, None)

                    # Get the version code and the offer type from the app details
                    app_data = api.details(package)
                    app_detail = app_data.docV2.details.appDetails

                    if not app_detail.installationSize:
                        return googleplay

                    googleplay["title"] = app_detail.title
                    googleplay["app_category"] = app_detail.appCategory._values
                    googleplay["version_code"] = app_detail.versionCode
                    googleplay["app_type"] = app_detail.appType
                    googleplay["content_rating"] = app_detail.contentRating
                    googleplay["developer_email"] = app_detail.developerEmail
                    googleplay["developer_name"] = app_detail.developerName
                    googleplay["developer_website"] = app_detail.developerWebsite
                    googleplay["installation_size"] = app_detail.installationSize
                    googleplay["num_downloads"] = app_detail.numDownloads
                    googleplay["upload_date"] = app_detail.uploadDate
                    googleplay["permissions"] = app_detail.permission._values
            except (IOError, OSError, BadZipfile) as e:
                raise CuckooProcessingError("Error opening file %s" % e)

        return googleplay
Example #35
0
    def build_options(self):
        """Generate analysis options.
        @return: options dict.
        """
        options = {}

        options["id"] = self.task.id
        options["ip"] = self.machine.resultserver_ip
        options["port"] = self.machine.resultserver_port
        options["category"] = self.task.category
        options["target"] = self.task.target
        options["package"] = self.task.package
        options["options"] = self.task.options
        options["enforce_timeout"] = self.task.enforce_timeout
        options["clock"] = self.task.clock
        options["terminate_processes"] = self.cfg.cuckoo.terminate_processes
        options["upload_max_size"] = self.cfg.resultserver.upload_max_size
        options["do_upload_max_size"] = int(
            self.cfg.resultserver.do_upload_max_size)

        if not self.task.timeout or self.task.timeout == 0:
            options["timeout"] = self.cfg.timeouts.default
        else:
            options["timeout"] = self.task.timeout

        if self.task.category == "file":
            options["file_name"] = File(self.task.target).get_name()
            options["file_type"] = File(self.task.target).get_type()
            # If it's a PE file, collect export information to help pick the
            # right analysis package more intelligently.
            options["exports"] = ""
            if HAVE_PEFILE and ("PE32" in options["file_type"] or
                                "MS-DOS executable" in options["file_type"]):
                try:
                    pe = pefile.PE(self.task.target)
                    if hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
                        exports = []
                        for exported_symbol in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                            try:
                                if not exported_symbol.name:
                                    continue
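                                # Keep only characters matching [A-Za-z0-9_?@-]
                                # in the export name before joining the list.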
                                if isinstance(exported_symbol.name, bytes):
                                    exports.append(
                                        re.sub(b"[^A-Za-z0-9_?@-]", b"",
                                               exported_symbol.name).decode(
                                                   "utf-8"))
                                else:
                                    exports.append(
                                        re.sub("[^A-Za-z0-9_?@-]", "",
                                               exported_symbol.name))
                            except Exception as e:
                                log.error(e, exc_info=True)

                        options["exports"] = ",".join(exports)
                except Exception as e:
                    log.error("PE type not recognised")
                    log.error(e, exc_info=True)

        # options from auxiliar.conf
        for plugin in self.aux_cfg.auxiliar_modules.keys():
            options[plugin] = self.aux_cfg.auxiliar_modules[plugin]

        return options
Example #36
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
                CuckooReportError("Mongo schema version not expected, check data migration tool")
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        if not "network" in report:
            report["network"] = {}

        # Add screenshot paths
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                screenshot = File(shot_path)
                if screenshot.valid():
                    # Strip the extension as it's added later 
                    # in the Django view
                    report["shots"].append(shot_file.replace(".jpg", ""))

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed then
                    # store the chunk in MongoDB.
                    if len(chunk) == 100:
                        to_insert = {"pid": process["process_id"],
                                     "calls": chunk}
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Calculate the mlist_cnt for display if present to reduce db load
        if "signatures" in results:
            for entry in results["signatures"]:
                if entry["name"] == "ie_martian_children":
                    report["mlist_cnt"] = len(entry["data"])
                if entry["name"] == "office_martian_children":
                    report["f_mlist_cnt"] = len(entry["data"])

        # Other info we want quick access to from the web UI
        virustotal = results.get("virustotal")
        if virustotal and "positives" in virustotal and "total" in virustotal:
            report["virustotal_summary"] = "%s/%s" % (virustotal["positives"],
                                                      virustotal["total"])
        suricata = results.get("suricata")
        if suricata:
            if suricata.get("tls"):
                report["suri_tls_cnt"] = len(suricata["tls"])
            if suricata.get("alerts"):
                report["suri_alert_cnt"] = len(suricata["alerts"])
            if suricata.get("files"):
                report["suri_file_cnt"] = len(suricata["files"])
            if suricata.get("http"):
                report["suri_http_cnt"] = len(suricata["http"])
            if suricata.get("ssh"):
                report["suri_ssh_cnt"] = len(suricata["ssh"])
            if suricata.get("dns"):
                report["suri_dns_cnt"] = len(suricata["dns"])

        # Create an index based on the info.id dict key. Increases overall scalability
        # with large amounts of data.
        # Note: Silently ignores the creation if the index already exists.
        self.db.analysis.create_index("info.id", background=True)

        # Store the report and retrieve its object id.
        try:
            self.db.analysis.save(report)
        except InvalidDocument as e:
            parent_key, psize = self.debug_dict_size(report)[0]
            child_key, csize = self.debug_dict_size(report[parent_key])[0]
            if not self.options.get("fix_large_docs", False):
                # Just log the error and problem keys
                log.error(str(e))
                log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576))
                log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576))
            else:
                # Delete the problem keys and check for more
                error_saved = True
                while error_saved:
                    log.warn("results['%s']['%s'] deleted due to >16MB size (%dMB)" %
                             (parent_key, child_key, int(psize) / 1048576))
                    del report[parent_key][child_key]
                    try:
                        self.db.analysis.save(report)
                        error_saved = False
                    except InvalidDocument as e:
                        parent_key, psize = self.debug_dict_size(report)[0]
                        child_key, csize = self.debug_dict_size(report[parent_key])[0]
                        log.error(str(e))
                        log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576))
                        log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576))

        self.conn.close()
    def run(self, results):
        self.noinject = self.options.get("noinject", False)
        filesdict = {}
        self.task_options_stack = []
        self.task_options = None
        self.task_custom = None
        report = dict(results)
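        # Child tasks created by this module carry "resubmitjob=true" in their
        # options, so reports for those tasks are skipped here and dropped
        # files are not resubmitted endlessly.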

        if report["info"].has_key(
                "options") and "resubmitjob=true" in report["info"]["options"]:
            return
        else:
            self.task_options_stack.append("resubmitjob=true")
        if self.noinject:
            self.task_options_stack.append("free=true")

        if self.task_options_stack:
            self.task_options = ','.join(self.task_options_stack)

        report = dict(results)
        for dropped in report["dropped"]:
            if os.path.isfile(dropped["path"]):
                if ("PE32" in dropped["type"] or "MS-DOS"
                        in dropped["type"]) and "DLL" not in dropped["type"]:
                    if not filesdict.has_key(dropped['sha256']):
                        filesdict[dropped['sha256']] = dropped['path']

        if report.has_key("suricata") and report["suricata"]:
            if report["suricata"].has_key(
                    "files") and report["suricata"]["files"]:
                for suricata_file_e in results["suricata"]["files"]:
                    if suricata_file_e.has_key("file_info"):
                        tmp_suricata_file_d = dict(suricata_file_e)
                        if os.path.isfile(
                                suricata_file_e["file_info"]["path"]):
                            ftype = suricata_file_e["file_info"]["type"]
                            if ("PE32" in ftype or "MS-DOS"
                                    in ftype) and "DLL" not in ftype:
                                if not filesdict.has_key(
                                        suricata_file_e["file_info"]
                                    ["sha256"]):
                                    filesdict[suricata_file_e["file_info"]
                                              ["sha256"]] = suricata_file_e[
                                                  "file_info"]["path"]

        db = Database()

        for e in filesdict:
            if not File(filesdict[e]).get_size():
                continue
            if not db.find_sample(sha256=e) is None:
                continue

            self.task_custom = "Parent_Task_ID:%s" % report["info"]["id"]
            if report["info"].has_key("custom") and report["info"]["custom"]:
                self.task_custom = "%s Parent_Custom:%s" % (
                    self.task_custom, report["info"]["custom"])
            task_id = db.add_path(file_path=filesdict[e],
                                  package='exe',
                                  timeout=200,
                                  options=self.task_options,
                                  priority=1,
                                  machine=None,
                                  platform=None,
                                  custom=self.task_custom,
                                  memory=False,
                                  enforce_timeout=False,
                                  clock=None,
                                  tags=None)

            if task_id:
                log.info(u"Resubmitexe file \"{0}\" added as task with ID {1}".
                         format(filesdict[e], task_id))
            else:
                log.warn("Error adding resubmitexe task to database")
Example #38
0
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []
        do_strings = self.options.get("strings", False)
        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = self.options.get("minchars", 5)

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                # When re-processing a task with zips enabled, the process dumps
                # are not reprocessed (for now this only matters for Yara).
                if not dmp.endswith(".dmp"):
                    continue

                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(os.path.splitext(os.path.basename(dmp_path))[0])
                if "behavior" in self.results and "processes" in self.results["behavior"]:
                    for process in self.results["behavior"]["processes"]:
                        if process_id == process["process_id"]:
                            process_name = process["process_name"]
                            process_path = process["module_path"]

                procdump = ProcDump(dmp_path, pretty=True)

                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")),
                    address_space=procdump.pretty_print(),
                )
                endlimit = ""
                if not HAVE_RE2:
                    endlimit = "8192"

                if do_strings:
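                    # Build ASCII and UTF-16LE patterns matching runs of at least
                    # `minchars` printable characters; without re2 the run length
                    # is capped at 8192 to keep the regex scan bounded.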
                    if nulltermonly:
                        apat = "([\x20-\x7e]{" + str(minchars) + "," + endlimit + "})\x00"
                        upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + "," + endlimit + "})\x00\x00"
                    else:
                        apat = "[\x20-\x7e]{" + str(minchars) + "," + endlimit + "}"
                        upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + "," + endlimit + "}"

                    matchdict = procdump.search(apat, all=True)
                    strings = matchdict["matches"]
                    matchdict = procdump.search(upat, all=True)
                    ustrings = matchdict["matches"]
                    for ws in ustrings:
                        strings.append(str(ws.decode("utf-16le")))

                    proc["strings_path"] = dmp_path + ".strings"
                    with open(proc["strings_path"], "w") as f:
                        f.write("\n".join(strings))

                procdump.close()

                # Deduplicate configs
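                # Some families (Dyre, DarkComet) dump the same config fragments
                # many times; keep only the unique blocks per Yara match.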
                if proc["yara"]:
                    for match in proc["yara"]:
                        # Dyre
                        if match["name"] == "DyreCfgInjectsList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<litem>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</litem>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique inject elements)" % len(output)

                        elif match["name"] == "DyreCfgRedirectList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<rpcgroup>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</rpcgroup>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique redirect elements)" % len(output)

                        # DarkComet
                        elif match["name"] == "DarkCometConfig":
                            output = list()
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("#BEGIN DARKCOMET"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("#EOF DARKCOMET"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]

                results.append(proc)
        return results
Example #39
0
class TestFile:
    def setUp(self):
        self.tmp = tempfile.mkstemp()
        self.file = File(self.tmp[1])
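        # mkstemp() creates an empty file, so the expected hashes below are the
        # well-known digests of empty input.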

    def test_get_name(self):
        assert_equal(self.tmp[1].split("/")[-1], self.file.get_name())

    def test_get_data(self):
        assert_equal("", self.file.get_data())

    def test_get_size(self):
        assert_equal(0, self.file.get_size())

    def test_get_crc32(self):
        assert_equal("00000000", self.file.get_crc32())

    def test_get_md5(self):
        assert_equal("d41d8cd98f00b204e9800998ecf8427e", self.file.get_md5())

    def test_get_sha1(self):
        assert_equal("da39a3ee5e6b4b0d3255bfef95601890afd80709", self.file.get_sha1())

    def test_get_sha256(self):
        assert_equal("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", self.file.get_sha256())

    def test_get_sha512(self):
        assert_equal("cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e", self.file.get_sha512())

    def test_get_ssdeep(self):
        try:
            import pydeep
            assert_not_equal(None, self.file.get_ssdeep())
        except ImportError:
            assert_equal(None, self.file.get_ssdeep())

    def test_get_type(self):
        assert_equal("empty", self.file.get_type())

    def test_get_all_type(self):
        assert isinstance(self.file.get_all(), dict)

    def test_get_all_keys(self):
        for key in ["name", "size", "crc32", "md5", "sha1", "sha256", "sha512", "ssdeep", "type"]:
            assert key in self.file.get_all()

    def tearDown(self):
        os.remove(self.tmp[1])
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(os.path.splitext(os.path.basename(dmp_path))[0])
                if "behavior" in self.results and "processes" in self.results["behavior"]:
                    for process in self.results["behavior"]["processes"]:
                        if process_id == process["process_id"]:
                            process_name = process["process_name"]
                            process_path = process["module_path"]
                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")),
                    address_space=self.parse_dump(dmp_path)
                )

                # Deduplicate configs
                if proc["yara"]:
                    for match in proc["yara"]:
                        # Dyre
                        if match["name"] == "DyreCfgInjectsList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<litem>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</litem>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique inject elements)" % len(output)

                        elif match["name"] == "DyreCfgRedirectList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<rpcgroup>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</rpcgroup>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique redirect elements)" % len(output)

                        # DarkComet
                        elif match["name"] == "DarkCometConfig":
                            output = list()
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("#BEGIN DARKCOMET"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("#EOF DARKCOMET"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]

                results.append(proc)

        return results
Example #41
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
                CuckooReportError("Mongo schema version not expected, check data migration tool")
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #    created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. Need to run
        # only one time at startup.
        self.db.fs.files.ensure_index("sha256", unique=True,
                                      sparse=True, name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)
        if "network" not in report:
            report["network"] = {}

        # Store the sample in GridFS.
        if results["info"]["category"] == "file" and "target" in results:
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"]["pcap_id"] = pcap_id

        sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
        spcap = File(sorted_pcap_path)
        if spcap.valid():
            spcap_id = self.store_file(spcap)
            report["network"]["sorted_pcap_id"] = spcap_id

        mitmproxy_path = os.path.join(self.analysis_path, "dump.mitm")
        mitmpr = File(mitmproxy_path)
        if mitmpr.valid():
            mitmpr_id = self.store_file(mitmpr)
            report["network"]["mitmproxy_id"] = mitmpr_id

        # Store the process memory dump file in GridFS and reference it back in the report.
        if "procmemory" in report and self.options.get("store_memdump", False):
            for idx, procmem in enumerate(report["procmemory"]):
                procmem_path = os.path.join(self.analysis_path, "memory", "{0}.dmp".format(procmem["pid"]))
                procmem_file = File(procmem_path)
                if procmem_file.valid():
                    procmem_id = self.store_file(procmem_file)
                    report["procmemory"][idx].update({"procmem_id": procmem_id})

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        if "dropped" in report:
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop, filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            for shot_file in sorted(os.listdir(shots_path)):
                if not shot_file.endswith(".jpg"):
                    continue

                shot_path = os.path.join(self.analysis_path,
                                         "shots", shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        paginate = self.options.get("paginate", 100)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is paginate or if the loop is
                    # completed then store the chunk in MongoDB.
                    if len(chunk) == paginate:
                        to_insert = {"pid": process["pid"], "calls": chunk}
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["pid"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.close()
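
The call-chunking loop above is what keeps individual MongoDB documents small: every `paginate` calls are flushed to the separate calls collection and only the returned ObjectIds remain in the report. A minimal sketch of the paging logic itself, with a plain list standing in for the collection (names are illustrative):

def chunk_calls(calls, paginate=100):
    """Split a call list into pages of at most `paginate` entries."""
    chunks, chunk = [], []
    for call in calls:
        if len(chunk) == paginate:
            chunks.append(chunk)
            chunk = []
        chunk.append(call)
    if chunk:
        chunks.append(chunk)
    return chunks

pages = chunk_calls(list(range(250)), paginate=100)
print([len(page) for page in pages])  # [100, 100, 50]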
Example #42
0
    def run(self):
        """Runs VirusTotal processing
        @return: full VirusTotal report.
        """
        self.key = "virustotal"
        virustotal = []

        key = self.options.get("key", None)
        timeout = self.options.get("timeout", 60)
        urlscrub = self.options.get("urlscrub", None)
        do_file_lookup = self.getbool(self.options.get("do_file_lookup", False))
        do_url_lookup = self.getbool(self.options.get("do_url_lookup", False))

        if not key:
            raise CuckooProcessingError("VirusTotal API key not "
                                        "configured, skip")

        if self.task["category"] == "file" and do_file_lookup:
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError("File {0} not found, skipping it".format(self.file_path))

            resource = File(self.file_path).get_sha256()
            url = VIRUSTOTAL_FILE_URL

        elif self.task["category"] == "url" and do_url_lookup:
            resource = self.task["target"]
            if urlscrub:
                urlscrub_compiled_re = None
                try:
                    urlscrub_compiled_re = re.compile(urlscrub)
                except Exception as e:
                    raise CuckooProcessingError("Failed to compile urlscrub regex" % (e))
                try:
                   resource = re.sub(urlscrub_compiled_re,"",resource)
                except Exception as e:
                    raise CuckooProcessingError("Failed to scrub url" % (e))

            # normalize the URL the way VT appears to
            if not resource.lower().startswith("http://") and not resource.lower().startswith("https://"):
                resource = "http://" + resource
            slashsplit = resource.split('/')
            slashsplit[0] = slashsplit[0].lower()
            slashsplit[2] = slashsplit[2].lower()
            if len(slashsplit) == 3:
                slashsplit.append("")
            resource = "/".join(slashsplit)

            resource = hashlib.sha256(resource).hexdigest()
            url = VIRUSTOTAL_URL_URL
        else:
            # Not supported type, exit.
            return virustotal

        data = {"resource": resource, "apikey": key}

        try:
            r = requests.get(url, params=data, verify=True, timeout=int(timeout))
            response_data = r.content
        except requests.exceptions.RequestException as e:
            raise CuckooProcessingError("Unable to complete connection "
                                        "to VirusTotal: {0}".format(e))

        try:
            virustotal = json.loads(response_data)
        except ValueError as e:
            raise CuckooProcessingError("Unable to convert response to "
                                        "JSON: {0}".format(e))

        # Work around VT brain-damage
        if isinstance(virustotal, list) and len(virustotal):
            virustotal = virustotal[0]

        if "scans" in virustotal:
            items = virustotal["scans"].items()
            virustotal["scans"] = dict((engine.replace(".", "_"), signature)
                                       for engine, signature in items)
            virustotal["resource"] = resource
            virustotal["results"]=list(({"vendor":engine.replace(".", "_"),"sig": signature["result"]}) 
                                            for engine, signature in items)
        return virustotal
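
The URL branch above mirrors how VirusTotal appears to canonicalize a URL before hashing it: guarantee a scheme, lowercase the scheme and host, and append a trailing slash to a bare host. A standalone sketch of that normalization (the function name is illustrative; the string is encoded before hashing so the snippet also runs on Python 3):

import hashlib

def vt_normalize_url(resource):
    """Normalize a URL roughly as the module above does, then hash it."""
    if not resource.lower().startswith(("http://", "https://")):
        resource = "http://" + resource
    parts = resource.split("/")
    parts[0] = parts[0].lower()   # scheme ("http:")
    parts[2] = parts[2].lower()   # host
    if len(parts) == 3:           # bare host, add a trailing slash
        parts.append("")
    resource = "/".join(parts)
    return hashlib.sha256(resource.encode()).hexdigest()

print(vt_normalize_url("EXAMPLE.com/Path"))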
Example #43
0
    def run(self):
        """Runs VirusTotal processing
        @return: full VirusTotal report.
        """
        self.key = "virustotal"
        virustotal = []

        key = self.options.get("key", None)
        timeout = self.options.get("timeout", 60)
        urlscrub = self.options.get("urlscrub", None)
        do_file_lookup = self.getbool(self.options.get("do_file_lookup",
                                                       False))
        do_url_lookup = self.getbool(self.options.get("do_url_lookup", False))

        if not key:
            raise CuckooProcessingError("VirusTotal API key not "
                                        "configured, skip")

        if self.task["category"] == "file" and do_file_lookup:
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError(
                    "File {0} not found, skipping it".format(self.file_path))

            resource = File(self.file_path).get_md5()
            url = VIRUSTOTAL_FILE_URL

        elif self.task["category"] == "url" and do_url_lookup:
            resource = self.task["target"]
            if urlscrub:
                urlscrub_compiled_re = None
                try:
                    urlscrub_compiled_re = re.compile(urlscrub)
                except Exception as e:
                    raise CuckooProcessingError(
                        "Failed to compile urlscrub regex: %s" % e)
                try:
                    resource = re.sub(urlscrub_compiled_re, "", resource)
                except Exception as e:
                    raise CuckooProcessingError("Failed to scrub url" % (e))
            url = VIRUSTOTAL_URL_URL
        else:
            # Not supported type, exit.
            return virustotal

        data = {"resource": resource, "apikey": key}

        try:
            r = requests.get(url,
                             params=data,
                             verify=True,
                             timeout=int(timeout))
            response_data = r.content
        except requests.exceptions.RequestException as e:
            raise CuckooProcessingError("Unable to complete connection "
                                        "to VirusTotal: {0}".format(e))

        try:
            virustotal = json.loads(response_data)
        except ValueError as e:
            raise CuckooProcessingError("Unable to convert response to "
                                        "JSON: {0}".format(e))
        if "scans" in virustotal:
            items = virustotal["scans"].items()
            virustotal["scans"] = dict((engine.replace(".", "_"), signature)
                                       for engine, signature in items)
            virustotal["results"] = list(({
                "vendor": engine.replace(".", "_"),
                "sig": signature["result"]
            }) for engine, signature in items)
        return virustotal
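
Both VirusTotal modules replace dots in engine names before storing the scan results; MongoDB rejects field names containing "." (at least with the key checking used elsewhere in these reporting modules), so a vendor key such as "Engine.Name" would otherwise fail to save. A small sketch of that reshaping with made-up scan data:

scans = {
    "Example.Engine": {"result": "Trojan.Generic"},  # made-up vendor/result
    "OtherEngine": {"result": None},
}

safe_scans = dict((engine.replace(".", "_"), sig) for engine, sig in scans.items())
results = [{"vendor": engine.replace(".", "_"), "sig": sig["result"]}
           for engine, sig in scans.items()]
print(sorted(safe_scans))  # ['Example_Engine', 'OtherEngine']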
Example #44
0
    def run(self, results):
        """Writes report.
        @param results: Cuckoo results dict.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        self._connect()

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually created. 
        #  Returns None if the index already exists.
        self._db.fs.files.ensure_index("md5", unique=True, name="md5_unique")

        # Add pcap file, check for dups and in case add only reference.
        pcap_file = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_file)
        if pcap.valid():
            pcap_id = self.store_file(pcap)

            # Preventive key check.
            if "network" in results and isinstance(results["network"], dict):
                results["network"]["pcap_id"] = pcap_id
            else:
                results["network"] = {"pcap_id": pcap_id}

        # Add dropped files, check for dups and in case add only reference.
        dropped_files = {}
        for dir_name, dir_names, file_names in os.walk(os.path.join(self.analysis_path, "files")):
            for file_name in file_names:
                file_path = os.path.join(dir_name, file_name)
                drop = File(file_path)
                dropped_files[drop.get_md5()] = drop

        result_files = dict((dropped.get("md5", None), dropped) for dropped in results["dropped"])

        # The MD5s in dropped_files and result_files should normally match.
        if set(dropped_files.keys()) - set(result_files.keys()):
            log.warning("Dropped files in result dict are different from those in storage.")

        # store files in gridfs
        for md5, fileobj in dropped_files.items():
            # only store in db if we have a filename for it in results (should be all)
            resultsdrop = result_files.get(md5, None)
            if resultsdrop and fileobj.valid():
                drop_id = self.store_file(fileobj, filename=resultsdrop["name"])
                resultsdrop["dropped_id"] = drop_id

        # Add screenshots.
        results["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [f for f in os.listdir(shots_path) if f.endswith(".jpg")]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots", shot_file)
                shot = File(shot_path)
                if shot.valid():
                    shot_id = self.store_file(shot)
                    results["shots"].append(shot_id)

        # Save all remaining results.
        try:
            self._db.analysis.save(results, manipulate=False)
        except InvalidDocument:
            # The document is too big, we need to shrink it and re-save it.
            results["behavior"]["processes"] = ""

            # Let's add an error message to the debug block.
            error = ("The analysis results were too big to be stored, " +
                     "the detailed behavioral analysis has been stripped out.")
            results["debug"]["errors"].append(error)

            # Try again to store, if it fails, just abort.
            try:
                self._db.analysis.save(results)
            except Exception as e:
                raise CuckooReportError("Failed to store the document into MongoDB: %s" % e)
Example #45
0
class TestFile:
    def setUp(self):
        self.tmp = tempfile.mkstemp()
        self.file = File(self.tmp[1])

    def test_get_name(self):
        assert_equal(self.tmp[1].split("/")[-1], self.file.get_name())

    def test_get_data(self):
        assert_equal("", self.file.get_data())

    def test_get_size(self):
        assert_equal(0, self.file.get_size())

    def test_get_crc32(self):
        assert_equal("00000000", self.file.get_crc32())

    def test_get_md5(self):
        assert_equal("d41d8cd98f00b204e9800998ecf8427e", self.file.get_md5())

    def test_get_sha1(self):
        assert_equal("da39a3ee5e6b4b0d3255bfef95601890afd80709",
                     self.file.get_sha1())

    def test_get_sha256(self):
        assert_equal(
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
            self.file.get_sha256())

    def test_get_sha512(self):
        assert_equal(
            "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e",
            self.file.get_sha512())

    def test_get_ssdeep(self):
        try:
            import pydeep
            assert_not_equal(None, self.file.get_ssdeep())
        except ImportError:
            assert_equal(None, self.file.get_ssdeep())

    def test_get_type(self):
        assert_equal("empty", self.file.get_type())

    def test_get_all_type(self):
        assert isinstance(self.file.get_all(), dict)

    def test_get_all_keys(self):
        for key in [
                "name", "size", "crc32", "md5", "sha1", "sha256", "sha512",
                "ssdeep", "type"
        ]:
            assert key in self.file.get_all()

    def tearDown(self):
        os.remove(self.tmp[1])
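
The expected digests asserted above are simply the hashes of empty input, since tempfile.mkstemp creates a zero-byte file; they can be reproduced directly with the standard library:

import hashlib
import zlib

empty = b""
print(format(zlib.crc32(empty) & 0xffffffff, "08x"))  # 00000000
print(hashlib.md5(empty).hexdigest())     # d41d8cd98f00b204e9800998ecf8427e
print(hashlib.sha1(empty).hexdigest())    # da39a3ee5e6b4b0d3255bfef95601890afd80709
print(hashlib.sha256(empty).hexdigest())  # e3b0c44298fc1c14...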
Example #46
0
    def run(self):
        """Run Suricata.
        @return: hash with alerts
        """
        self.key = "suricata"
        # General
        SURICATA_CONF = self.options.get("conf", None)
        SURICATA_EVE_LOG = self.options.get("evelog", None)
        SURICATA_ALERT_LOG = self.options.get("alertlog", None)
        SURICATA_TLS_LOG = self.options.get("tlslog", None)
        SURICATA_HTTP_LOG = self.options.get("httplog", None)
        SURICATA_SSH_LOG = self.options.get("sshlog", None)
        SURICATA_DNS_LOG = self.options.get("dnslog", None)
        SURICATA_FILE_LOG = self.options.get("fileslog", None)
        SURICATA_FILES_DIR = self.options.get("filesdir", None)
        SURICATA_RUNMODE = self.options.get("runmode", None)
        SURICATA_FILE_BUFFER = self.options.get("buffer", 8192)
        Z7_PATH = self.options.get("7zbin", None)
        FILES_ZIP_PASS = self.options.get("zippass", None)

        # Socket
        SURICATA_SOCKET_PATH = self.options.get("socket_file", None)

        # Command Line
        SURICATA_BIN = self.options.get("bin", None)

        suricata = dict()
        suricata["alerts"] = []
        suricata["tls"] = []
        suricata["perf"] = []
        suricata["files"] = []
        suricata["http"] = []
        suricata["dns"] = []
        suricata["ssh"] = []
        suricata["fileinfo"] = []

        suricata["eve_log_full_path"] = None
        suricata["alert_log_full_path"] = None
        suricata["tls_log_full_path"] = None
        suricata["http_log_full_path"] = None
        suricata["file_log_full_path"] = None
        suricata["ssh_log_full_path"] = None
        suricata["dns_log_full_path"] = None

        tls_items = [
            "fingerprint", "issuer", "version", "subject", "sni", "ja3",
            "serial"
        ]

        SURICATA_ALERT_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                  SURICATA_ALERT_LOG)
        SURICATA_TLS_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_TLS_LOG)
        SURICATA_HTTP_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                 SURICATA_HTTP_LOG)
        SURICATA_SSH_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_SSH_LOG)
        SURICATA_DNS_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_DNS_LOG)
        SURICATA_EVE_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_EVE_LOG)
        SURICATA_FILE_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                 SURICATA_FILE_LOG)
        SURICATA_FILES_DIR_FULL_PATH = "%s/%s" % (self.logs_path,
                                                  SURICATA_FILES_DIR)

        separate_log_paths = [
            ("alert_log_full_path", SURICATA_ALERT_LOG_FULL_PATH),
            ("tls_log_full_path", SURICATA_TLS_LOG_FULL_PATH),
            ("http_log_full_path", SURICATA_HTTP_LOG_FULL_PATH),
            ("ssh_log_full_path", SURICATA_SSH_LOG_FULL_PATH),
            ("dns_log_full_path", SURICATA_DNS_LOG_FULL_PATH)
        ]

        # handle reprocessing
        all_log_paths = [x[1] for x in separate_log_paths] + \
            [SURICATA_EVE_LOG_FULL_PATH, SURICATA_FILE_LOG_FULL_PATH]
        for log_path in all_log_paths:
            if os.path.exists(log_path):
                try:
                    os.unlink(log_path)
                except:
                    pass
        if os.path.isdir(SURICATA_FILES_DIR_FULL_PATH):
            try:
                shutil.rmtree(SURICATA_FILES_DIR_FULL_PATH, ignore_errors=True)
            except:
                pass

        if not os.path.exists(SURICATA_CONF):
            log.warning(
                "Unable to Run Suricata: Conf File {} Does Not Exist".format(
                    SURICATA_CONF))
            return suricata["alerts"]
        if not os.path.exists(self.pcap_path):
            log.warning(
                "Unable to Run Suricata: Pcap file {} Does Not Exist".format(
                    self.pcap_path))
            return suricata["alerts"]

        # Add to this if you wish to ignore any SIDs for the Suricata alert logs.
        # Useful for ignoring SIDs without disabling them. Ex: suppress an alert for
        # a SID which is a dependent of another. (Bad TCP data for HTTP(S) alert)
        sid_blacklist = [
            # SURICATA FRAG IPv6 Fragmentation overlap
            2200074,
            # ET INFO InetSim Response from External Source Possible SinkHole
            2017363,
            # SURICATA UDPv4 invalid checksum
            2200075,
            # ET POLICY SSLv3 outbound connection from client vulnerable to POODLE attack
            2019416,
        ]

        if SURICATA_RUNMODE == "socket":
            try:
                #from suricatasc import SuricataSC
                from lib.cuckoo.common.suricatasc import SuricataSC
            except Exception as e:
                log.warning("Failed to import suricatasc lib {}".format(e))
                return suricata

            loopcnt = 0
            maxloops = 24
            loopsleep = 5

            args = dict()
            args["filename"] = self.pcap_path
            args["output-dir"] = self.logs_path

            suris = SuricataSC(SURICATA_SOCKET_PATH)
            try:
                suris.connect()
                suris.send_command("pcap-file", args)
            except Exception as e:
                log.warning(
                    "Failed to connect to socket and send command {}: {}".
                    format(SURICATA_SOCKET_PATH, e))
                return suricata
            while loopcnt < maxloops:
                try:
                    pcap_flist = suris.send_command("pcap-file-list")
                    current_pcap = suris.send_command("pcap-current")
                    log.debug("pcapfile list: {} current pcap: {}".format(
                        pcap_flist, current_pcap))

                    if self.pcap_path not in pcap_flist["message"]["files"] and \
                            current_pcap["message"] != self.pcap_path:
                        log.debug(
                            "Pcap not in the list and not the current pcap; assuming it has been processed"
                        )
                        break
                    else:
                        loopcnt = loopcnt + 1
                        time.sleep(loopsleep)
                except Exception as e:
                    log.warning(
                        "Failed to get pcap status breaking out of loop {}".
                        format(e))
                    break

            if loopcnt == maxloops:
                logstr = "Loop timeout of {} sec occurred waiting for file {} to finish processing"
                log.warning(logstr.format(maxloops * loopsleep, current_pcap))
                return suricata
        elif SURICATA_RUNMODE == "cli":
            if not os.path.exists(SURICATA_BIN):
                log.warning(
                    "Unable to Run Suricata: Bin File {} Does Not Exist".
                    format(SURICATA_BIN))
                return suricata["alerts"]
            cmdstr = "{} -c {} -k none -l {} -r {}"
            cmd = cmdstr.format(SURICATA_BIN, SURICATA_CONF, self.logs_path,
                                self.pcap_path)
            ret, stdout, stderr = self.cmd_wrapper(cmd)
            if ret != 0:
                log.warning(
                    "Suricata returned an Exit Value Other than Zero {}".format(
                        stderr))
                return suricata

        else:
            log.warning("Unknown Suricata Runmode")
            return suricata

        datalist = []
        if os.path.exists(SURICATA_EVE_LOG_FULL_PATH):
            suricata["eve_log_full_path"] = SURICATA_EVE_LOG_FULL_PATH
            with open(SURICATA_EVE_LOG_FULL_PATH, "rb") as eve_log:
                datalist.append(eve_log.read())
        else:
            for path in separate_log_paths:
                if os.path.exists(path[1]):
                    suricata[path[0]] = path[1]
                    with open(path[1], "rb") as the_log:
                        datalist.append(the_log.read())

        if not datalist:
            log.warning("Suricata: Failed to find usable Suricata log file")

        parsed_files = []
        for data in datalist:
            for line in data.splitlines():
                try:
                    parsed = json.loads(line)
                except:
                    log.warning(
                        "Suricata: Failed to parse line {} as json".format(
                            line))
                    continue

                if 'event_type' in parsed:
                    if parsed["event_type"] == "alert":
                        if (parsed["alert"]["signature_id"]
                                not in sid_blacklist and not parsed["alert"]
                            ["signature"].startswith("SURICATA STREAM")):
                            alog = dict()
                            if parsed["alert"]["gid"] == '':
                                alog["gid"] = "None"
                            else:
                                alog["gid"] = parsed["alert"]["gid"]
                            if parsed["alert"]["rev"] == '':
                                alog["rev"] = "None"
                            else:
                                alog["rev"] = parsed["alert"]["rev"]
                            if parsed["alert"]["severity"] == '':
                                alog["severity"] = "None"
                            else:
                                alog["severity"] = parsed["alert"]["severity"]
                            alog["sid"] = parsed["alert"]["signature_id"]
                            try:
                                alog["srcport"] = parsed["src_port"]
                            except:
                                alog["srcport"] = "None"
                            alog["srcip"] = parsed["src_ip"]
                            try:
                                alog["dstport"] = parsed["dest_port"]
                            except:
                                alog["dstport"] = "None"
                            alog["dstip"] = parsed["dest_ip"]
                            alog["protocol"] = parsed["proto"]
                            alog["timestamp"] = parsed["timestamp"].replace(
                                "T", " ")
                            if parsed["alert"]["category"] == '':
                                alog["category"] = "None"
                            else:
                                alog["category"] = parsed["alert"]["category"]
                            alog["signature"] = parsed["alert"]["signature"]
                            suricata["alerts"].append(alog)

                    elif parsed["event_type"] == "http":
                        hlog = dict()
                        hlog["srcport"] = parsed["src_port"]
                        hlog["srcip"] = parsed["src_ip"]
                        hlog["dstport"] = parsed["dest_port"]
                        hlog["dstip"] = parsed["dest_ip"]
                        hlog["timestamp"] = parsed["timestamp"].replace(
                            "T", " ")
                        keyword = ("uri", "length", "hostname", "status",
                                   "http_method", "contenttype", "ua",
                                   "referrer")
                        keyword_suri = ("url", "length", "hostname", "status",
                                        "http_method", "http_content_type",
                                        "http_user_agent", "http_refer")
                        for key, key_s in zip(keyword, keyword_suri):
                            try:
                                hlog[key] = parsed["http"].get(key_s, "None")
                            except:
                                hlog[key] = "None"
                        suricata["http"].append(hlog)

                    elif parsed["event_type"] == "tls":
                        tlog = dict()
                        tlog["srcport"] = parsed["src_port"]
                        tlog["srcip"] = parsed["src_ip"]
                        tlog["dstport"] = parsed["dest_port"]
                        tlog["dstip"] = parsed["dest_ip"]
                        tlog["timestamp"] = parsed["timestamp"].replace(
                            "T", " ")
                        for key in tls_items:
                            if key in parsed["tls"]:
                                tlog[key] = parsed["tls"][key]
                        suricata["tls"].append(tlog)

                    elif parsed["event_type"] == "ssh":
                        suricata["ssh"].append(parsed)
                    elif parsed["event_type"] == "dns":
                        suricata["dns"].append(parsed)
                    elif parsed["event_type"] == "fileinfo":
                        flog = dict()
                        flog["http_host"] = parsed.get("http",
                                                       {}).get("hostname", "")
                        flog["http_uri"] = parsed.get("http",
                                                      {}).get("url", "")
                        flog["http_referer"] = parsed.get("http", {}).get(
                            "referer", "")
                        flog["http_user_agent"] = parsed.get("http", {}).get(
                            "http_user_agent", "")
                        flog["protocol"] = parsed.get("proto", "")
                        flog["magic"] = parsed.get("fileinfo",
                                                   {}).get("magic", "")
                        flog["size"] = parsed.get("fileinfo",
                                                  {}).get("size", "")
                        flog["stored"] = parsed.get("fileinfo",
                                                    {}).get("stored", "")
                        flog["sha256"] = parsed.get("fileinfo",
                                                    {}).get("sha256", "")
                        flog["md5"] = parsed.get("fileinfo", {}).get("md5", "")
                        flog["filename"] = parsed.get("fileinfo",
                                                      {}).get("filename", "")
                        flog["file_info"] = dict()
                        if "/" in flog["filename"]:
                            flog["filename"] = flog["filename"].split("/")[-1]
                        parsed_files.append(flog)

        if parsed_files:
            for sfile in parsed_files:
                if sfile.get("stored", False):
                    filename = sfile["sha256"]
                    src_file = "{}/{}/{}".format(SURICATA_FILES_DIR_FULL_PATH,
                                                 filename[0:2], filename)
                    dst_file = "{}/{}".format(SURICATA_FILES_DIR_FULL_PATH,
                                              filename)
                    if os.path.exists(src_file):
                        try:
                            shutil.move(src_file, dst_file)
                        except OSError as e:
                            log.warning(
                                "Unable to move suricata file: {}".format(e))
                            break
                        file_info = File(file_path=dst_file).get_all()
                        try:
                            with open(file_info["path"], "r") as drop_open:
                                filedata = drop_open.read(
                                    SURICATA_FILE_BUFFER + 1)
                            if len(filedata) > SURICATA_FILE_BUFFER:
                                file_info["data"] = convert_to_printable(
                                    filedata[:SURICATA_FILE_BUFFER] +
                                    " <truncated>")
                            else:
                                file_info["data"] = convert_to_printable(
                                    filedata)
                        except UnicodeDecodeError as e:
                            pass
                        if file_info:
                            sfile["file_info"] = file_info
                    suricata["files"].append(sfile)
            with open(SURICATA_FILE_LOG_FULL_PATH, "w") as drop_log:
                drop_log.write(json.dumps(suricata["files"], indent=4))

            # Cleanup file subdirectories left behind by messy Suricata
            for d in [
                    dirpath
                    for (dirpath, dirnames,
                         filenames) in os.walk(SURICATA_FILES_DIR_FULL_PATH)
                    if len(dirnames) == 0 and len(filenames) == 0
            ]:
                try:
                    shutil.rmtree(d)
                except OSError as e:
                    log.warning(
                        "Unable to delete suricata file subdirectories: {}".
                        format(e))

        if SURICATA_FILES_DIR_FULL_PATH and os.path.exists(SURICATA_FILES_DIR_FULL_PATH) and Z7_PATH \
                and os.path.exists(Z7_PATH):
            # /usr/bin/7z a -pinfected -y files.zip files-json.log files
            cmdstr = "cd {} && {} a -p{} -y files.zip {} {}"
            cmd = cmdstr.format(self.logs_path, Z7_PATH, FILES_ZIP_PASS,
                                SURICATA_FILE_LOG, SURICATA_FILES_DIR)
            ret, stdout, stderr = self.cmd_wrapper(cmd)
            if ret > 1:
                log.warning(
                    "Suricata: Failed to create {}/files.zip - Error {}".
                    format(self.logs_path, ret))

        suricata["alerts"] = self.sort_by_timestamp(suricata["alerts"])
        suricata["http"] = self.sort_by_timestamp(suricata["http"])
        suricata["tls"] = self.sort_by_timestamp(suricata["tls"])

        return suricata
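
The core of the module above is the walk over EVE JSON lines: each line is decoded independently, and alert events are kept only when the signature ID is not blacklisted and the signature text does not start with "SURICATA STREAM". A trimmed-down sketch of that filter (function name and sample line are illustrative):

import json

SID_BLACKLIST = {2200074, 2017363, 2200075, 2019416}

def filter_alerts(eve_lines, sid_blacklist=SID_BLACKLIST):
    """Yield alert events that pass the SID blacklist and stream-noise checks."""
    for line in eve_lines:
        try:
            event = json.loads(line)
        except ValueError:
            continue
        if event.get("event_type") != "alert":
            continue
        alert = event["alert"]
        if alert["signature_id"] in sid_blacklist:
            continue
        if alert["signature"].startswith("SURICATA STREAM"):
            continue
        yield event

sample = ['{"event_type": "alert", "alert": {"signature_id": 1, "signature": "ET TEST"}}']
print(len(list(filter_alerts(sample))))  # 1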
Example #47
0
 def setUp(self):
     self.tmp = tempfile.mkstemp()
     self.file = File(self.tmp[1])
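
tempfile.mkstemp returns an already-open file descriptor together with the path, so a matching tearDown would normally close that descriptor as well as unlink the file (the tearDown in Example #45 only removes the path); a minimal sketch, assuming the same setUp and an os import:

 def tearDown(self):
     os.close(self.tmp[0])   # close the descriptor returned by mkstemp
     os.remove(self.tmp[1])  # delete the temporary file itself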
Example #48
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to S3.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        self.s3_region = self.options.get("region", "us-west-2")
        self.s3_access_key = self.options.get("access_key", "")
        self.s3_secret_key = self.options.get("secret_key", "")
        s3_reports_bucket_name = self.options.get("reports_bucket", "")
        s3_shots_bucket_name = self.options.get("shots_bucket", "")
        s3_samples_bucket_name = self.options.get("samples_bucket", "")
        s3_files_bucket_name = self.options.get("files_bucket", "")
        s3_aux_bucket_name = self.options.get("aux_bucket", "")
        s3_logs_bucket_name = self.options.get("logs_bucket", "")
        s3_pcap_bucket_name = self.options.get("pcap_bucket", "")
        s3_md5_bucket_name = self.options.get("md5_bucket", "")
        cleanup = self.options.get("cleanup", False)

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        if not "network" in report:
            report["network"] = {}

        # Add screenshot paths
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [
                shot for shot in os.listdir(shots_path)
                if shot.endswith(".jpg")
            ]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                screenshot = File(shot_path)
                if screenshot.valid():
                    #report["shots"].append("{0}/{1}".format(results['info']['id'], shot_file))
                    report["shots"].append(shot_file.replace(".jpg", ""))

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report.
        # Also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                chunk_count = 0
                # Using this type of prefix is useful because you can always re-construct it from
                # the original results
                #chunk_prefix = str(results['info']['id']) + '/' + process['process_name']
                chunk_prefix = str(results['info']['id']) + '/' + str(
                    process['process_id'])
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed then
                    # store the chunk in S3.

                    if len(chunk) == 100:
                        chunk_name = "{0}.{1}".format(chunk_prefix,
                                                      chunk_count)
                        #log.debug("INFO TIME!")
                        #log.debug("%s %s %s" %(s3_reports_bucket_name, chunk_name, chunk_prefix))
                        #log.debug(chunk_prefix)
                        err = self.save_to_s3(s3_reports_bucket_name,
                                              chunk_name, json.dumps(chunk))
                        if err != '':
                            log.error(
                                "Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}"
                                .format(chunk_name, err))
                        else:
                            chunks_ids.append("{0}.{1}".format(
                                chunk_prefix, chunk_count))
                            chunk_count += 1
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    chunk_name = "{0}.{1}".format(chunk_prefix, chunk_count)
                    #log.debug("%s %s %s" %(s3_reports_bucket_name, chunk_name, chunk_prefix))
                    err = self.save_to_s3(s3_reports_bucket_name, chunk_name,
                                          json.dumps(chunk))
                    if err != '':
                        log.error(
                            "Non-size related issue saving analysis JSON to S3 for chunk {0} - {1}"
                            .format(chunk_name, err))
                    else:
                        chunks_ids.append("{0}.{1}".format(
                            chunk_prefix, chunk_count))

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Other info we want quick access to from the web UI.
        if results.has_key("virustotal") and results["virustotal"] and results[
                "virustotal"].has_key(
                    "positives") and results["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (
                results["virustotal"]["positives"],
                results["virustotal"]["total"])
        if results.has_key("suricata") and results["suricata"]:
            if results["suricata"].has_key("tls") and len(
                    results["suricata"]["tls"]) > 0:
                report["suri_tls_cnt"] = len(results["suricata"]["tls"])
            if results["suricata"].has_key("alerts") and len(
                    results["suricata"]["alerts"]) > 0:
                report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
            if results["suricata"].has_key("files") and len(
                    results["suricata"]["files"]) > 0:
                report["suri_file_cnt"] = len(results["suricata"]["files"])
            if results["suricata"].has_key("http") and len(
                    results["suricata"]["http"]) > 0:
                report["suri_http_cnt"] = len(results["suricata"]["http"])
            if results["suricata"].has_key("ssh") and len(
                    results["suricata"]["ssh"]) > 0:
                report["suri_ssh_cnt"] = len(results["suricata"]["ssh"])
            if results["suricata"].has_key("dns") and len(
                    results["suricata"]["dns"]) > 0:
                report["suri_dns_cnt"] = len(results["suricata"]["dns"])

        # Store the report (its 'object id' is simply the analysis id).
        # First make sure it's not too big (5 GB limit).
        data = json.dumps(report)
        if len(data) < 5000000000:
            err = self.save_to_s3(s3_reports_bucket_name,
                                  results['info']['id'], data)
            if err != '':
                log.error(
                    "Non-size related issue saving analysis JSON to S3 for report {0} - {1}"
                    .format(results['info']['id'], err))
        else:
            log.error("JSON for analysis id {0} is greater than 5GB".format(
                results['info']['id']))

        # Process the rest of the analysis files and put them in S3.
        if s3_shots_bucket_name != '':
            shots_path = os.path.join(results['info']['id'],
                                      self.analysis_path, "shots")
            self.relocate_to_s3(results['info']['id'], shots_path,
                                s3_shots_bucket_name)
        if s3_pcap_bucket_name != '':
            if os.path.isfile(self.analysis_path + '/dump.pcap'):
                with open(self.analysis_path + '/dump.pcap', 'rb') as infile:
                    self.save_to_s3(
                        s3_pcap_bucket_name,
                        "{0}/dump.pcap".format(results['info']['id']),
                        infile.read())
            if os.path.isfile(self.analysis_path + '/dump_sorted.pcap'):
                with open(self.analysis_path + '/dump_sorted.pcap',
                          'rb') as infile:
                    self.save_to_s3(
                        s3_pcap_bucket_name,
                        "{0}/dump_sorted.pcap".format(results['info']['id']),
                        infile.read())
        if s3_aux_bucket_name != '':
            aux_path = os.path.join(results['info']['id'], self.analysis_path,
                                    "aux")
            self.relocate_to_s3(results['info']['id'], aux_path,
                                s3_aux_bucket_name)
        if s3_logs_bucket_name != '':
            logs_path = os.path.join(results['info']['id'], self.analysis_path,
                                     "logs")
            self.relocate_to_s3(results['info']['id'], logs_path,
                                s3_logs_bucket_name)
        if s3_samples_bucket_name != '':
            sample = os.path.realpath(self.analysis_path + '/binary')
            with open(sample, 'rb') as infile:
                self.save_to_s3(s3_samples_bucket_name,
                                results['target']['file']['sha256'],
                                infile.read())
        #log.debug(s3_files_bucket_name)
        if s3_files_bucket_name != '':
            #log.debug(self.analysis_path)
            for root, dirnames, filenames in os.walk(self.analysis_path +
                                                     '/files'):
                #log.debug(filenames)
                for filename in filenames:
                    key_name = str(results['info']['id']) + '/' + root.split(
                        os.sep)[-1] + '/' + filename
                    if not filename.endswith('_info.txt'):
                        key_name = str(
                            results['info']['id']) + '/' + root.split(
                                os.sep)[-1]
                    #log.debug(key_name)
                    with open(os.path.join(root, filename), 'rb') as infile:
                        self.save_to_s3(s3_files_bucket_name, key_name,
                                        infile.read())
        if s3_md5_bucket_name != '':
            info = {}
            info.update(report['info'])
            info.update(report['target'])
            self.save_to_s3(s3_md5_bucket_name,
                            results['target']['file']['md5'], json.dumps(info))

        if cleanup:
            shutil.rmtree(self.analysis_path)
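
The S3 variant reuses the same paging idea but stores each page under a key built from the task id and process id, "<task_id>/<pid>.<chunk_count>", so the call list can later be reassembled by listing that prefix. A sketch of the key layout with a dict standing in for the bucket (all names here are illustrative):

import json

def store_call_chunks(task_id, pid, calls, page_size=100, bucket=None):
    """Upload pages of calls under "<task_id>/<pid>.<n>" keys and return the keys."""
    bucket = {} if bucket is None else bucket
    prefix = "{0}/{1}".format(task_id, pid)
    keys, chunk, count = [], [], 0
    for call in calls:
        if len(chunk) == page_size:
            key = "{0}.{1}".format(prefix, count)
            bucket[key] = json.dumps(chunk)
            keys.append(key)
            count += 1
            chunk = []
        chunk.append(call)
    if chunk:
        key = "{0}.{1}".format(prefix, count)
        bucket[key] = json.dumps(chunk)
        keys.append(key)
    return keys

print(store_call_chunks(42, 1234, list(range(150))))  # ['42/1234.0', '42/1234.1']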
Example #49
0
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on every analysis. It only
        # needs to run once at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one(
            )["version"] != self.SCHEMA_VERSION:
                raise CuckooReportError(
                    "Mongo schema version not expected, check data migration tool"
                )
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        if "network" not in report:
            report["network"] = {}

        # Add screenshot paths
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [
                shot for shot in os.listdir(shots_path)
                if shot.endswith(".jpg")
            ]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                screenshot = File(shot_path)
                if screenshot.valid():
                    # Strip the extension as it's added later
                    # in the Django view
                    report["shots"].append(shot_file.replace(".jpg", ""))

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        new_processes = []

        for process in report.get("behavior", {}).get("processes", []) or []:
            new_process = dict(process)
            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []
                # Append call to the chunk.
                chunk.append(call)
            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)
        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes
        # Calculate the mlist_cnt for display if present to reduce db load
        if "signatures" in results:
            for entry in results["signatures"]:
                if entry["name"] == "ie_martian_children":
                    report["mlist_cnt"] = len(entry["data"])
                if entry["name"] == "office_martian_children":
                    report["f_mlist_cnt"] = len(entry["data"])

        # Other info we want quick access to from the web UI
        if results.has_key("virustotal") and results["virustotal"] and results[
                "virustotal"].has_key(
                    "positives") and results["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (
                results["virustotal"]["positives"],
                results["virustotal"]["total"])
        if results.get("suricata", False):

            keywords = ("tls", "alerts", "files", "http", "ssh", "dns")
            keywords_dict = ("suri_tls_cnt", "suri_alert_cnt", "suri_file_cnt",
                             "suri_http_cnt", "suri_ssh_cnt", "suri_dns_cnt")
            for keyword, keyword_value in zip(keywords, keywords_dict):
                if results["suricata"].get(keyword, 0):
                    report[keyword_value] = len(results["suricata"][keyword])

        # Create an index based on the info.id dict key. Increases overall scalability
        # with large amounts of data.
        # Note: Silently ignores the creation if the index already exists.
        self.db.analysis.create_index("info.id", background=True)

        # Trick for the distributed API.
        if results.get("info", {}).get("options", {}).get("main_task_id", ""):
            report["info"]["id"] = int(
                results["info"]["options"]["main_task_id"])

        analyses = self.db.analysis.find(
            {"info.id": int(report["info"]["id"])})
        if analyses.count() > 0:
            log.debug("Deleting analysis data for Task %s" %
                      report["info"]["id"])
            for analysis in analyses:
                for process in analysis["behavior"]["processes"]:
                    for call in process["calls"]:
                        self.db.calls.remove({"_id": ObjectId(call)})
                self.db.analysis.remove({"_id": ObjectId(analysis["_id"])})
            log.debug("Deleted previous MongoDB data for Task %s" %
                      report["info"]["id"])

        self.ensure_valid_utf8(report)

        # Store the report and retrieve its object id.
        try:
            self.db.analysis.save(report, check_keys=False)
        except InvalidDocument as e:
            parent_key, psize = self.debug_dict_size(report)[0]
            if not self.options.get("fix_large_docs", False):
                # Just log the error and problem keys
                log.error(str(e))
                log.error("Largest parent key: %s (%d MB)" %
                          (parent_key, int(psize) / MEGABYTE))
            else:
                # Delete the problem keys and check for more
                error_saved = True
                size_filter = MONGOSIZELIMIT
                while error_saved:
                    if type(report) == list:
                        report = report[0]
                    try:
                        if type(report[parent_key]) == list:
                            for j, parent_dict in enumerate(
                                    report[parent_key]):
                                child_key, csize = self.debug_dict_size(
                                    parent_dict, parent_key)[0]
                                if csize > size_filter:
                                    if parent_key == child_key:
                                        log.warn(
                                            "results['%s'] deleted due to size: %s"
                                            % (parent_key, csize))
                                        del report[parent_key]
                                        break
                                    else:
                                        log.warn(
                                            "results['%s']['%s'] deleted due to size: %s"
                                            % (parent_key, child_key, csize))
                                        del report[parent_key][j][child_key]
                        else:
                            child_key, csize = self.debug_dict_size(
                                report[parent_key], parent_key)[0]
                            if csize > size_filter:
                                log.warn(
                                    "else - results['%s']['%s'] deleted due to size: %s"
                                    % (parent_key, child_key, csize))
                                del report[parent_key][child_key]
                        try:
                            self.db.analysis.save(report, check_keys=False)
                            error_saved = False
                        except InvalidDocument as e:
                            parent_key, psize = self.debug_dict_size(report)[0]
                            log.error(str(e))
                            log.error("Largest parent key: %s (%d MB)" %
                                      (parent_key, int(psize) / MEGABYTE))
                            size_filter = size_filter - MEGABYTE
                    except Exception as e:
                        log.error("Failed to delete child key: %s" % str(e))
                        error_saved = False

        self.conn.close()
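
The recovery branch above leans on a debug_dict_size helper to locate the heaviest key before deleting it; its implementation is not shown here, but the idea can be approximated by measuring each top-level value's serialized size (JSON length is used below as a rough stand-in, which is an assumption rather than the module's actual method):

import json

def approx_dict_size(report):
    """Return (key, size) pairs sorted largest-first by JSON-encoded size."""
    sizes = []
    for key, value in report.items():
        try:
            sizes.append((key, len(json.dumps(value, default=str))))
        except (TypeError, ValueError):
            sizes.append((key, 0))
    return sorted(sizes, key=lambda item: item[1], reverse=True)

report = {"behavior": {"processes": ["x" * 1000]}, "info": {"id": 1}}
print(approx_dict_size(report)[0][0])  # behavior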
Example #50
0
 def setUp(self):
     self.tmp = tempfile.mkstemp()
     self.file = File(self.tmp[1])
Example #51
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("target",
                        type=str,
                        help="URL, path to the file or folder to analyze")
    parser.add_argument("-d",
                        "--debug",
                        action="store_true",
                        help="Enable debug logging")
    parser.add_argument(
        "--remote",
        type=str,
        action="store",
        default=None,
        help="Specify IP:port to a Cuckoo API server to submit remotely",
        required=False)
    parser.add_argument("--user",
                        type=str,
                        action="store",
                        default=None,
                        help="Username for Basic Auth",
                        required=False)
    parser.add_argument("--password",
                        type=str,
                        action="store",
                        default=None,
                        help="Password for Basic Auth",
                        required=False)
    parser.add_argument("--sslnoverify",
                        action="store_true",
                        default=False,
                        help="Do not validate SSL cert",
                        required=False)
    parser.add_argument("--ssl",
                        action="store_true",
                        default=False,
                        help="Use SSL/TLS for remote",
                        required=False)
    parser.add_argument("--url",
                        action="store_true",
                        default=False,
                        help="Specify whether the target is an URL",
                        required=False)
    parser.add_argument("--package",
                        type=str,
                        action="store",
                        default="",
                        help="Specify an analysis package",
                        required=False)
    parser.add_argument("--custom",
                        type=str,
                        action="store",
                        default="",
                        help="Specify any custom value",
                        required=False)
    parser.add_argument("--timeout",
                        type=int,
                        action="store",
                        default=0,
                        help="Specify an analysis timeout",
                        required=False)
    parser.add_argument(
        "--options",
        type=str,
        action="store",
        default="",
        help="Specify options for the analysis package (e.g. \"name=value,name2=value2\")",
        required=False)
    parser.add_argument(
        "--priority",
        type=int,
        action="store",
        default=1,
        help="Specify a priority for the analysis represented by an integer",
        required=False)
    parser.add_argument(
        "--machine",
        type=str,
        action="store",
        default="",
        help="Specify the identifier of a machine you want to use",
        required=False)
    parser.add_argument(
        "--platform",
        type=str,
        action="store",
        default="",
        help="Specify the operating system platform you want to use (windows/darwin/linux)",
        required=False)
    parser.add_argument(
        "--memory",
        action="store_true",
        default=False,
        help="Enable to take a memory dump of the analysis machine",
        required=False)
    parser.add_argument(
        "--enforce-timeout",
        action="store_true",
        default=False,
        help="Enable to force the analysis to run for the full timeout period",
        required=False)
    parser.add_argument("--clock",
                        type=str,
                        action="store",
                        default=None,
                        help="Set virtual machine clock",
                        required=False)
    parser.add_argument(
        "--tags",
        type=str,
        action="store",
        default=None,
        help="Specify tags identifier of a machine you want to use",
        required=False)
    parser.add_argument("--max",
                        type=int,
                        action="store",
                        default=None,
                        help="Maximum samples to add in a row",
                        required=False)
    parser.add_argument("--pattern",
                        type=str,
                        action="store",
                        default=None,
                        help="Pattern of files to submit",
                        required=False)
    parser.add_argument("--shuffle",
                        action="store_true",
                        default=False,
                        help="Shuffle samples before submitting them",
                        required=False)
    parser.add_argument("--unique",
                        action="store_true",
                        default=False,
                        help="Only submit new samples, ignore duplicates",
                        required=False)
    parser.add_argument("--quiet",
                        action="store_true",
                        default=False,
                        help="Only print text on failure",
                        required=False)
    parser.add_argument("--gateway",
                        type=str,
                        action="store",
                        default=None,
                        help="Set the default gateway for the task",
                        required=False)
    try:
        args = parser.parse_args()
    except IOError as e:
        parser.error(e)
        return False

    # If the quiet flag has been set, then we also disable the "warning"
    # level of the logging module. (E.g., when pydeep has not been installed,
    # there will be a warning message, because Cuckoo can't resolve the
    # ssdeep hash of this particular sample.)
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig()

    if args.quiet:
        logging.disable(logging.WARNING)

    db = Database()

    target = to_unicode(args.target)

    sane_timeout = min(args.timeout, 60 * 60 * 24)

    gateways = Config("auxiliary").gateways
    if args.gateway and args.gateway in gateways:
        if "," in gateways[args.gateway]:
            tgateway = random.choice(gateways[args.gateway].split(","))
            ngateway = gateways[tgateway]
        else:
            ngateway = gateways[args.gateway]
        if args.options:
            args.options += ","
        args.options += "setgw=%s" % (ngateway)

    if args.url:
        if args.remote:
            if not HAVE_REQUESTS:
                print(
                    bold(red("Error")) +
                    ": you need to install python-requests (`pip install requests`)"
                )
                return False

            if args.ssl:
                url = "https://{0}/tasks/create/url".format(args.remote)
            else:
                url = "http://{0}/tasks/create/url".format(args.remote)

            data = dict(url=target,
                        package=args.package,
                        timeout=sane_timeout,
                        options=args.options,
                        priority=args.priority,
                        machine=args.machine,
                        platform=args.platform,
                        memory=args.memory,
                        enforce_timeout=args.enforce_timeout,
                        custom=args.custom,
                        tags=args.tags)

            try:
                if args.user and args.password:
                    if args.ssl:
                        if args.sslnoverify:
                            verify = False
                        else:
                            verify = True
                        response = requests.post(url,
                                                 auth=(args.user,
                                                       args.password),
                                                 data=data,
                                                 verify=verify)
                    else:
                        response = requests.post(url,
                                                 auth=(args.user,
                                                       args.password),
                                                 data=data)
                else:
                    if args.ssl:
                        if args.sslnoverify:
                            verify = False
                        else:
                            verify = True
                        response = requests.post(url, data=data, verify=verify)
                    else:
                        response = requests.post(url, data=data)

            except Exception as e:
                print(
                    bold(red("Error")) + ": unable to send URL: {0}".format(e))
                return False

            resp = response.json()
            task_id = resp["task_id"]
        else:
            task_id = db.add_url(target,
                                 package=args.package,
                                 timeout=sane_timeout,
                                 options=args.options,
                                 priority=args.priority,
                                 machine=args.machine,
                                 platform=args.platform,
                                 custom=args.custom,
                                 memory=args.memory,
                                 enforce_timeout=args.enforce_timeout,
                                 clock=args.clock,
                                 tags=args.tags)

        if task_id:
            if not args.quiet:
                print(
                    bold(green("Success")) +
                    u": URL \"{0}\" added as task with ID {1}".format(
                        target, task_id))
        else:
            print(bold(red("Error")) + ": adding task to database")
    else:
        # Get absolute path to deal with relative.
        path = to_unicode(os.path.abspath(target))

        if not os.path.exists(path):
            print(
                bold(red("Error")) +
                u": the specified file/folder does not exist at path \"{0}\"".
                format(path))
            return False

        files = []
        if os.path.isdir(path):
            for dirname, dirnames, filenames in os.walk(path):
                for file_name in filenames:
                    file_path = os.path.join(dirname, file_name)

                    if os.path.isfile(file_path):
                        if args.pattern:
                            if fnmatch.fnmatch(file_name, args.pattern):
                                files.append(to_unicode(file_path))
                        else:
                            files.append(to_unicode(file_path))
        else:
            files.append(path)

        if args.shuffle:
            random.shuffle(files)
        else:
            files = sorted(files)

        for file_path in files:
            if not File(file_path).get_size():
                if not args.quiet:
                    print(
                        bold(
                            yellow("Empty") +
                            ": sample {0} (skipping file)".format(file_path)))

                continue

            if args.max is not None:
                # Break if the maximum number of samples has been reached.
                if not args.max:
                    break

                args.max -= 1

            if args.remote:
                if not HAVE_REQUESTS:
                    print(
                        bold(red("Error")) +
                        ": you need to install python-requests (`pip install requests`)"
                    )
                    return False
                if args.ssl:
                    url = "https://{0}/tasks/create/file".format(args.remote)
                else:
                    url = "http://{0}/tasks/create/file".format(args.remote)

                files = dict(file=open(file_path, "rb"),
                             filename=os.path.basename(file_path))

                data = dict(package=args.package,
                            timeout=sane_timeout,
                            options=args.options,
                            priority=args.priority,
                            machine=args.machine,
                            platform=args.platform,
                            memory=args.memory,
                            enforce_timeout=args.enforce_timeout,
                            custom=args.custom,
                            tags=args.tags)

                try:
                    if args.user and args.password:
                        if args.ssl:
                            if args.sslnoverify:
                                verify = False
                            else:
                                verify = True
                            response = requests.post(url,
                                                     auth=(args.user,
                                                           args.password),
                                                     files=files,
                                                     data=data,
                                                     verify=verify)
                        else:
                            response = requests.post(url,
                                                     auth=(args.user,
                                                           args.password),
                                                     files=files,
                                                     data=data)
                    else:
                        if args.ssl:
                            if args.sslnoverify:
                                verify = False
                            else:
                                verify = True
                            response = requests.post(url,
                                                     files=files,
                                                     data=data,
                                                     verify=verify)
                        else:
                            response = requests.post(url,
                                                     files=files,
                                                     data=data)

                except Exception as e:
                    print(
                        bold(red("Error")) +
                        ": unable to send file: {0}".format(e))
                    return False

                resp = response.json()
                task_ids = [resp.get("task_ids", None)]

            else:
                if args.unique:
                    sha256 = File(file_path).get_sha256()
                    if db.find_sample(sha256=sha256) is not None:
                        msg = ": Sample {0} (skipping file)".format(file_path)
                        if not args.quiet:
                            print(bold(yellow("Duplicate")) + msg)
                        continue

                task_ids = db.demux_sample_and_add_to_db(
                    file_path=file_path,
                    package=args.package,
                    timeout=sane_timeout,
                    options=args.options,
                    priority=args.priority,
                    machine=args.machine,
                    platform=args.platform,
                    memory=args.memory,
                    custom=args.custom,
                    enforce_timeout=args.enforce_timeout,
                    clock=args.clock,
                    tags=args.tags)

            tasks_count = len(task_ids)
            if tasks_count > 1:
                if not args.quiet:
                    print(
                        bold(green("Success")) +
                        u": File \"{0}\" added as task with IDs {1}".format(
                            file_path, task_ids))
            elif tasks_count > 0:
                if not args.quiet:
                    print(
                        bold(green("Success")) +
                        u": File \"{0}\" added as task with ID {1}".format(
                            file_path, task_ids[0]))
            else:
                print(bold(red("Error")) + ": adding task to database")
Example #52
0
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []
        zipdump = self.options.get("zipdump", False)
        zipstrings = self.options.get("zipstrings", False)
        do_strings = self.options.get("strings", False)
        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = self.options.get("minchars", 5)

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                # If we're re-processing this task with zips enabled, the process
                # dumps won't be re-processed (for now this only matters for Yara).
                if not dmp.endswith(".dmp"):
                    continue

                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(os.path.splitext(os.path.basename(dmp_path))[0])
                if "behavior" in self.results and "processes" in self.results["behavior"]:
                    for process in self.results["behavior"]["processes"]:
                        if process_id == process["process_id"]:
                            process_name = process["process_name"]
                            process_path = process["module_path"]
                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")),
                    address_space=self.parse_dump(dmp_path),
                    zipdump=zipdump,
                    zipstrings=zipstrings,
                )

                if do_strings:
                    try:
                        data = open(dmp_path, "r").read()
                    except (IOError, OSError) as e:
                        raise CuckooProcessingError("Error opening file %s" % e)

                    if nulltermonly:
                        apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
                        upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
                    else:
                        apat = "[\x20-\x7e]{" + str(minchars) + ",}"
                        upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"
                    strings = re.findall(apat, data)
                    strings += [str(ws.decode("utf-16le")) for ws in re.findall(upat, data)]
                    f = open(dmp_path + ".strings", "w")
                    f.write("\n".join(strings))
                    f.close()
                    proc["strings_path"] = dmp_path + ".strings"
                    if zipstrings:
                        try:
                            f = zipfile.ZipFile("%s.zip" % (proc["strings_path"]), "w")
                            f.write(proc["strings_path"], os.path.basename(proc["strings_path"]), zipfile.ZIP_DEFLATED)
                            f.close()
                            os.remove(proc["strings_path"])
                            proc["strings_path"] = "%s.zip" % (proc["strings_path"])
                        except Exception as e:
                            raise CuckooProcessingError("Error creating Process Memory Strings Zip File %s" % e)

                # Deduplicate configs
                if proc["yara"]:
                    for match in proc["yara"]:
                        # Dyre
                        if match["name"] == "DyreCfgInjectsList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<litem>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</litem>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique inject elements)" % len(output)

                        elif match["name"] == "DyreCfgRedirectList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<rpcgroup>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</rpcgroup>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique redirect elements)" % len(output)

                        # DarkComet
                        elif match["name"] == "DarkCometConfig":
                            output = list()
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("#BEGIN DARKCOMET"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("#EOF DARKCOMET"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]

                if zipdump:
                    try:
                        f = zipfile.ZipFile("%s.zip" % (dmp_path), "w")
                        f.write(dmp_path, os.path.basename(dmp_path), zipfile.ZIP_DEFLATED)
                        f.close()
                        os.remove(dmp_path)
                        proc["file"] = "%s.zip" % (dmp_path)
                    except Exception as e:
                        raise CuckooProcessingError("Error creating Process Memory Zip File %s" % e)

                results.append(proc)
        return results
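The ASCII and UTF-16LE patterns used above can also be exercised on their own. A standalone sketch of the same extraction logic, working on an in-memory bytes buffer (the function name and the byte-oriented regexes are illustrative assumptions):

import re

def extract_strings(data, minchars=5, nullterm_only=True):
    # data is the raw dump contents as bytes.
    if nullterm_only:
        apat = rb"([\x20-\x7e]{%d,})\x00" % minchars
        upat = rb"((?:[\x20-\x7e]\x00){%d,})\x00\x00" % minchars
    else:
        apat = rb"[\x20-\x7e]{%d,}" % minchars
        upat = rb"(?:[\x20-\x7e]\x00){%d,}" % minchars
    strings = re.findall(apat, data)
    # Decode UTF-16LE hits and re-encode so the result stays a list of bytes.
    strings += [ws.decode("utf-16le").encode() for ws in re.findall(upat, data)]
    return strings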
Example #53
0
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []
        do_strings = self.options.get("strings", False)
        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = str(self.options.get("minchars", 5)).encode()

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                # If we're re-processing this task with zips enabled, the process
                # dumps won't be re-processed (for now this only matters for Yara).
                if not dmp.endswith(".dmp"):
                    continue

                dmp_path = os.path.join(self.pmemory_path, dmp)
                if os.path.getsize(dmp_path) == 0:
                    continue

                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(
                    os.path.splitext(os.path.basename(dmp_path))[0])
                for process in self.results.get("behavior", {}).get(
                        "processes", []) or []:
                    if process_id == process["process_id"]:
                        process_name = process["process_name"]
                        process_path = process["module_path"]

                procdump = ProcDump(dmp_path, pretty=True)

                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(category="memory"),
                    cape_yara=dmp_file.get_yara(category="CAPE"),
                    address_space=procdump.pretty_print(),
                )

                for hit in proc["cape_yara"]:
                    hit["memblocks"] = {}
                    for item in hit["addresses"]:
                        memblock = self.get_yara_memblock(
                            proc["address_space"], hit["addresses"][item])
                        if memblock:
                            hit["memblocks"][item] = memblock

                # if self.options.get("extract_pe", False)
                extracted_pes = self.get_procmemory_pe(proc)

                endlimit = b""
                if not HAVE_RE2:
                    endlimit = b"8192"

                if do_strings:
                    if nulltermonly:
                        apat = b"([\x20-\x7e]{" + minchars + b"," + endlimit + b"})\x00"
                        upat = b"((?:[\x20-\x7e][\x00]){" + minchars + b"," + endlimit + b"})\x00\x00"
                    else:
                        apat = b"[\x20-\x7e]{" + minchars + b"," + endlimit + b"}"
                        upat = b"(?:[\x20-\x7e][\x00]){" + minchars + b"," + endlimit + b"}"

                    matchdict = procdump.search(apat, all=True)
                    strings = matchdict["matches"]
                    matchdict = procdump.search(upat, all=True)
                    ustrings = matchdict["matches"]
                    for ws in ustrings:
                        strings.append(ws.decode("utf-16le").encode())

                    proc["strings_path"] = f"{dmp_path}.strings"
                    proc["extracted_pe"] = extracted_pes
                    f = open(proc["strings_path"], "wb")
                    f.write(b"\n".join(strings))
                    f.close()

                procdump.close()
                results.append(proc)

                cape_name = cape_name_from_yara(proc, process_id, self.results)
                if cape_name and "detections" not in self.results:
                    self.results["detections"] = cape_name
        return results
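The get_yara_memblock() helper called above is not part of this fragment. Assuming the pretty-printed address space is a list of region dicts carrying hex "start"/"end" boundaries (an assumption about the layout, not a description of the real helper), a lookup along these lines would map a Yara hit address back to its memory region:

def get_yara_memblock(address_space, address):
    # Sketch under an assumed layout; the processing module's real helper may differ.
    try:
        addr = int(address, 16) if isinstance(address, str) else int(address)
    except (TypeError, ValueError):
        return None
    for region in address_space:
        try:
            start = int(region["start"], 16)
            end = int(region["end"], 16)
        except (KeyError, TypeError, ValueError):
            continue
        if start <= addr < end:
            # Return the region base so the hit can be tied to a mapped block.
            return region["start"]
    return None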