Example #1
    def execute(self, request):
        temp_filename = request.file_path

        # Filter out large documents
        if os.path.getsize(temp_filename) > self.max_pdf_size:
            file_res = Result()
            res = (ResultSection(
                f"PDF Analysis of the file was skipped because the "
                f"file is too big (limit is {(self.max_pdf_size / 1000 / 1000)} MB)."
            ))

            file_res.add_section(res)
            request.result = file_res
            return

        filename = os.path.basename(temp_filename)
        with open(temp_filename, 'rb') as f:
            file_content = f.read()

        if '<xdp:xdp'.encode(encoding='UTF-8') in file_content:
            self.find_xdp_embedded(filename, file_content, request)

        self.peepdf_analysis(temp_filename, file_content, request)
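Most of the examples in this collection follow the same reporting pattern; a minimal sketch of that shared skeleton (illustrative only, not an official template; the section title, line and tag values are made up):

    def execute(self, request):
        result = Result()
        section = ResultSection("Example section title")
        section.add_line("One human-readable line of output")
        section.add_tag("network.static.domain", "example.com")  # illustrative tag type reused from later examples
        result.add_section(section)
        request.result = result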
Example #2
    def execute(self, request):
        result = Result()

        file_path = request.file_path
        file_type = request.file_type

        shutil.copyfile(file_path, self.working_directory + "/analyzed")

        p1 = subprocess.Popen(
            "java -jar /var/lib/assemblyline/StegExpose/StegExpose.jar " +
            self.working_directory + " standard default " +
            self.working_directory + "/report.csv",
            shell=True)
        p1.wait()

        lsb_steg_results = self.read_csv(self.working_directory +
                                         "/report.csv")
        lsb_steg_results = self.beautify_dict(lsb_steg_results)

        kv_section = ResultSection("Result of the LSB steganalysis",
                                   body_format=BODY_FORMAT.KEY_VALUE,
                                   body=json.dumps(lsb_steg_results))
        result.add_section(kv_section)

        request.result = result
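The StegExpose call above builds a shell command by string concatenation; a hedged alternative sketch using an argument list (same jar path and directories as in the example, subprocess.run available on Python 3.5+):

        completed = subprocess.run(
            ["java", "-jar", "/var/lib/assemblyline/StegExpose/StegExpose.jar",
             self.working_directory, "standard", "default",
             self.working_directory + "/report.csv"],
            check=False)  # mirrors the original, which never inspects the exit code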
Example #3
    def execute(self, request):
        """Main Module. See README for details."""
        result = Result()
        self.sha = request.sha256
        local = request.file_path

        text_section = None
        kv_section = None

        extracted, metadata = self.dexray(request, local)

        num_extracted = len(request.extracted)
        if num_extracted != 0:
            text_section = ResultSection("DeXRAY found files:")
            for extracted in request.extracted:
                file_name = extracted.get('name')
                text_section.add_line(
                    f"Resubmitted un-quarantined file as : {file_name}")

        if metadata:
            # Can contain live URLs to the original content source
            kv_section = ResultSection("DeXRAY Quarantine Metadata",
                                       body_format=BODY_FORMAT.JSON,
                                       body=json.dumps(metadata))
        # Add whichever sections were created, each exactly once
        for section in (text_section, kv_section):
            if section:
                result.add_section(section)

        request.result = result
Example #4
    def parse_results(response: Dict[str, Any]):
        res = Result()
        response = response['data']

        url_section = ResultSection('VirusTotal report permalink',
                                    body_format=BODY_FORMAT.URL,
                                    body=json.dumps(
                                        {"url": response['links']['self']}))
        res.add_section(url_section)
        response = response['attributes']
        scans = response['last_analysis_results']
        av_hits = ResultSection('Anti-Virus Detections')
        av_hits.add_line(
            f'Found {response["last_analysis_stats"]["malicious"]} AV hit(s) from '
            f'{len(response["last_analysis_results"].keys())} scan engines.')
        for majorkey, subdict in sorted(scans.items()):
            if subdict['category'] == "malicious":
                virus_name = subdict['result']
                av_hit_section = AvHitSection(majorkey, virus_name)
                av_hit_section.set_heuristic(
                    1, signature=f'{majorkey}.{virus_name}')
                av_hit_section.add_tag('av.virus_name', virus_name)
                av_hits.add_subsection(av_hit_section)

        res.add_section(av_hits)

        return res
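For reference, a minimal, hypothetical response dict that would satisfy this parser (field names inferred from the accesses above; not a complete VirusTotal v3 payload):

    sample_response = {
        "data": {
            "links": {"self": "https://example.invalid/report-permalink"},
            "attributes": {
                "last_analysis_stats": {"malicious": 1},
                "last_analysis_results": {
                    "SomeEngine": {"category": "malicious", "result": "Trojan.Generic"},
                },
            },
        },
    }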
Example #5
    def execute(self, request):
        result = Result()
        url = request.task.metadata.get('submitted_url')
        api_key = request.get_param("api_key")
        public = request.get_param("public")

        u = UrlScan(apikey=api_key, url=url, public=public)
        u.submit()

        # We need to wait for the API to process our request
        response = self.wait_processing(u)

        # We get the response parts that we want and merge them all together
        report = {
            **response.json()["verdicts"]["overall"],
            **response.json()["lists"],
            **response.json()["page"]
        }

        # We convert the "certificates" section from a list of dictionaries to a dictionary of lists
        certificates = report.pop("certificates")
        certificates = {
            k: [dic[k] for dic in certificates]
            for k in certificates[0]
        }

        # We add the converted section to the report
        report = {**report, **certificates}

        # We create the KEY_VALUE section to add the report to the result page
        kv_section = ResultSection("Urlscan.io report",
                                   body_format=BODY_FORMAT.KEY_VALUE,
                                   body=json.dumps(report))

        for domain in report["domains"]:
            kv_section.add_tag("network.static.domain", domain.strip())

        result.add_section(kv_section)

        # We get the preview of the website
        screenshot = u.getScreenshot()
        with open(self.working_directory + "/preview.png", "wb") as ofile:
            ofile.write(screenshot)

        # Adding the preview on the result page
        url_section = ResultSection(
            'Urlscan.io website screenshot',
            body_format=BODY_FORMAT.URL,
            body=json.dumps({
                "name": "The preview is also available here!",
                "url": response.json()["task"]["screenshotURL"]
            }))
        result.add_section(url_section)
        request.add_extracted(self.working_directory + "/preview.png",
                              "preview.png", "Here's the preview of the site")

        request.result = result
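The certificates conversion above (list of dicts to dict of lists) is easier to see on a tiny worked example with hypothetical keys:

    # [{"subjectName": "a.example", "validTo": 1}, {"subjectName": "b.example", "validTo": 2}]
    # becomes
    # {"subjectName": ["a.example", "b.example"], "validTo": [1, 2]}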
Example #6
 def test_reduce():
     from assemblyline_v4_service.common.section_reducer import reduce
     from assemblyline_v4_service.common.result import Result, ResultSection
     res = Result()
     result_section = ResultSection("blah")
     res.add_section(result_section)
     reduce(res)
     # Code coverage only
     assert True
Example #7
 def resubmit_dex2jar_output(self, apk_file: str, target: str, result: Result, request):
     dex = os.path.join(self.working_directory, "classes.dex")
     self.get_dex(apk_file, dex)
     if os.path.exists(dex):
         d2j = Popen([self.dex2jar, "--output", target, dex],
                     stdout=PIPE, stderr=PIPE)
         d2j.communicate()
         if os.path.exists(target):
             res_sec = ResultSection("Classes.dex file was recompiled as a JAR and re-submitted for analysis")
             res_sec.add_line(f"JAR file resubmitted as: {os.path.basename(target)}")
             request.add_extracted(target, os.path.basename(target), "Dex2Jar output JAR file")
             result.add_section(res_sec)
Example #8
    def execute(self, request: ServiceRequest) -> None:
        sha256 = request.sha256
        result = Result()

        # First, let's get the analysis metadata, if it exists on the system
        main_api_result = self._get_analysis_metadata(
            request.get_param('analysis_id'), sha256)

        if not main_api_result:
            self.log.debug(f"SHA256 {sha256} is not on the system.")
            request.result = result
            return

        if main_api_result.get(
                "verdict") in Verdicts.NOT_SUPPORTED_VERDICTS.value:
            self.log.debug(f"Unsupported file type: {request.file_type}")
            request.result = result
            return
        elif main_api_result.get("verdict") == AnalysisStatusCode.FAILED.value:
            self.log.warning("The Intezer server is not feeling well :(")
            request.result = result
            return

        analysis_id = main_api_result["analysis_id"]

        # Setup the main result section
        main_kv_section = ResultKeyValueSection(
            "IntezerStatic analysis report")
        processed_main_api_result = self._process_details(
            main_api_result.copy(), UNINTERESTING_ANALYSIS_KEYS)
        main_kv_section.update_items(processed_main_api_result)
        if "family_name" in main_api_result:
            main_kv_section.add_tag("attribution.family",
                                    main_api_result["family_name"])

        # This file-verdict map will be used later on to assign heuristics to sub-analyses
        file_verdict_map = {}
        self._process_iocs(analysis_id, file_verdict_map, main_kv_section)
        if not self.config["is_on_premise"]:
            self._process_ttps(analysis_id, main_kv_section)
        self._handle_subanalyses(request, sha256, analysis_id,
                                 file_verdict_map, main_kv_section)

        # Setting heuristic here to avoid FPs
        if main_kv_section.subsections:
            self._set_heuristic_by_verdict(main_kv_section,
                                           main_api_result["verdict"])

        if main_kv_section.subsections or main_kv_section.heuristic:
            result.add_section(main_kv_section)
        request.result = result
Example #9
 def test_parse_results(response, correct_res_secs,
                        metadefender_class_instance):
     from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT, Heuristic
     metadefender_class_instance.blocklist = ["a"]
     metadefender_class_instance.sig_score_revision_map = {}
     metadefender_class_instance.kw_score_revision_map = {}
     metadefender_class_instance.current_node = "http://blah"
     metadefender_class_instance.nodes[
         metadefender_class_instance.current_node] = {
             "engine_map": {
                 "z": {
                     "version": "blah",
                     "def_time": "blah"
                 },
                 "y": {
                     "version": "blah",
                     "def_time": "blah"
                 }
             },
             "queue_times": [],
             "file_count": 0
         }
     correct_result = Result()
     for correct_res_sec in correct_res_secs:
         section = ResultSection(
             correct_res_sec["title_text"],
             body_format=BODY_FORMAT.TEXT if
             not correct_res_sec.get("body_format") else BODY_FORMAT.JSON,
             body=correct_res_sec.get("body"))
         for subsec in correct_res_sec.get("subsections", []):
             subsection = ResultSection(
                 subsec["title_text"],
                 body=subsec["body"],
                 body_format=BODY_FORMAT.KEY_VALUE,
                 tags=subsec.get("tags"),
             )
             if subsec.get("heuristic"):
                 subsection.set_heuristic(subsec["heuristic"]["heur_id"])
                 print(subsec["heuristic"]["signatures"])
                 for key in subsec["heuristic"]["signatures"].keys():
                     subsection.heuristic.add_signature_id(key)
             section.add_subsection(subsection)
         correct_result.add_section(section)
     actual_result = metadefender_class_instance.parse_results(response)
     for index, section in enumerate(actual_result.sections):
         assert check_section_equality(section,
                                       correct_result.sections[index])
Example #10
    def execute(self, request: ServiceRequest) -> None:
        result = Result()
        request.result = result

        # Get AV labels from previous services
        av_labels = request.task.tags.get('av.virus_name')
        if not av_labels:
            return

        # Extract AVclass tags
        av_tags = self._get_avclass_tags(request.md5, request.sha1,
                                         request.sha256, av_labels)
        if av_tags is None:
            return

        # Build results
        section = self._get_result_section(av_tags.family, av_tags.is_pup)
        for tag_section in self._get_category_sections(av_tags.tags):
            section.add_subsection(tag_section)

        result.add_section(section)
Example #11
	def execute(self, request):
		result = Result()
		file = request.file_path

		with open(file, "rb") as f:
			file_content = f.read()

		content_list = autoit_ripper.extract(data=file_content)

		if content_list:
			content = content_list[0][1].decode("utf-8")

			text_section = ResultSection('[DUMP RESULT]')
			text_section.add_line(content)
			text_section.set_heuristic(1)
			result.add_section(text_section)

			with open(self.working_directory + "/script.au3", "w") as f:
				f.write(content)
			request.add_extracted(self.working_directory + '/script.au3', 'script.au3', 'This is the unpacked script')
		
		request.result = result
Example #12
    def execute(self, request):
        qr = xqrcode.decode_from_file(request.file_path)
        if len(qr) > 0:
            result_url = qr[0]['data']
            result = Result()
            text_section = ResultSection('QR Code')
            text_section.add_line(result_url)
            result.add_section(text_section)

            url_section = ResultSection('url extracted',
                                        body_format=BODY_FORMAT.URL,
                                        body=json.dumps({
                                            "name": "QR Code Url",
                                            "url": f"{result_url}"
                                        }))

            url_section.add_tag("network.static.domain", result_url)
            result.add_section(url_section)

            request.result = result
        else:
            request.result = Result()
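Note that the snippet above tags the full decoded URL as network.static.domain; a hedged sketch of pulling out just the hostname with the standard library before tagging (illustrative only, not part of the original service):

    from urllib.parse import urlparse

    host = urlparse(result_url).hostname
    if host:
        url_section.add_tag("network.static.domain", host)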
Example #13
    def execute(self, request):
        result = Result()
        file_path = request.file_path

        p1 = subprocess.Popen("clamscan -a -z --detect-pua --alert-macros " +
                              file_path,
                              shell=True,
                              stdout=subprocess.PIPE)
        # communicate() waits for the process and drains stdout, avoiding the
        # deadlock risk of calling wait() first while the PIPE buffer fills
        stdout = p1.communicate()[0].decode("utf-8")

        report = stdout.split("\n")
        report = list(filter(None, report))

        text_section = ResultSection("Successfully scanned the file")
        if "FOUND" in report[0]:
            text_section.set_heuristic(1)

        for line in report:
            text_section.add_line(line)

        result.add_section(text_section)
        request.result = result
Example #14
 def execute(self, request: ServiceRequest) -> None:
     result = Result()
     self.hits = {}  # clear the hits dict
     path = request.file_path
     file_name = request.file_name
     self.log.info(f" Executing {file_name}")
     self.log.info(f"Number of rules {len(self.sigma_parser.rules)}")
     self.sigma_parser.register_callback(self.sigma_hit)
     self.sigma_parser.check_logfile(path)
     if len(self.hits) > 0:
         hit_section = ResultSection('Events detected as suspicious')
         # group alerts together
         for id, events in self.hits.items():
             title = self.sigma_parser.rules[id].title
             section = SigmaHitSection(title, events)
             tags = self.sigma_parser.rules[id].tags
             # Reset per rule; otherwise a previous rule's ATT&CK ID leaks into the next one
             attack_id = None
             if tags:
                 for tag in tags:
                     name = tag[7:]
                     if name.startswith(('t', 'g', 's')):
                         attack_id = name.upper()
             source = events[0]['signature_source']
             if attack_id:
                 section.set_heuristic(get_heur_id(events[0]['score']),
                                       attack_id=attack_id,
                                       signature=f"{source}.{title}")
                 section.add_tag(f"file.rule.{source}", f"{source}.{title}")
             else:
                 section.set_heuristic(get_heur_id(events[0]['score']),
                                       signature=f"{source}.{title}")
                 section.add_tag(f"file.rule.{source}", f"{source}.{title}")
             for event in events:
                 # add the event data as a subsection
                 section.add_subsection(EventDataSection(event))
             hit_section.add_subsection(section)
         result.add_section(hit_section)
     request.result = result
Example #15
    def parse_results(self, response: Dict[str, Any]) -> Result:
        """
        This method parses the response JSON containing the scan results so that it will be displayed nicely in
        Assemblyline
        :param response: The raw results from the MetaDefender scan
        :return: The Result object to be used when displaying in Assemblyline
        """
        res = Result()
        scan_results = response.get('scan_results', response)
        virus_name = ""
        process_results = response.get('process_info', response)
        hit = False
        fail = False
        processed = {}
        if scan_results is not None and scan_results.get(
                'progress_percentage') == 100:
            no_threat_detected = []
            av_hits = ResultSection('AV Detections as Infected or Suspicious')
            av_fails = ResultSection('Failed to Scan or No Threats Detected')

            scans = scan_results.get('scan_details', scan_results)
            av_scan_times = []
            modified_scans = {
                key: value
                for key, value in scans.items()
                if key not in ["progress_percentage"]
            }
            for majorkey, subdict in sorted(modified_scans.items()):
                if majorkey in self.blocklist:
                    continue
                heur_id = None
                if subdict['scan_result_i'] == 1:  # File is infected
                    virus_name = subdict['threat_found']
                    if virus_name:
                        heur_id = 1
                elif subdict['scan_result_i'] == 2:  # File is suspicious
                    virus_name = subdict['threat_found']
                    if virus_name:
                        heur_id = 2
                elif subdict['scan_result_i'] == 10 or subdict[
                        'scan_result_i'] == 3:  # File was not scanned or failed
                    # noinspection PyBroadException
                    try:
                        engine = self.nodes[self.current_node]['engine_map'][
                            self._format_engine_name(majorkey)]
                    except Exception:
                        engine = None
                    fail = True
                    av_fails.add_subsection(AvErrorSection(majorkey, engine))
                elif subdict['scan_result_i'] == 0:  # No threat detected
                    no_threat_detected.append(majorkey)
                    fail = True

                if heur_id is not None:
                    virus_name = virus_name.replace("a variant of ", "")
                    engine = self.nodes[self.current_node]['engine_map'][
                        self._format_engine_name(majorkey)]
                    av_hit_section = AvHitSection(majorkey, virus_name, engine,
                                                  heur_id,
                                                  self.sig_score_revision_map,
                                                  self.kw_score_revision_map,
                                                  self.safelist_match)
                    av_hits.add_subsection(av_hit_section)
                    hit = True

                av_scan_times.append(self._format_engine_name(majorkey))
                av_scan_times.append(subdict['scan_time'])

            if hit:
                res.add_section(av_hits)

            # Only create a result section for "No Threat Detected" if there was at least one hit
            if hit and fail:
                if no_threat_detected:
                    ResultSection(
                        "No Threat Detected by AV Engine(s)",
                        body_format=BODY_FORMAT.KEY_VALUE,
                        body=json.dumps(
                            dict(no_threat_detected=no_threat_detected)),
                        parent=av_fails)

                res.add_section(av_fails)

            file_size = response['file_info']['file_size']
            queue_time = response['process_info']['queue_time']
            processing_time = response['process_info']['processing_time']
            self.log.info(
                f"File successfully scanned by node ({self.current_node}). File size: {file_size} B. "
                f"Queue time: {queue_time} ms. Processing time: {processing_time} ms. "
                f"AV scan times: {str(av_scan_times)}")

            # Add the queue time to a list, which will be later used to calculate average queue time
            self.nodes[self.current_node]['queue_times'].append(queue_time)
            self.nodes[self.current_node]['file_count'] += 1
        if process_results is not None and process_results.get(
                'progress_percentage') == 100:
            hit = False
            fail = False
            processed = process_results.get('post_processing', process_results)
            if processed['actions_failed']:
                fail = True
            elif processed['actions_ran']:
                hit = True
        # Add the CDR (Content Disarm and Reconstruction) post-processing results as a JSON section
        if hit:
            cdr_json_section = ResultSection('CDR Successfully Executed',
                                             body_format=BODY_FORMAT.JSON,
                                             body=json.dumps(processed))
            res.add_section(cdr_json_section)
        if fail:
            cdr_fails = ResultSection('CDR Failed or No Malicious Files Found')
            res.add_section(cdr_fails)

        return res
Example #16
    def run_badging_analysis(self, apk_file: str, result: Result):
        badging_args = ['d', 'badging', apk_file]
        badging, errors = self.run_appt(badging_args)
        if not badging:
            return
        res_badging = ResultSection("Android application details")
        libs = []
        permissions = []
        components = []
        features = []
        pkg_version = None
        for line in badging.splitlines():
            if line.startswith("package:"):
                pkg_name = line.split("name='")[1].split("'")[0]
                pkg_version = line.split("versionCode='")[1].split("'")[0]
                res_badging.add_line(f"Package: {pkg_name} v.{pkg_version}")
                res_badging.add_tag('file.apk.pkg_name', pkg_name)
                res_badging.add_tag('file.apk.app.version', pkg_version)

            if line.startswith("sdkVersion:"):
                min_sdk = line.split(":'")[1][:-1]
                res_badging.add_line(f"Min SDK: {min_sdk}")
                res_badging.add_tag('file.apk.sdk.min', min_sdk)

            if line.startswith("targetSdkVersion:"):
                target_sdk = line.split(":'")[1][:-1]
                res_badging.add_line(f"Target SDK: {target_sdk}")
                res_badging.add_tag('file.apk.sdk.target', target_sdk)

            if line.startswith("application-label:"):
                label = line.split(":'")[1][:-1]
                res_badging.add_line(f"Default Label: {label}")
                res_badging.add_tag('file.apk.app.label', label)

            if line.startswith("launchable-activity:"):
                launch = line.split("name='")[1].split("'")[0]
                res_badging.add_line(f"Launchable activity: {launch}")
                res_badging.add_tag('file.apk.activity', launch)

            if line.startswith("uses-library-not-required:"):
                lib = line.split(":'")[1][:-1]
                if lib not in libs:
                    libs.append(lib)

            if line.startswith("uses-permission:") or line.startswith("uses-implied-permission:"):
                perm = line.split("name='")[1].split("'")[0]
                if perm not in permissions:
                    permissions.append(perm)

            if line.startswith("provides-component:"):
                component = line.split(":'")[1][:-1]
                if component not in components:
                    components.append(component)

            if "uses-feature:" in line or "uses-implied-feature:" in line:
                feature = line.split("name='")[1].split("'")[0]
                if feature not in features:
                    features.append(feature)

        if pkg_version is not None:
            pkg_version = int(pkg_version)
            if pkg_version < 15:
                ResultSection("Package version is suspiciously low", parent=res_badging,
                              heuristic=Heuristic(17))
            elif pkg_version > 999999999:
                ResultSection("Package version is suspiciously high", parent=res_badging,
                              heuristic=Heuristic(17))

        if libs:
            res_lib = ResultSection("Libraries used", parent=res_badging)
            for lib in libs:
                res_lib.add_line(lib)
                res_lib.add_tag('file.apk.used_library', lib)

        if permissions:
            res_permissions = ResultSection("Permissions used", parent=res_badging)
            dangerous_permissions = []
            unknown_permissions = []
            for perm in permissions:
                if perm in ALL_ANDROID_PERMISSIONS:
                    if 'dangerous' in ALL_ANDROID_PERMISSIONS[perm]:
                        dangerous_permissions.append(perm)
                    else:
                        res_permissions.add_line(perm)
                        res_permissions.add_tag('file.apk.permission', perm)
                else:
                    unknown_permissions.append(perm)

            if len(set(permissions)) < len(permissions):
                ResultSection("Some permissions are defined more than once", parent=res_badging,
                              heuristic=Heuristic(18))

            if dangerous_permissions:
                res_dangerous_perm = ResultSection("Dangerous permissions used", parent=res_badging,
                                                   heuristic=Heuristic(4))
                for perm in dangerous_permissions:
                    res_dangerous_perm.add_line(perm)
                    res_dangerous_perm.add_tag('file.apk.permission', perm)

            if unknown_permissions:
                res_unknown_perm = ResultSection("Unknown permissions used", parent=res_badging,
                                                 heuristic=Heuristic(5))
                for perm in unknown_permissions:
                    res_unknown_perm.add_line(perm)
                    res_unknown_perm.add_tag('file.apk.permission', perm)

        if features:
            res_features = ResultSection("Features used", parent=res_badging)
            for feature in features:
                res_features.add_line(feature)
                res_features.add_tag('file.apk.feature', feature)

        if components:
            res_components = ResultSection("Components provided", parent=res_badging)
            for component in components:
                res_components.add_line(component)
                res_components.add_tag('file.apk.provides_component', component)

        result.add_section(res_badging)
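The split-based parsing in run_badging_analysis is easier to follow against a sample line of aapt badging output (illustrative values; the exact output varies by aapt version):

    # line = "package: name='com.example.app' versionCode='7' versionName='1.2'"
    # line.split("name='")[1].split("'")[0]        -> "com.example.app"
    # line.split("versionCode='")[1].split("'")[0] -> "7"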
Example #17
    def peepdf_analysis(self, temp_filename, file_content, request):
        file_res = Result()
        try:
            res_list = []
            # js_stream = []
            f_list = []
            js_dump = []

            pdf_parser = PDFParser()
            ret, pdf_file = pdf_parser.parse(temp_filename, True, False, file_content)
            if ret == 0:
                stats_dict = pdf_file.getStats()

                if ", ".join(stats_dict['Errors']) == "Bad PDF header, %%EOF not found, PDF sections not found, No " \
                                                      "indirect objects found in the body":
                    # Not a PDF
                    return

                json_body = dict(
                    version=stats_dict['Version'],
                    binary=stats_dict['Binary'],
                    linearized=stats_dict['Linearized'],
                    encrypted=stats_dict['Encrypted'],
                )

                if stats_dict['Encryption Algorithms']:
                    temp = []
                    for algorithmInfo in stats_dict['Encryption Algorithms']:
                        temp.append(f"{algorithmInfo[0]} {str(algorithmInfo[1])} bits")
                    json_body["encryption_algorithms"] = temp

                json_body.update(dict(
                    updates=stats_dict['Updates'],
                    objects=stats_dict['Objects'],
                    streams=stats_dict['Streams'],
                    comments=stats_dict['Comments'],
                    errors={True: ", ".join(stats_dict['Errors']),
                            False: "None"}[len(stats_dict['Errors']) != 0]
                ))
                res = ResultSection("PDF File Information", body_format=BODY_FORMAT.KEY_VALUE,
                                    body=json.dumps(json_body))

                for version in range(len(stats_dict['Versions'])):
                    stats_version = stats_dict['Versions'][version]
                    v_json_body = dict(
                        catalog=stats_version['Catalog'] or "no",
                        info=stats_version['Info'] or "no",
                        objects=self.list_first_x(stats_version['Objects'][1]),
                    )

                    if stats_version['Compressed Objects'] is not None:
                        v_json_body['compressed_objects'] = self.list_first_x(stats_version['Compressed Objects'][1])

                    if stats_version['Errors'] is not None:
                        v_json_body['errors'] = self.list_first_x(stats_version['Errors'][1])

                    v_json_body['streams'] = self.list_first_x(stats_version['Streams'][1])

                    if stats_version['Xref Streams'] is not None:
                        v_json_body['xref_streams'] = self.list_first_x(stats_version['Xref Streams'][1])

                    if stats_version['Object Streams'] is not None:
                        v_json_body['object_streams'] = self.list_first_x(stats_version['Object Streams'][1])

                    if int(stats_version['Streams'][0]) > 0:
                        v_json_body['encoded'] = self.list_first_x(stats_version['Encoded'][1])
                        if stats_version['Decoding Errors'] is not None:
                            v_json_body['decoding_errors'] = self.list_first_x(stats_version['Decoding Errors'][1])

                    if stats_version['Objects with JS code'] is not None:
                        v_json_body['objects_with_js_code'] = \
                            self.list_first_x(stats_version['Objects with JS code'][1])
                        # js_stream.extend(stats_version['Objects with JS code'][1])

                    res_version = ResultSection(f"Version {str(version)}", parent=res,
                                                body_format=BODY_FORMAT.KEY_VALUE, body=json.dumps(v_json_body))

                    actions = stats_version['Actions']
                    events = stats_version['Events']
                    vulns = stats_version['Vulns']
                    elements = stats_version['Elements']
                    is_suspicious = False
                    if events is not None or actions is not None or vulns is not None or elements is not None:
                        res_suspicious = ResultSection('Suspicious elements', parent=res_version)
                        if events is not None:
                            for event in events:
                                res_suspicious.add_line(f"{event}: {self.list_first_x(events[event])}")
                            is_suspicious = True
                        if actions is not None:
                            for action in actions:
                                res_suspicious.add_line(f"{action}: {self.list_first_x(actions[action])}")
                            is_suspicious = True
                        if vulns is not None:
                            for vuln in vulns:
                                if vuln in vulnsDict:
                                    temp = [vuln, ' (']
                                    for vulnCVE in vulnsDict[vuln]:
                                        if len(temp) != 2:
                                            temp.append(',')
                                        # Only the comma is conditional; the CVE itself is appended for
                                        # every entry, matching the elements loop below
                                        vulnCVE = "".join(vulnCVE) if isinstance(vulnCVE, list) else vulnCVE
                                        temp.append(vulnCVE)
                                        cve_found = re.search("CVE-[0-9]{4}-[0-9]{4}", vulnCVE)
                                        if cve_found:
                                            res_suspicious.add_tag('attribution.exploit',
                                                                   vulnCVE[cve_found.start():cve_found.end()])
                                            res_suspicious.add_tag('file.behavior',
                                                                   vulnCVE[cve_found.start():cve_found.end()])
                                    temp.append('): ')
                                    temp.append(str(vulns[vuln]))
                                    res_suspicious.add_line(temp)
                                else:
                                    res_suspicious.add_line(f"{vuln}: {str(vulns[vuln])}")
                                is_suspicious = True
                        if elements is not None:
                            for element in elements:
                                if element in vulnsDict:
                                    temp = [element, ' (']
                                    for vulnCVE in vulnsDict[element]:
                                        if len(temp) != 2:
                                            temp.append(',')
                                        vulnCVE = "".join(vulnCVE) if isinstance(vulnCVE, list) else vulnCVE
                                        temp.append(vulnCVE)
                                        cve_found = re.search("CVE-[0-9]{4}-[0-9]{4}", vulnCVE)
                                        if cve_found:
                                            res_suspicious.add_tag('attribution.exploit',
                                                                   vulnCVE[cve_found.start():cve_found.end()])
                                            res_suspicious.add_tag('file.behavior',
                                                                   vulnCVE[cve_found.start():cve_found.end()])
                                    temp.append('): ')
                                    temp.append(str(elements[element]))
                                    res_suspicious.add_line(temp)
                                    is_suspicious = True
                                else:
                                    res_suspicious.add_line(f"\t\t{element}: {str(elements[element])}")
                                    is_suspicious = True
                    if is_suspicious:
                        res_suspicious.set_heuristic(8)

                    urls = stats_version['URLs']
                    if urls is not None:
                        res.add_line("")
                        res_url = ResultSection('Found URLs', parent=res)
                        for url in urls:
                            res_url.add_line(f"\t\t{url}")
                            res_url.set_heuristic(9)

                    for obj in stats_version['Objects'][1]:
                        cur_obj = pdf_file.getObject(obj, version)

                        if cur_obj.containsJScode:
                            cur_res = ResultSection(f"Object [{obj} {version}] contains {len(cur_obj.JSCode)} "
                                                    f"block of JavaScript")
                            score_modifier = 0

                            js_idx = 0
                            for js in cur_obj.JSCode:
                                sub_res = ResultSection('Block of JavaScript', parent=cur_res)
                                js_idx += 1
                                js_score = 0
                                js_code, unescaped_bytes, _, _, _ = analyseJS(js)

                                js_dump += [x for x in js_code]

                                # Malicious characteristics
                                big_buffs = self.get_big_buffs("".join(js_code))
                                if len(big_buffs) == 1:
                                    js_score += 500 * len(big_buffs)
                                if len(big_buffs) > 0:
                                    js_score += 500 * len(big_buffs)
                                has_eval, has_unescape = self.check_dangerous_func("".join(js_code))
                                if has_unescape:
                                    js_score += 100
                                if has_eval:
                                    js_score += 100

                                js_cmt = ""
                                if has_eval or has_unescape or len(big_buffs) > 0:
                                    score_modifier += js_score
                                    js_cmt = "Suspiciously malicious "
                                    cur_res.add_tag('file.behavior', "Suspicious JavaScript in PDF")
                                    sub_res.set_heuristic(7)
                                js_res = ResultSection(f"{js_cmt}JavaScript Code (block: {js_idx})", parent=sub_res)

                                if js_score > 0:
                                    temp_js_outname = f"object{obj}-{version}_{js_idx}.js"
                                    temp_js_path = os.path.join(self.working_directory, temp_js_outname)
                                    temp_js_bin = "".join(js_code).encode("utf-8")
                                    f = open(temp_js_path, "wb")
                                    f.write(temp_js_bin)
                                    f.close()
                                    f_list.append(temp_js_path)

                                    js_res.add_line(f"The JavaScript block was saved as {temp_js_outname}")
                                    if has_eval or has_unescape:
                                        analysis_res = ResultSection("[Suspicious Functions]", parent=js_res)
                                        if has_eval:
                                            analysis_res.add_line("eval: This JavaScript block uses eval() function "
                                                                  "which is often used to launch deobfuscated "
                                                                  "JavaScript code.")
                                            analysis_res.set_heuristic(3)
                                        if has_unescape:
                                            analysis_res.add_line("unescape: This JavaScript block uses unescape() "
                                                                  "function. It may be legitimate but it is definitely "
                                                                  "suspicious since malware often use this to "
                                                                  "deobfuscate code blocks.")
                                            analysis_res.set_heuristic(3)

                                    buff_idx = 0
                                    for buff in big_buffs:
                                        buff_idx += 1
                                        error, new_buff = unescape(buff)
                                        if error == 0:
                                            buff = new_buff

                                        if buff not in unescaped_bytes:
                                            temp_path_name = None
                                            if ";base64," in buff[:100] and "data:" in buff[:100]:
                                                temp_path_name = f"obj{obj}_unb64_{buff_idx}.buff"
                                                try:
                                                    buff = b64decode(buff.split(";base64,")[1].strip())
                                                    temp_path = os.path.join(self.working_directory, temp_path_name)
                                                    f = open(temp_path, "wb")
                                                    f.write(buff)
                                                    f.close()
                                                    f_list.append(temp_path)
                                                except Exception:
                                                    self.log.error("Found 'data:;base64, ' buffer "
                                                                   "but failed to base64 decode.")
                                                    temp_path_name = None

                                            if temp_path_name is not None:
                                                buff_cond = f" and was resubmitted as {temp_path_name}"
                                            else:
                                                buff_cond = ""
                                            buff_res = ResultSection(
                                                f"A {len(buff)} bytes buffer was found in the JavaScript "
                                                f"block{buff_cond}. Here are the first 256 bytes.",
                                                parent=js_res, body=hexdump(bytes(buff[:256], "utf-8")),
                                                body_format=BODY_FORMAT.MEMORY_DUMP)
                                            buff_res.set_heuristic(2)

                                processed_sc = []
                                sc_idx = 0
                                for sc in unescaped_bytes:
                                    if sc not in processed_sc:
                                        sc_idx += 1
                                        processed_sc.append(sc)

                                        try:
                                            # Python 2 idiom: under Python 3 there is no "hex" codec,
                                            # so this raises and is swallowed by the except below
                                            sc = sc.decode("hex")
                                        except Exception:
                                            pass

                                        shell_score = 500
                                        temp_path_name = f"obj{obj}_unescaped_{sc_idx}.buff"

                                        shell_res = ResultSection(f"Unknown unescaped {len(sc)} bytes JavaScript "
                                                                  f"buffer (id: {sc_idx}) was resubmitted as "
                                                                  f"{temp_path_name}. Here are the first 256 bytes.",
                                                                  parent=js_res)
                                        shell_res.set_body(hexdump(sc[:256]), body_format=BODY_FORMAT.MEMORY_DUMP)

                                        temp_path = os.path.join(self.working_directory, temp_path_name)
                                        f = open(temp_path, "wb")
                                        f.write(sc)
                                        f.close()
                                        f_list.append(temp_path)

                                        cur_res.add_tag('file.behavior', "Unescaped JavaScript Buffer")
                                        shell_res.set_heuristic(6)
                                        score_modifier += shell_score

                            if score_modifier > 0:
                                res_list.append(cur_res)

                        elif cur_obj.type == "stream":
                            if cur_obj.isEncodedStream and cur_obj.filter is not None:
                                data = cur_obj.decodedStream
                                encoding = cur_obj.filter.value.replace("[", "").replace("]", "").replace("/",
                                                                                                          "").strip()
                                val = cur_obj.rawValue
                                otype = cur_obj.elements.get("/Type", None)
                                sub_type = cur_obj.elements.get("/Subtype", None)
                                length = cur_obj.elements.get("/Length", None)

                            else:
                                data = cur_obj.rawStream
                                encoding = None
                                val = cur_obj.rawValue
                                otype = cur_obj.elements.get("/Type", None)
                                sub_type = cur_obj.elements.get("/Subtype", None)
                                length = cur_obj.elements.get("/Length", None)

                            if otype:
                                otype = otype.value.replace("/", "").lower()
                            if sub_type:
                                sub_type = sub_type.value.replace("/", "").lower()
                            if length:
                                length = length.value

                            if otype == "embeddedfile":
                                if len(data) > 4096:
                                    if encoding is not None:
                                        temp_encoding_str = f"_{encoding}"
                                    else:
                                        temp_encoding_str = ""

                                    cur_res = ResultSection(
                                        f'Embedded file found ({length} bytes) [obj: {obj} {version}] '
                                        f'and dumped for analysis {f"(Type: {otype}) " if otype is not None else ""}'
                                        f'{f"(SubType: {sub_type}) " if sub_type is not None else ""}'
                                        f'{f"(Encoded with {encoding})" if encoding is not None else ""}'
                                    )

                                    temp_path_name = f"EmbeddedFile_{obj}{temp_encoding_str}.obj"
                                    temp_path = os.path.join(self.working_directory, temp_path_name)
                                    f = open(temp_path, "wb")
                                    f.write(data)
                                    f.close()
                                    f_list.append(temp_path)

                                    cur_res.add_line(f"The EmbeddedFile object was saved as {temp_path_name}")
                                    res_list.append(cur_res)

                            elif otype not in BANNED_TYPES:
                                cur_res = ResultSection(
                                    f'Unknown stream found [obj: {obj} {version}] '
                                    f'{f"(Type: {otype}) " if otype is not None else ""}'
                                    f'{f"(SubType: {sub_type}) " if sub_type is not None else ""}'
                                    f'{f"(Encoded with {encoding})" if encoding is not None else ""}'
                                )
                                for line in val.splitlines():
                                    cur_res.add_line(line)

                                emb_res = ResultSection('First 256 bytes', parent=cur_res)
                                first_256 = data[:256]
                                if isinstance(first_256, str):
                                    first_256 = first_256.encode()
                                emb_res.set_body(hexdump(first_256), BODY_FORMAT.MEMORY_DUMP)
                                res_list.append(cur_res)
                        else:
                            pass

                file_res.add_section(res)

                for results in res_list:
                    file_res.add_section(results)

                if js_dump:
                    js_dump_res = ResultSection('Full JavaScript dump')

                    temp_js_dump = "javascript_dump.js"
                    temp_js_dump_path = os.path.join(self.working_directory, temp_js_dump)
                    try:
                        temp_js_dump_bin = "\n\n----\n\n".join(js_dump).encode("utf-8")
                    except UnicodeDecodeError:
                        temp_js_dump_bin = "\n\n----\n\n".join(js_dump)
                    temp_js_dump_sha1 = hashlib.sha1(temp_js_dump_bin).hexdigest()
                    f = open(temp_js_dump_path, "wb")
                    f.write(temp_js_dump_bin)
                    f.flush()
                    f.close()
                    f_list.append(temp_js_dump_path)

                    js_dump_res.add_line(f"The JavaScript dump was saved as {temp_js_dump}")
                    js_dump_res.add_line(f"The SHA-1 for the JavaScript dump is {temp_js_dump_sha1}")

                    js_dump_res.add_tag('file.pdf.javascript.sha1', temp_js_dump_sha1)
                    file_res.add_section(js_dump_res)

                for filename in f_list:
                    request.add_extracted(filename, os.path.basename(filename),
                                          f"Dumped from {os.path.basename(temp_filename)}")

            else:
                res = ResultSection("ERROR: Could not parse file with PeePDF.")
                file_res.add_section(res)
        finally:
            request.result = file_res
            try:
                del pdf_file
            except Exception:
                pass

            try:
                del pdf_parser
            except Exception:
                pass

            gc.collect()
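The repeated open/write/close triplets in peepdf_analysis (JavaScript blocks, unescaped buffers, embedded files, the final dump) are equivalent to a with block; a sketch of the final dump step written that way, reusing the names from the example:

    with open(temp_js_dump_path, "wb") as f:
        f.write(temp_js_dump_bin)
    f_list.append(temp_js_dump_path)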
Example #18
    def execute(self, request):
        file_path = request.file_path
        result = Result()

        # Report the version of suricata as the service context
        request.set_service_context(
            f"Suricata version: {self.get_suricata_version()}")

        # restart Suricata if we need to
        self.start_suricata_if_necessary()

        # Strip frame headers from the PCAP, since Suricata sometimes has trouble parsing strange PCAPs
        stripped_filepath = self.strip_frame_headers(file_path)

        # Check to make sure the size of the stripped file isn't 0 - this happens on pcapng files
        # TODO: there's probably a better way to do this - don't even strip it if it's pcapng
        if os.stat(stripped_filepath).st_size == 0:
            stripped_filepath = file_path

        # Switch stdout and stderr so we don't get our logs polluted
        mystdout = StringIO()
        old_stdout = sys.stdout
        sys.stdout = mystdout

        mystderr = StringIO()
        old_stderr = sys.stderr
        sys.stderr = mystderr

        # Pass the pcap file to Suricata via the socket
        ret = self.suricata_sc.send_command(
            "pcap-file", {
                "filename": stripped_filepath,
                "output-dir": self.working_directory
            })

        if not ret or ret["return"] != "OK":
            self.log.exception(
                f"Failed to submit PCAP for processing: {ret['message']}")

        # Wait for the socket to finish processing our PCAP
        while True:
            time.sleep(1)
            try:
                ret = self.suricata_sc.send_command("pcap-current")
                if ret and ret["message"] == "None":
                    break
            except ConnectionResetError as e:
                raise RecoverableError(e)

        # Bring back stdout and stderr
        sys.stdout = old_stdout
        sys.stderr = old_stderr
        # NOTE: for now we will ignore content of mystdout and mystderr but we have them just in case...

        alerts, signatures, domains, ips, urls, email_addresses, tls_dict, extracted_files, reverse_lookup = self.parse_suricata_output(
        ).values()

        file_extracted_section = ResultSection("File(s) extracted by Suricata")
        # Parse the json results of the service
        if request.get_param("extract_files"):
            for file in extracted_files:
                sha256, filename, extracted_file_path = file.values()
                self.log.info(f"extracted file {filename}")
                try:
                    if request.add_extracted(
                            extracted_file_path,
                            filename,
                            "Extracted by Suricata",
                            safelist_interface=self.api_interface):
                        file_extracted_section.add_line(filename)
                        if filename != sha256:
                            file_extracted_section.add_tag(
                                'file.name.extracted', filename)
                except FileNotFoundError as e:
                    # An intermittent issue, just try again
                    raise RecoverableError(e)
                except MaxExtractedExceeded:
                    # We've hit our limit
                    pass

        # Report a null score to indicate that files were extracted. If no sigs hit, it's not clear
        # where the extracted files came from
        if file_extracted_section.body:
            result.add_section(file_extracted_section)

        # Add tags for the domains, urls, and IPs we've discovered
        root_section = ResultSection("Discovered IOCs", parent=result)
        if domains:
            domain_section = ResultSection("Domains", parent=root_section)
            for domain in domains:
                domain_section.add_line(domain)
                domain_section.add_tag('network.dynamic.domain', domain)
        if ips:
            ip_section = ResultSection("IP Addresses", parent=root_section)
            for ip in ips:
                # Make sure it's not a local IP
                if not (ip.startswith("127.") or ip.startswith("192.168.")
                        or ip.startswith("10.") or
                        (ip.startswith("172.")
                         and 16 <= int(ip.split(".")[1]) <= 31)):
                    ip_section.add_line(ip)
                    ip_section.add_tag('network.dynamic.ip', ip)

        if urls:
            url_section = ResultSection("URLs", parent=root_section)
            for url in urls:
                url_section.add_line(url)
                url_section.add_tag('network.dynamic.uri', url)
        if email_addresses:
            email_section = ResultSection("Email Addresses",
                                          parent=root_section)
            for eml in email_addresses:
                email_section.add_line(eml)
                email_section.add_tag('network.email.address', eml)

        # Map between suricata key names and AL tag types
        tls_mappings = {
            "subject": 'cert.subject',
            "issuerdn": 'cert.issuer',
            "version": 'cert.version',
            "notbefore": 'cert.valid.start',
            "notafter": 'cert.valid.end',
            "fingerprint": 'cert.thumbprint',
            "sni": 'network.tls.sni'
        }

        if tls_dict:
            tls_section = ResultSection("TLS Information",
                                        parent=root_section,
                                        body_format=BODY_FORMAT.JSON)
            kv_body = {}
            for tls_type, tls_values in tls_dict.items():
                if tls_type == "fingerprint":
                    # make sure the cert fingerprint/thumbprint matches other values,
                    # like from PEFile
                    tls_values = [
                        v.replace(":", "").lower() for v in tls_values
                    ]

                if tls_type in tls_mappings:
                    kv_body[tls_type] = tls_values

                    tag_type = tls_mappings[tls_type]
                    if tag_type is not None:
                        for tls_value in tls_values:
                            tls_section.add_tag(tag_type, tls_value)

                elif tls_type == "ja3":
                    kv_body.setdefault('ja3_hash', [])
                    kv_body.setdefault('ja3_string', [])

                    for ja3_entry in tls_values:
                        ja3_hash = ja3_entry.get("hash")
                        ja3_string = ja3_entry.get("string")
                        if ja3_hash:
                            kv_body['ja3_hash'].append(ja3_hash)
                            tls_section.add_tag('network.tls.ja3_hash',
                                                ja3_hash)
                        if ja3_string:
                            kv_body['ja3_string'].append(ja3_string)
                            tls_section.add_tag('network.tls.ja3_string',
                                                ja3_string)

                else:
                    kv_body[tls_type] = tls_values
                    # stick a message in the logs about a new TLS type found in suricata logs
                    self.log.info(
                        f"Found new TLS type {tls_type} with values {tls_values}"
                    )
            tls_section.set_body(json.dumps(kv_body))
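            # For illustration only, the resulting kv_body typically looks something like
            # (hypothetical values):
            #   {"subject": ["CN=example.com"], "issuerdn": ["CN=Example CA"],
            #    "sni": ["example.com"], "ja3_hash": ["<md5>"], "ja3_string": ["771,..."]}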

        # Create the result sections if there are any hits
        if len(alerts) > 0:
            for signature_id, signature_details in signatures.items():
                signature = signature_details['signature']
                attributes = signature_details['attributes']
                section = ResultSection(f'{signature_id}: {signature}')
                heur_id = 3
                if any(x in signature for x in self.config.get("sure_score")):
                    heur_id = 1
                elif any(x in signature
                         for x in self.config.get("vhigh_score")):
                    heur_id = 2

                section.set_heuristic(heur_id)
                if signature_details['al_signature']:
                    section.add_tag("file.rule.suricata",
                                    signature_details['al_signature'])
                for timestamp, src_ip, src_port, dest_ip, dest_port in alerts[
                        signature_id][:10]:
                    section.add_line(
                        f"{timestamp} {src_ip}:{src_port} -> {dest_ip}:{dest_port}"
                    )
                if len(alerts[signature_id]) > 10:
                    section.add_line(
                        f'And {len(alerts[signature_id]) - 10} more flows')

                # Tag IPs/Domains/URIs associated to signature
                for flow in alerts[signature_id]:
                    dest_ip = flow[3]
                    section.add_tag('network.dynamic.ip', dest_ip)
                    if dest_ip in reverse_lookup:
                        section.add_tag('network.dynamic.domain',
                                        reverse_lookup[dest_ip])
                    for uri in urls:
                        if dest_ip in uri or (reverse_lookup.get(dest_ip)
                                              and reverse_lookup[dest_ip] in uri):
                            section.add_tag('network.dynamic.uri', uri)

                # Add a tag for the signature id and the message
                section.add_tag('network.signature.signature_id',
                                str(signature_id))
                section.add_tag('network.signature.message', signature)
                for attr in attributes:
                    if attr.get('uri'):
                        section.add_tag('network.static.uri', attr['uri'])
                # Tag malware_family
                for malware_family in signature_details['malware_family']:
                    section.add_tag('attribution.family', malware_family)

                result.add_section(section)
                self.ontology.add_result_part(
                    Signature,
                    data=dict(
                        name=signature_details['al_signature'],
                        type="SURICATA",
                        malware_families=signature_details['malware_family']
                        or None,
                        attributes=attributes))

            # Add the original Suricata output as a supplementary file in the result
            request.add_supplementary(
                os.path.join(self.working_directory, 'eve.json'),
                'SuricataEventLog.json', 'json')

        # Add the stats.log to the result, which can be used to determine service success
        if os.path.exists(os.path.join(self.working_directory, 'stats.log')):
            request.add_supplementary(
                os.path.join(self.working_directory, 'stats.log'), 'stats.log',
                'log')

        request.result = result
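
    # A minimal sketch (hypothetical helper, not part of the service above) of the
    # config-driven heuristic selection used for Suricata signature hits: keyword lists
    # from the service config decide whether a hit maps to heuristic 1, 2 or the default 3.
    def _pick_heuristic_id(self, signature: str) -> int:
        # Assumes "sure_score" and "vhigh_score" are lists of keywords in self.config
        if any(x in signature for x in self.config.get("sure_score", [])):
            return 1
        if any(x in signature for x in self.config.get("vhigh_score", [])):
            return 2
        return 3
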
    def execute(self, request):
        # ==================================================================
        # Execute a request:
        #   Every time your service receives a new file to scan, the execute function is called
        #   This is where you should execute your processing code.
        #   For the purpose of this example, we will only generate results ...

        # You should run your code here...

        # ==================================================================
        # Check if we're scanning an embedded file
        #   This service always drops 3 embedded files: two generate random results and the other empty results.
        #   Here we check whether we're scanning one of those embedded files.
        #   In a normal service you would never do this, but since this service is used in our unit tests
        #   to exercise every feature of the report generator, we have to.
        if request.sha256 not in ['d729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                                  '5ce5ae8ef56a54af2c44415800a81ecffd49a33ae8895dfe38fc1075d3f619ec',
                                  'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06']:
            # Main file results...

            # ==================================================================
            # Write the results:
            #   First, create a result object to which all the result sections will be added
            result = Result()

            # ==================================================================
            # Standard text section: BODY_FORMAT.TEXT - DEFAULT
            #   Text sections basically just dump their text to the screen...
            #     All section scores will be summed in the service result
            #     The Result classification will be the highest classification found in the sections
            text_section = ResultSection('Example of a default section')
            # You can add lines to your section one at a time
            #   Here we will generate a random line
            text_section.add_line(get_random_phrase())
            # Or you can add them from a list
            #   Here we will generate a random number of random lines
            text_section.add_lines([get_random_phrase() for _ in range(random.randint(1, 5))])
            # If the section needs to affect the score of the file you need to set a heuristic
            #   Here we will pick one at random
            #     In addition to adding a heuristic, we will associate a signature with the heuristic
            #     by adding the signature name to it. (Here we are generating a random name)
            text_section.set_heuristic(3, signature="sig_one")
            # You can attach attack ids to heuristics after they were defined
            text_section.heuristic.add_attack_id("T1066")
            # Same thing for the signatures: they can be added to the heuristic after the fact and you can even say how
            #   many times the signature fired by setting its frequency. If you call add_signature_id twice with the
            #   same signature, this will effectively increase the frequency of the signature.
            text_section.heuristic.add_signature_id("sig_two", score=20, frequency=2)
            text_section.heuristic.add_signature_id("sig_two", score=20, frequency=3)
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_four", score=0)
            # The heuristic for text_section should have the following properties
            #   1. 1 attack ID: T1066
            #   2. 4 signatures: sig_one, sig_two, sig_three and sig_four
            #   3. Signature frequencies are cumulative, therefore they will be as follows:
            #      - sig_one = 1
            #      - sig_two = 5
            #      - sig_three = 2
            #      - sig_four = 1
            #   4. The score used for each signature is driven by the following rules: the heuristic's
            #      signature_score_map has the highest priority, then the score value passed to add_signature_id,
            #      and finally the default heuristic score. Therefore the scores used to calculate the total score
            #      for the text_section are as follows:
            #      - sig_one: 10    -> heuristic default score
            #      - sig_two: 20    -> score provided by the function add_signature_id
            #      - sig_three: 30  -> score provided by the heuristic map
            #      - sig_four: 40   -> score provided by the heuristic map because it's higher priority than the
            #                          function score
            #    5. Total section score is then: 1x10 + 5x20 + 2x30 + 1x40 = 210
            # Make sure you add your section to the result
            result.add_section(text_section)

            # ==================================================================
            # Color map Section: BODY_FORMAT.GRAPH_DATA
            #     Creates a color map bar using a minimum and maximum domain
            #     e.g. We are using this section to display the entropy distribution in some services
            cmap_min = 0
            cmap_max = 20
            color_map_data = {
                'type': 'colormap',
                'data': {
                    'domain': [cmap_min, cmap_max],
                    'values': [random.random() * cmap_max for _ in range(50)]
                }
            }
            # The classification of a section can be set to any valid classification for your system
            section_color_map = ResultSection("Example of colormap result section", body_format=BODY_FORMAT.GRAPH_DATA,
                                              body=json.dumps(color_map_data), classification=cl_engine.RESTRICTED)
            result.add_section(section_color_map)

            # ==================================================================
            # URL section: BODY_FORMAT.URL
            #   Generate a list of clickable urls using a json encoded format
            #     As you can see here, the body of the section can be set directly instead of line by line
            random_host = get_random_host()
            url_section = ResultSection('Example of a simple url section', body_format=BODY_FORMAT.URL,
                                        body=json.dumps({"name": "Random url!", "url": f"https://{random_host}/"}))

            # Since urls are very important features we can tag those features in the system so they are easy to find
            #   Tags are defined by a type and a value
            url_section.add_tag("network.static.domain", random_host)

            # You may also want to provide a list of urls!
            #   Also, no need to provide a name; the url link will be displayed
            host1 = get_random_host()
            host2 = get_random_host()
            ip1 = get_random_ip()
            ip2 = get_random_ip()
            ip3 = get_random_ip()
            urls = [
                {"url": f"https://{host1}/"},
                {"url": f"https://{host2}/"},
                {"url": f"https://{ip1}/"},
                {"url": f"https://{ip2}/"},
                {"url": f"https://{ip3}/"}]

            # A heuristic can fire more than once without being associated with a signature
            url_heuristic = Heuristic(4, frequency=len(urls))

            url_sub_section = ResultSection('Example of a url section with multiple links',
                                            body=json.dumps(urls), body_format=BODY_FORMAT.URL,
                                            heuristic=url_heuristic)
            url_sub_section.add_tag("network.static.ip", ip1)
            url_sub_section.add_tag("network.static.ip", ip2)
            url_sub_section.add_tag("network.static.ip", ip3)
            url_sub_section.add_tag("network.static.domain", host1)
            url_sub_section.add_tag("network.dynamic.domain", host2)
            # Since url_sub_section is a sub-section of url_section
            # we will add it as a sub-section of url_section not to the main result itself
            url_section.add_subsection(url_sub_section)
            result.add_section(url_section)

            # ==================================================================
            # Memory dump section: BODY_FORMAT.MEMORY_DUMP
            #     Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
            data = hexdump(b"This is some random text that we will format as an hexdump and you'll see "
                           b"that the hexdump formatting will be preserved by the memory dump section!")
            memdump_section = ResultSection('Example of a memory dump section', body_format=BODY_FORMAT.MEMORY_DUMP,
                                            body=data)
            memdump_section.set_heuristic(random.randint(1, 4))
            result.add_section(memdump_section)

            # ==================================================================
            # KEY_VALUE section:
            #     This section allows the service writer to list a bunch of key/value pairs to be displayed in the UI
            #     while also providing easy-to-parse data for automated tools.
            #     NB: You should definitely use this over a JSON body type since this one will be displayed correctly
            #         in the UI for the user
            #     The body argument must be a json dump of a dictionary (only str, int, and bool values are allowed)
            kv_body = {
                "a_str": "Some string",
                "a_bool": False,
                "an_int": 102,
            }
            kv_section = ResultSection('Example of a KEY_VALUE section', body_format=BODY_FORMAT.KEY_VALUE,
                                       body=json.dumps(kv_body))
            result.add_section(kv_section)

            # ==================================================================
            # JSON section:
            #     Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
            #     to display a tree view of JSON results.
            #     NB: Use this sparingly! As a service developer you should do your best to include important
            #     results as their own result sections.
            #     The body argument must be a json dump of a python dictionary
            json_body = {
                "a_str": "Some string",
                "a_list": ["a", "b", "c"],
                "a_bool": False,
                "an_int": 102,
                "a_dict": {
                    "list_of_dict": [
                        {"d1_key": "val", "d1_key2": "val2"},
                        {"d2_key": "val", "d2_key2": "val2"}
                    ],
                    "bool": True
                }
            }
            json_section = ResultSection('Example of a JSON section', body_format=BODY_FORMAT.JSON,
                                         body=json.dumps(json_body))
            result.add_section(json_section)

            # ==================================================================
            # PROCESS_TREE section:
            #     This section allows the service writer to list a bunch of dictionary objects that have nested lists
            #     of dictionaries to be displayed in the UI. Each dictionary object represents a process, and therefore
            #     each dictionary must be of the following format:
            #     {
            #       "process_pid": int,
            #       "process_name": str,
            #       "command_line": str,
            #       "children": [] NB: This list either is empty or contains more dictionaries that have the same
            #                          structure
            #     }
            nc_body = [
                {
                    "process_pid": 123,
                    "process_name": "evil.exe",
                    "command_line": "C:\\evil.exe",
                    "signatures": {},
                    "children": [
                        {
                            "process_pid": 321,
                            "process_name": "takeovercomputer.exe",
                            "command_line": "C:\\Temp\\takeovercomputer.exe -f do_bad_stuff",
                            "signatures": {"one":250},
                            "children": [
                                {
                                    "process_pid": 456,
                                    "process_name": "evenworsethanbefore.exe",
                                    "command_line": "C:\\Temp\\evenworsethanbefore.exe -f change_reg_key_cuz_im_bad",
                                    "signatures": {"one":10, "two":10, "three":10},
                                    "children": []
                                },
                                {
                                    "process_pid": 234,
                                    "process_name": "badfile.exe",
                                    "command_line": "C:\\badfile.exe -k nothing_to_see_here",
                                    "signatures": {"one":1000, "two":10, "three":10, "four":10, "five":10},
                                    "children": []
                                }
                            ]
                        },
                        {
                            "process_pid": 345,
                            "process_name": "benignexe.exe",
                            "command_line": "C:\\benignexe.exe -f \"just kidding, i'm evil\"",
                            "signatures": {"one": 2000},
                            "children": []
                        }
                    ]
                },
                {
                    "process_pid": 987,
                    "process_name": "runzeroday.exe",
                    "command_line": "C:\\runzeroday.exe -f insert_bad_spelling",
                    "signatures": {},
                    "children": []
                }
            ]
            nc_section = ResultSection('Example of a PROCESS_TREE section',
                                       body_format=BODY_FORMAT.PROCESS_TREE,
                                       body=json.dumps(nc_body))
            result.add_section(nc_section)
            
            # ==================================================================
            # TABLE section:
            #     This section allows the service writer to have their content displayed in a table format in the UI
            #     The body argument must be a list [] of dict {} objects. A dict object can have a key value pair
            #     where the value is a flat nested dictionary, and this nested dictionary will be displayed as a nested
            #     table within a cell.
            table_body = [
                {
                    "a_str": "Some string1",
                    "extra_column_here": "confirmed",
                    "a_bool": False,
                    "an_int": 101,
                },
                {
                    "a_str": "Some string2",
                    "a_bool": True,
                    "an_int": 102,
                },
                {
                    "a_str": "Some string3",
                    "a_bool": False,
                    "an_int": 103,
                },
                {
                    "a_str": "Some string4",
                    "a_bool": None,
                    "an_int": -1000000000000000000,
                    "extra_column_there": "confirmed",
                    "nested_table": {
                        "a_str": "Some string3",
                        "a_bool": False,
                        "nested_table_thats_too_deep": {
                            "a_str": "Some string3",
                            "a_bool": False,
                            "an_int": 103,
                        },
                    },
                },
            ]
            table_section = ResultSection('Example of a TABLE section',
                                          body_format=BODY_FORMAT.TABLE,
                                          body=json.dumps(table_body))
            result.add_section(table_section)

            # ==================================================================
            # Re-Submitting files to the system
            #     Adding extracted files will have them resubmitted to the system for analysis

            # This file will generate random results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(data.encode())
            request.add_extracted(temp_path, "file.txt", "Extracted by some magic!")

            # Embedded files can also have their own classification!
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"CLASSIFIED!!!__"+data.encode())
            request.add_extracted(temp_path, "classified.doc", "Classified file ... don't look",
                                  classification=cl_engine.RESTRICTED)

            # This file will generate empty results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"EMPTY")
            request.add_extracted(temp_path, "empty.txt", "Extracted empty resulting file")

            # ==================================================================
            # Supplementary files
            #     Adding supplementary files will save them in the datastore for future
            #      reference but won't reprocess those files.
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(urls))
            request.add_supplementary(temp_path, "urls.json", "These are urls as a JSON file")
            # Like embedded files, you can add more than one supplementary file
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(json_body))
            request.add_supplementary(temp_path, "json_body.json", "This is the json_body as a JSON file")

            # ==================================================================
            # Wrap-up:
            #     Save your result object back into the request
            request.result = result

        # ==================================================================
        # Empty results file
        elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
            # Creating an empty result object
            request.result = Result()

        # ==================================================================
        # Randomized results file
        else:
            # For the randomized results file, we will completely randomize the results
            #   The content of those results does not matter since we've already shown you
            #   all the different result sections, tagging, heuristics and file upload functions
            embedded_result = Result()

            # Add a couple of randomly generated sections
            for _ in range(1, 3):
                embedded_result.add_section(self._create_random_section())

            request.result = embedded_result
class ViperMonkey(ServiceBase):
    def __init__(self, config: Optional[Dict] = None) -> None:
        super().__init__(config)

        self.ip_list: List[str] = []
        self.url_list: List[str] = []
        self.found_powershell = False
        self.file_hashes: List[str] = []

        self.result: Optional[Result] = None

    def start(self) -> None:
        self.log.debug("ViperMonkey service started")

    def execute(self, request: ServiceRequest) -> None:
        self.result = Result()
        request.result = self.result

        self.ip_list = []
        self.url_list = []
        self.found_powershell = False
        self.file_hashes = []

        vmonkey_err = False
        actions: List[str] = []
        external_functions: List[str] = []
        tmp_iocs: List[str] = []
        output_results: Dict[str, Any] = {}
        potential_base64: Set[str] = set()

        # Running ViperMonkey
        try:
            file_contents = request.file_contents
            input_file: str = request.file_path
            input_file_obj: Optional[IO] = None
            # Typical start to XML files
            if not file_contents.startswith(
                    b"<?") and request.file_type == "code/xml":
                # Default encoding/decoding if BOM not found
                encoding: Optional[str] = None
                decoding: Optional[str] = None
                # Remove potential BOMs from contents
                if file_contents.startswith(BOM_UTF8):
                    encoding = "utf-8"
                    decoding = "utf-8-sig"
                elif file_contents.startswith(BOM_UTF16):
                    encoding = "utf-16"
                    decoding = "utf-16"
                if encoding and decoding:
                    input_file_obj = tempfile.NamedTemporaryFile(
                        "w+", encoding=encoding)
                    input_file_obj.write(
                        file_contents.decode(decoding, errors="ignore"))
                    input_file = input_file_obj.name
                else:
                    # The file_type was detected as XML, but the XML is probably buried inside a file that isn't actually XML
                    # Give no response as ViperMonkey can't process this kind of file
                    return
            cmd = " ".join([
                PYTHON2_INTERPRETER,
                os.path.join(os.path.dirname(__file__),
                             "vipermonkey_compat.py2"),
                input_file,
                self.working_directory,
            ])
            p = subprocess.run(cmd, capture_output=True, shell=True)
            stdout = p.stdout

            # Close file
            if input_file_obj and os.path.exists(input_file_obj.name):
                input_file_obj.close()

            # Add artifacts
            artifact_dir = os.path.join(
                self.working_directory,
                os.path.basename(input_file) + "_artifacts")
            if os.path.exists(artifact_dir):
                for file in os.listdir(artifact_dir):
                    try:
                        file_path = os.path.join(artifact_dir, file)
                        if os.path.isfile(file_path) and os.path.getsize(
                                file_path):
                            request.add_extracted(
                                file_path, file,
                                "File extracted by ViperMonkey during analysis"
                            )
                    except os.error as e:
                        self.log.warning(e)

            # Read output
            if stdout:
                for line in stdout.splitlines():
                    if line.startswith(b"{") and line.endswith(b"}"):
                        try:
                            output_results = json.loads(line)
                        except UnicodeDecodeError:
                            output_results = json.loads(
                                line.decode("utf-8", "replace"))
                        break

                # Check the type in case the vmonkey return value is None
                # If no macros were found the return is [][][]; on error the return is None
                # vmonkey_err can still be set when the return is [][][], so log it as a warning instead of an error
                if isinstance(output_results.get("vmonkey_values"), dict):
                    """
                    Structure of variable "actions" is as follows:
                    [action, parameters, description]
                    action: 'Found Entry Point', 'Execute Command', etc...
                    parameters: Parameters for function
                    description: 'Shell Function', etc...

                    external_functions is a list of built-in VBA functions
                    that were called
                    """
                    actions = output_results["vmonkey_values"]["actions"]
                    external_functions = output_results["vmonkey_values"][
                        "external_funcs"]
                    tmp_iocs = output_results["vmonkey_values"]["tmp_iocs"]
                    if output_results["vmonkey_err"]:
                        vmonkey_err = True
                        self.log.warning(output_results["vmonkey_err"])
                else:
                    vmonkey_err = True
            else:
                vmonkey_err = True

        except Exception:
            self.log.exception(
                f"Vipermonkey failed to analyze file {request.sha256}")

        if actions:
            # Creating action section
            action_section = ResultSection("Recorded Actions:",
                                           parent=self.result)
            action_section.add_tag("technique.macro", "Contains VBA Macro(s)")
            sub_action_sections: Dict[str, ResultSection] = {}
            for action, parameters, description in actions:  # Creating action sub-sections for each action
                if not description:  # For actions with no description, just use the type of action
                    description = action

                if description not in sub_action_sections:
                    # Action's description will be the sub-section name
                    sub_action_section = ResultSection(description,
                                                       parent=action_section)
                    sub_action_sections[description] = sub_action_section
                    if description == "Shell function":
                        sub_action_section.set_heuristic(2)
                else:
                    # Reuse existing section
                    sub_action_section = sub_action_sections[description]
                    if sub_action_section.heuristic:
                        sub_action_section.heuristic.increment_frequency()

                # Parameters are sometimes stored as a list, account for this
                if isinstance(parameters, list):
                    for item in parameters:
                        # Parameters includes more than strings (booleans for example)
                        if isinstance(item, str):
                            # Check for PowerShell
                            self.extract_powershell(item, sub_action_section,
                                                    request)
                    # Join list items into single string
                    param = ", ".join(str(p) for p in parameters)

                else:
                    param = parameters
                    # Parameters includes more than strings (booleans for example)
                    if isinstance(param, str):
                        self.extract_powershell(param, sub_action_section,
                                                request)

                # If the description field was empty, re-organize result section for this case
                if description == action:
                    sub_action_section.add_line(param)
                else:
                    sub_action_section.add_line(
                        f"Action: {action}, Parameters: {param}")

                # Check later for base64
                potential_base64.add(param)

                # Add urls/ips found in parameter to respective lists
                self.find_ip(param)
        # Check tmp_iocs
        res_temp_iocs = ResultSection("Runtime temporary IOCs")
        for ioc in tmp_iocs:
            self.extract_powershell(ioc, res_temp_iocs, request)
            potential_base64.add(ioc)
            self.find_ip(ioc)

        if len(res_temp_iocs.subsections) != 0 or res_temp_iocs.body:
            self.result.add_section(res_temp_iocs)

        # Add PowerShell score/tag if found
        if self.found_powershell:
            ResultSection("Discovered PowerShell code in file",
                          parent=self.result,
                          heuristic=Heuristic(3))

        # Check parameters and temp_iocs for base64
        base64_section = ResultSection("Possible Base64 found",
                                       heuristic=Heuristic(5, frequency=0))
        for param in potential_base64:
            self.check_for_b64(param, base64_section, request,
                               request.file_contents)
        if base64_section.body:
            self.result.add_section(base64_section)

        # Add url/ip tags
        self.add_ip_tags()

        # Create section for built-in VBA functions called
        if len(external_functions) > 0:
            external_func_section = ResultSection(
                "VBA functions called",
                body_format=BODY_FORMAT.MEMORY_DUMP,
                parent=self.result)
            for func in external_functions:
                if func in vba_builtins:
                    external_func_section.add_line(func + ": " +
                                                   vba_builtins[func])
                else:
                    external_func_section.add_line(func)

        # Add vmonkey log as a supplemental file if we have results
        if "stdout" in output_results and (vmonkey_err
                                           or request.result.sections):
            temp_log_copy = os.path.join(
                tempfile.gettempdir(), f"{request.sid}_vipermonkey_output.log")
            with open(temp_log_copy, "w") as temp_log_file:
                temp_log_file.write(output_results["stdout"])

            request.add_supplementary(temp_log_copy, "vipermonkey_output.log",
                                      "ViperMonkey log output")
            if vmonkey_err is True:
                ResultSection(
                    'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
                    parent=self.result,
                    heuristic=Heuristic(1),
                )

    def extract_powershell(self, parameter: str, section: ResultSection,
                           request: ServiceRequest) -> None:
        """Searches parameter for PowerShell, adds as extracted if found

        Args:
            parameter: String to be searched
            section: Section to be modified if PowerShell found
            request: ServiceRequest used to add any extracted PowerShell files
        """

        matches = find_powershell_strings(parameter.encode())

        if not matches:
            return

        self.found_powershell = True

        for match in matches:
            powershell_command = get_powershell_command(match.value)
            sha256hash = hashlib.sha256(powershell_command).hexdigest()
            # Add PowerShell code as extracted, account for duplicates
            if sha256hash not in self.file_hashes:
                powershell_filename = f"{sha256hash[0:10]}.ps1"
                ResultSection(
                    "Discovered PowerShell code in parameter.",
                    parent=section,
                    body=powershell_command[:100].decode() +
                    f"... see [{powershell_filename}]",
                )
                powershell_file_path = os.path.join(self.working_directory,
                                                    powershell_filename)
                with open(powershell_file_path, "wb") as f:
                    f.write(powershell_command)
                request.add_extracted(
                    powershell_file_path, powershell_filename,
                    "Discovered PowerShell code in parameter")
                self.file_hashes.append(sha256hash)

    def find_ip(self, parameter: str) -> None:
        """
        Parses parameter for urls/ip addresses, adds them to their respective lists

        Args:
            parameter: String to be searched
        """

        url_list = re.findall(r"https?://(?:[-\w.]|(?:[\da-zA-Z/?=%&]))+",
                              parameter)
        ip_list = re.findall(R_IP, parameter)

        for url in url_list:
            url_strip = url.strip()
            if url_strip:
                self.url_list.append(url_strip)
        for ip in ip_list:
            ip_strip = ip.strip()
            if ip_strip:
                self.ip_list.append(ip_strip)
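
    # Illustrative only (hypothetical input): calling
    #   self.find_ip("GET https://example.com/a?b=1 from 10.0.0.1:8080")
    # would append "https://example.com/a?b=1" to url_list and, assuming R_IP also
    # matches an optional port, "10.0.0.1:8080" to ip_list.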

    def add_ip_tags(self) -> None:
        """
        Adds tags for urls and ip addresses from given lists
        """

        if self.url_list or self.ip_list:
            sec_iocs = ResultSection(
                "ViperMonkey has found the following IOCs:",
                parent=self.result,
                heuristic=Heuristic(4))

            # Add Urls
            for url in set(self.url_list):
                sec_iocs.add_line(url)
                sec_iocs.add_tag("network.static.uri", url)
                try:
                    parsed = urlparse(url)
                    if parsed.hostname and not re.match(
                            IP_ONLY_REGEX, parsed.hostname):
                        sec_iocs.add_tag("network.static.domain",
                                         parsed.hostname)

                except Exception:
                    pass

            # Add IPs
            for ip in set(self.ip_list):
                sec_iocs.add_line(ip)
                # Checking if IP ports also found and adding the corresponding tags
                if re.findall(":", ip):
                    net_ip, net_port = ip.split(":")
                    sec_iocs.add_tag("network.static.ip", net_ip)
                    sec_iocs.add_tag("network.port", net_port)
                else:
                    sec_iocs.add_tag("network.static.ip", ip)

    def check_for_b64(self, data: str, section: ResultSection,
                      request: ServiceRequest, file_contents: bytes) -> bool:
        """Search and decode base64 strings in sample data.

        Args:
            data: Data to be parsed
            section: base64 subsection, must have heuristic set
            request: ServiceRequest used to add any extracted files
            file_contents: Original file contents, used to skip base64 that is already present in the file

        Returns:
            decoded: Boolean which is true if base64 found
        """
        assert section.heuristic

        decoded_param = data
        decoded = False

        encoded_data = data.encode()
        for content, start, end in find_base64(encoded_data):
            if encoded_data[start:end] in file_contents:
                # Present in original file, not an intermediate IoC
                continue
            try:
                # Powershell base64 will be utf-16
                content = content.decode("utf-16").encode()
            except UnicodeDecodeError:
                pass
            try:
                if len(content) < FILE_PARAMETER_SIZE:
                    decoded_param = (decoded_param[:start] + " " +
                                     content.decode(errors="ignore") +
                                     decoded_param[end:])
                else:
                    b64hash = ""
                    pe_files = find_pe_files(content)
                    for pe_file in pe_files:
                        b64hash = hashlib.sha256(pe_file).hexdigest()
                        pe_path = os.path.join(self.working_directory, b64hash)
                        with open(pe_path, "wb") as f:
                            f.write(pe_file)
                        request.add_extracted(
                            pe_path, b64hash,
                            "PE file found in base64 encoded parameter")
                        section.heuristic.add_signature_id("pe_file")
                    if not pe_files:
                        b64hash = hashlib.sha256(content).hexdigest()
                        content_path = os.path.join(self.working_directory,
                                                    b64hash)
                        with open(content_path, "wb") as f:
                            f.write(content)
                        request.add_extracted(
                            content_path, b64hash,
                            "Large base64 encoded parameter")
                        section.heuristic.add_signature_id("possible_file")
                    decoded_param = (decoded_param[:start] +
                                     f"[See extracted file {b64hash}]" +
                                     decoded_param[end:])
                decoded = True
            except Exception:
                pass

        if decoded:
            section.heuristic.increment_frequency()
            section.add_line(
                f"Possible Base64 {truncate(data)} decoded: {decoded_param}")
            self.find_ip(decoded_param)

        return decoded
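
# A minimal, self-contained sketch (illustrative only, not part of the service above) of
# the core idea behind check_for_b64: locate base64-looking runs, decode them, and treat
# PowerShell's UTF-16 encoded commands as a special case. Names below are hypothetical.
import base64
import binascii
import re


def decode_b64_runs(text: str):
    """Yield decoded text for every plausible base64 run found in `text`."""
    for match in re.finditer(r"[A-Za-z0-9+/]{16,}={0,2}", text):
        try:
            raw = base64.b64decode(match.group(0), validate=True)
        except (binascii.Error, ValueError):
            continue  # not valid base64 after all
        try:
            # PowerShell -EncodedCommand payloads are UTF-16LE
            yield raw.decode("utf-16")
        except UnicodeDecodeError:
            yield raw.decode(errors="ignore")
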
Example #21
0
    def _add_resultinfo_for_match(self, result: Result, match):
        """
        Parse the Yara signature match and add information to the overall AL service result. This module determines
        the result score and identifies any AL tags that should be added (i.e. IMPLANT_NAME, THREAT_ACTOR, etc.).

        Args:
            result: AL Result object.
            match: Yara rules Match object item.

        Returns:
            None.
        """
        almeta = YaraMetadata(match)
        self._normalize_metadata(almeta)

        section = ResultSection('', classification=almeta.classification)
        if self.deep_scan or almeta.al_status != "NOISY":
            section.set_heuristic(self.YARA_HEURISTICS_MAP.get(
                almeta.category, 1),
                                  signature=f'{match.namespace}.{match.rule}',
                                  attack_id=almeta.mitre_att)
        section.add_tag(f'file.rule.{self.name.lower()}',
                        f'{match.namespace}.{match.rule}')

        title_elements = [
            f"[{match.namespace}] {match.rule}",
        ]

        if almeta.actor_type:
            section.add_tag('attribution.actor', almeta.actor_type)

        for tag in almeta.tags:
            section.add_tag(tag['type'], tag['value'])

        # Malware Tags
        implant_title_elements = []
        for (implant_name, implant_family) in almeta.malwares:
            if implant_name:
                implant_title_elements.append(implant_name)
                section.add_tag('attribution.implant', implant_name)
            if implant_family:
                implant_title_elements.append(implant_family)
                section.add_tag('attribution.family', implant_family)
        if implant_title_elements:
            title_elements.append(
                f"- Implant(s): {', '.join(implant_title_elements)}")

        # Threat Actor metadata
        for actor in almeta.actors:
            title_elements.append(actor)
            section.add_tag('attribution.actor', actor)

        # Exploit / CVE metadata
        if almeta.exploits:
            title_elements.append(
                f"- Exploit(s): {', '.join(almeta.exploits)}")
        for exploit in almeta.exploits:
            section.add_tag('attribution.exploit', exploit)

        # Include technique descriptions in the section behavior
        for (category, name) in almeta.techniques:
            descriptor = self.TECHNIQUE_DESCRIPTORS.get(category, None)
            if descriptor:
                technique_type, technique_description = descriptor
                section.add_tag(technique_type, name)
                almeta.behavior.add(technique_description)

        for (category, name) in almeta.infos:
            descriptor = self.INFO_DESCRIPTORS.get(category, None)
            if descriptor:
                info_type, info_description = descriptor
                section.add_tag(info_type, name)
                almeta.behavior.add(info_description)

        # Summaries
        if almeta.behavior:
            title_elements.append(f"- Behavior: {', '.join(almeta.behavior)}")
        for element in almeta.behavior:
            section.add_tag('file.behavior', element)

        title = " ".join(title_elements)
        section.title_text = title

        json_body = dict(name=match.rule)

        for item in [
                'id', 'version', 'author', 'description', 'source', 'malware',
                'info', 'technique', 'tool', 'exploit', 'actor', 'category',
                'mitre_att'
        ]:
            val = almeta.__dict__.get(item, None)
            if val:
                json_body[item] = val

        string_match_data = self._add_string_match_data(match)
        if string_match_data:
            json_body['string_hits'] = string_match_data

        section.set_body(json.dumps(json_body),
                         body_format=BODY_FORMAT.KEY_VALUE)

        result.add_section(section)
class ViperMonkey(ServiceBase):
    def __init__(self, config=None):
        super(ViperMonkey, self).__init__(config)

        self.ip_list = []
        self.url_list = []
        self.found_powershell = False
        self.file_hashes = []

        self.request = None
        self.result = None

    def start(self):
        self.log.debug('ViperMonkey service started')

    def execute(self, request):
        self.result = Result()
        request.result = self.result
        self.request = request

        self.ip_list = []
        self.url_list = []
        self.found_powershell = False
        self.file_hashes = []

        vmonkey_err = False
        actions = []
        external_functions = []
        tmp_iocs = []
        output_results = {}

        # Running ViperMonkey
        try:
            cmd = " ".join([
                PYTHON2_INTERPRETER,
                os.path.join(os.path.dirname(__file__),
                             'vipermonkey_compat.py2'), request.file_path
            ])
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
            stdout, _ = p.communicate()

            # Read output
            if stdout:
                for l in stdout.splitlines():
                    if l.startswith(b"{") and l.endswith(b"}"):
                        try:
                            output_results = json.loads(l)
                        except UnicodeDecodeError:
                            output_results = json.loads(
                                l.decode("utf-8", "replace"))
                        break

                # Checking for tuple in case vmonkey return is None
                # If no macros found, return is [][], if error, return is None
                if isinstance(output_results.get('vmonkey_values'), dict):
                    '''
                    Structure of variable "actions" is as follows:
                    [action, parameters, description]
                    action: 'Found Entry Point', 'Execute Command', etc...
                    parameter: Parameters for function
                    description: 'Shell Function', etc...

                    external_functions is a list of built-in VBA functions
                    that were called
                    '''
                    actions = output_results['vmonkey_values']['actions']
                    external_functions = output_results['vmonkey_values'][
                        'external_funcs']
                    tmp_iocs = output_results['vmonkey_values']['tmp_iocs']
                else:
                    vmonkey_err = True
            else:
                vmonkey_err = True

        except Exception:
            raise

        # Add vmonkey log as a supplemental file
        if 'stdout' in output_results:
            temp_log_copy = os.path.join(
                tempfile.gettempdir(), f'{request.sid}_vipermonkey_output.log')
            with open(temp_log_copy, "w") as temp_log_file:
                temp_log_file.write(output_results['stdout'])

            self.request.add_supplementary(temp_log_copy,
                                           'vipermonkey_output.log',
                                           'ViperMonkey log output')
            if vmonkey_err is True:
                ResultSection(
                    'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
                    parent=self.result,
                    heuristic=Heuristic(1))

        if len(actions) > 0:
            # Creating action section
            action_section = ResultSection('Recorded Actions:',
                                           parent=self.result)
            action_section.add_tag('technique.macro', 'Contains VBA Macro(s)')
            for action in actions:  # Creating action sub-sections for each action
                cur_action = action[0]
                cur_description = action[2] if action[2] else cur_action

                # Entry point actions have an empty description field, re-organize result section for this case
                if cur_action == 'Found Entry Point':
                    sub_action_section = ResultSection('Found Entry Point',
                                                       parent=action_section)
                    sub_action_section.add_line(action[1])
                else:
                    # Action's description will be the sub-section name
                    sub_action_section = ResultSection(cur_description,
                                                       parent=action_section)
                    if cur_description == 'Shell function':
                        sub_action_section.set_heuristic(2)

                    # Parameters are sometimes stored as a list, account for this
                    if isinstance(action[1], list):
                        for item in action[1]:
                            # Parameters includes more than strings (booleans for example)
                            if isinstance(item, str):
                                # Check for PowerShell
                                self.extract_powershell(
                                    item, sub_action_section)
                        # Join list items into single string
                        param = ', '.join(str(a) for a in action[1])

                    else:
                        param = action[1]
                        # Parameters includes more than strings (booleans for example)
                        if isinstance(param, str):
                            self.extract_powershell(param, sub_action_section)

                    sub_action_section.add_line(f'Action: {cur_action}')
                    sub_action_section.add_line(f'Parameters: {param}')

                    # If decoded is true, possible base64 string has been found
                    self.check_for_b64(param, sub_action_section)

                    # Add urls/ips found in parameter to respective lists
                    self.find_ip(param)

        # Check tmp_iocs
        res_temp_iocs = ResultSection('Runtime temporary IOCs')
        for ioc in tmp_iocs:
            self.extract_powershell(ioc, res_temp_iocs)
            self.check_for_b64(ioc, res_temp_iocs)
            self.find_ip(ioc)

        if len(res_temp_iocs.subsections) != 0 or res_temp_iocs.body:
            self.result.add_section(res_temp_iocs)

        # Add PowerShell score/tag if found
        if self.found_powershell:
            ResultSection('Discovered PowerShell code in file',
                          parent=self.result,
                          heuristic=Heuristic(3))

        # Add url/ip tags
        self.add_ip_tags()

        # Create section for built-in VBA functions called
        if len(external_functions) > 0:
            vba_builtin_dict = {}
            dict_path = os.path.join(os.path.dirname(__file__),
                                     'VBA_built_ins.txt')
            with open(dict_path, 'r') as f:
                for line in f:
                    line = line.strip()
                    if re.search(r'^#', line):
                        continue
                    if line:
                        line = line.split(';')
                        vba_builtin_dict[line[0].strip()] = line[1].strip()
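            # VBA_built_ins.txt is expected to hold "#"-prefixed comment lines and entries
            # of the form "FunctionName;description", e.g. (illustrative line only):
            #   Shell;Runs an executable program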

            external_func_section = ResultSection(
                'VBA functions called',
                body_format=BODY_FORMAT.MEMORY_DUMP,
                parent=self.result)
            for func in external_functions:
                if func in vba_builtin_dict:
                    external_func_section.add_line(func + ': ' +
                                                   vba_builtin_dict[func])
                else:
                    external_func_section.add_line(func)

    def extract_powershell(self, parameter, section):
        """Searches parameter for PowerShell, adds as extracted if found

        Args:
            parameter: String to be searched
            section: Section to be modified if PowerShell found
        """

        if re.findall(r'(?:powershell)|(?:pwsh)', parameter, re.IGNORECASE):
            self.found_powershell = True
            if isinstance(parameter, str):
                # Unicode-objects must be encoded before hashing
                sha256hash = hashlib.sha256(parameter.encode()).hexdigest()
            else:
                sha256hash = hashlib.sha256(parameter).hexdigest()
            ResultSection('Discovered PowerShell code in parameter.',
                          parent=section)

            # Add PowerShell code as extracted, account for duplicates
            if sha256hash not in self.file_hashes:
                self.file_hashes.append(sha256hash)
                powershell_filename = f'{sha256hash[0:25]}_extracted_powershell'
                powershell_file_path = os.path.join(self.working_directory,
                                                    powershell_filename)
                with open(powershell_file_path, 'w') as f:
                    f.write(parameter)
                    self.request.add_extracted(
                        powershell_file_path, powershell_filename,
                        'Discovered PowerShell code in parameter')

    def find_ip(self, parameter):
        """
        Parses parameter for urls/ip addresses, adds them to their respective lists

        Args:
            parameter: String to be searched
        """

        url_list = re.findall(r'https?://(?:[-\w.]|(?:[\da-zA-Z/?=%&]))+',
                              parameter)
        ip_list = re.findall(R_IP, parameter)

        for url in url_list:
            url_strip = url.strip()
            if url_strip:
                self.url_list.append(url_strip)
        for ip in ip_list:
            ip_strip = ip.strip()
            if ip_strip:
                self.ip_list.append(ip_strip)

    def add_ip_tags(self):
        """
        Adds tags for urls and ip addresses from given lists
        """

        if self.url_list or self.ip_list:
            sec_iocs = ResultSection(
                "ViperMonkey has found the following IOCs:",
                parent=self.result,
                heuristic=Heuristic(4))

            # Add Urls
            for url in set(self.url_list):
                sec_iocs.add_line(url)
                sec_iocs.add_tag('network.static.uri', url)
                try:
                    parsed = urlparse(url)
                    if parsed.hostname and not re.match(
                            IP_ONLY_REGEX, parsed.hostname):
                        sec_iocs.add_tag('network.static.domain',
                                         parsed.hostname)

                except Exception:
                    pass

            # Add IPs
            for ip in set(self.ip_list):
                sec_iocs.add_line(ip)
                # Checking if IP ports also found and adding the corresponding tags
                if re.findall(":", ip):
                    net_ip, net_port = ip.split(':')
                    sec_iocs.add_tag('network.static.ip', net_ip)
                    sec_iocs.add_tag('network.port', net_port)
                else:
                    sec_iocs.add_tag('network.static.ip', ip)

    def check_for_b64(self, data, section):
        """Search and decode base64 strings in sample data.

        Args:
            data: Data to be parsed
            section: Sub-section to be modified if base64 found

        Returns:
            decoded: Boolean which is true if base64 found
        """

        b64_matches = []
        # b64_matches_raw will be used for replacing in case b64_matches are modified
        b64_matches_raw = []
        decoded_param = data
        decoded = False

        for b64_match in re.findall(
                '([\x20]{0,2}(?:[A-Za-z0-9+/]{10,}={0,2}[\r]?[\n]?){2,})',
                re.sub('\x3C\x00\x20{2}\x00', '', data)):
            b64 = b64_match.replace('\n', '').replace('\r', '').replace(
                ' ', '').replace('<', '')
            uniq_char = ''.join(set(b64))
            if len(uniq_char) > 6:
                if len(b64) >= 16 and len(b64) % 4 == 0:
                    b64_matches.append(b64)
                    b64_matches_raw.append(b64_match)
        for b64_string, b64_string_raw in zip(b64_matches, b64_matches_raw):
            try:
                base64data = binascii.a2b_base64(b64_string)
                # Decode base64 bytes, add a space to beginning as it may be stripped off while using regex
                base64data_decoded = ' ' + base64data.decode('utf-16').encode(
                    'ascii', 'ignore').decode('ascii')
                # Replace base64 from param with decoded string
                decoded_param = re.sub(re.escape(b64_string_raw),
                                       base64data_decoded, decoded_param)
                decoded = True
            except Exception:
                pass

        if decoded:
            decoded_section = ResultSection('Possible Base64 found',
                                            parent=section,
                                            heuristic=Heuristic(5))
            decoded_section.add_line(
                f'Possible Base64 Decoded Parameters: {decoded_param}')
            self.find_ip(decoded_param)

        return decoded
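
A standalone illustration of the candidate filter used above (more than 6 unique characters, a length of at least 16 that is a multiple of 4) may help show why short or repetitive strings are never treated as base64. This is a simplified sketch: the regex drops the whitespace handling of the real method and the helper name and sample string are illustrative only:

import binascii
import re


def decode_b64_candidates(data: str) -> list:
    """Return decoded bytes for substrings of `data` that look like base64."""
    decoded = []
    for candidate in re.findall(r'(?:[A-Za-z0-9+/]{10,}={0,2}){2,}', data):
        # Same sanity checks as check_for_b64: enough distinct characters,
        # a minimum length, and a length that is a multiple of 4
        if len(set(candidate)) > 6 and len(candidate) >= 16 and len(candidate) % 4 == 0:
            try:
                decoded.append(binascii.a2b_base64(candidate))
            except (binascii.Error, ValueError):
                pass
    return decoded


# 'cG93ZXJzaGVsbCAtZW5jIFNvbWV0aGluZ0JhZA==' decodes to b'powershell -enc SomethingBad'
print(decode_b64_candidates('junk cG93ZXJzaGVsbCAtZW5jIFNvbWV0aGluZ0JhZA== junk'))
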
    def execute(self, request):
        parser = eml_parser.eml_parser.EmlParser(include_raw_body=True,
                                                 include_attachment_data=True)

        # Validate URLs in sample, strip out [] if found
        content_str = request.file_contents.decode(errors="ignore")
        content_str, retry = self.validate_urls(content_str)
        while retry:
            content_str, retry = self.validate_urls(content_str)
        parsed_eml = parser.decode_email_bytes(content_str.encode())

        result = Result()
        header = parsed_eml['header']

        if "from" in header:
            all_uri = set()

            for body_counter, body in enumerate(parsed_eml['body']):
                if request.get_param('extract_body_text'):
                    fd, path = mkstemp()
                    with os.fdopen(fd, 'w') as f:
                        f.write(body['content'])
                    request.add_extracted(path, "body_" + str(body_counter),
                                          "Body text")
                if "uri" in body:
                    for uri in body['uri']:
                        all_uri.add(uri)

            kv_section = ResultSection('Email Headers',
                                       body_format=BODY_FORMAT.KEY_VALUE,
                                       parent=result)

            # Basic tags
            kv_section.add_tag("network.email.address", header['from'].strip())
            for to in header['to']:
                kv_section.add_tag("network.email.address", to)
            kv_section.add_tag("network.email.date",
                               str(header['date']).strip())
            kv_section.add_tag("network.email.subject",
                               header['subject'].strip())

            # Add CCs to body and tags
            if 'cc' in header:
                for cc in header['cc']:
                    kv_section.add_tag("network.email.address", cc.strip())

            # Add Message ID to body and tags
            if 'message-id' in header['header']:
                kv_section.add_tag("network.email.msg_id",
                                   header['header']['message-id'][0].strip())

            # Add Tags for received IPs
            if 'received_ip' in header:
                for ip in header['received_ip']:
                    kv_section.add_tag('network.static.ip', ip.strip())

            # Add Tags for received Domains
            if 'received_domain' in header:
                for dom in header['received_domain']:
                    kv_section.add_tag('network.static.domain', dom.strip())

            # If we've found URIs, add them to a section
            if len(all_uri) > 0:
                uri_section = ResultSection('URIs Found:', parent=result)
                for uri in all_uri:
                    uri_section.add_line(uri)
                    uri_section.add_tag('network.static.uri', uri.strip())
                    parsed_url = urlparse(uri)
                    if parsed_url.hostname and re.match(
                            IP_ONLY_REGEX, parsed_url.hostname):
                        uri_section.add_tag('network.static.ip',
                                            parsed_url.hostname)
                    else:
                        uri_section.add_tag('network.static.domain',
                                            parsed_url.hostname)

            # Bring all headers together...
            extra_header = header.pop('header', {})
            header.pop('received', None)
            header.update(extra_header)

            kv_section.body = json.dumps(header, default=self.json_serial)

            if "attachment" in parsed_eml:
                for attachment in parsed_eml['attachment']:
                    fd, path = mkstemp()

                    with os.fdopen(fd, 'wb') as f:
                        f.write(base64.b64decode(attachment['raw']))
                    request.add_extracted(path, attachment['filename'],
                                          "Attachment ")
                ResultSection('Extracted Attachments:',
                              body="\n".join([
                                  x['filename']
                                  for x in parsed_eml['attachment']
                              ]),
                              parent=result)

            if request.get_param('save_emlparser_output'):
                fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
                with os.fdopen(fd, "w") as myfile:
                    myfile.write(
                        json.dumps(parsed_eml, default=self.json_serial))
                request.add_supplementary(
                    temp_path, "parsing.json",
                    "These are the raw results of running GOVCERT-LU's eml_parser"
                )
        else:
            text_section = ResultSection('EML parsing results')
            text_section.add_line("Could not parse EML")
            result.add_section(text_section)

        request.result = result
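The EML example above leans on two helpers that are not shown: self.json_serial (passed to json.dumps as the default= hook) and self.validate_urls. Below is a minimal sketch of what they could look like, written as plain functions rather than methods; the behaviours (datetime-to-ISO serialization and stripping one layer of brackets from defanged URLs such as evil[.]com) are assumptions inferred from how they are called, not the service's actual code:

import re
from datetime import datetime
from typing import Tuple


def json_serial(obj):
    """json.dumps default= hook: render datetime objects as ISO strings."""
    if isinstance(obj, datetime):
        return obj.isoformat()
    # Fall back to a plain string for anything else json cannot handle natively
    return str(obj)


def validate_urls(content: str) -> Tuple[str, bool]:
    """Remove one layer of square brackets from defanged URLs.

    Returns the cleaned content and whether another pass is needed, which is
    why the caller loops until retry is False."""
    cleaned = re.sub(r'(\w)\[([.@])\](\w)', r'\1\2\3', content)
    return cleaned, cleaned != content
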
Example #24
0
    def execute(self, request: ServiceRequest) -> None:
        """ Main module see README for details. """
        start = time.time()

        result = Result()
        request.result = result
        file_path = request.file_path
        if request.deep_scan:
            # Maximum size of submitted file to run this service:
            max_size = 200000
            # String length maximum, used in basic ASCII and UNICODE modules:
            max_length = 1000000
            # String list maximum size
            # List produced by the basic ASCII and UNICODE module results; determines
            # whether patterns.py will only evaluate network IOC patterns:
            st_max_size = 100000
            # Minimum string size for encoded/stacked string modules:
            enc_min_length = 7
            stack_min_length = 7
        else:
            max_size = self.config.get('max_size', 85000)
            max_length = self.config.get('max_length', 5000)
            st_max_size = self.config.get('st_max_size', 0)
            enc_min_length = self.config.get('enc_min_length', 7)
            stack_min_length = self.config.get('stack_min_length', 7)
        timeout = self.service_attributes.timeout - 50

        if len(request.file_contents) > max_size:
            return

        stack_args = [
            FLOSS, '-n', str(stack_min_length), '--no-decoded-strings',
            file_path
        ]
        decode_args = [
            FLOSS, '-n', str(enc_min_length), '-x', '--no-static-strings',
            '--no-stack-strings', file_path
        ]

        with Popen(stack_args, stdout=PIPE, stderr=PIPE) as stack, \
                Popen(decode_args, stdout=PIPE, stderr=PIPE) as decode:
            stack_out, _, timed_out = self.handle_process(
                stack, timeout + start - time.time(), ' '.join(stack_args))
            if timed_out:
                result.add_section(
                    ResultSection('FLARE FLOSS stacked strings timed out'))
                self.log.warning(
                    f'floss stacked strings timed out for sample {request.sha256}'
                )

            dec_out, dec_err, timed_out = self.handle_process(
                decode, timeout + start - time.time(), ' '.join(decode_args))
            if timed_out:
                result.add_section(
                    ResultSection('FLARE FLOSS decoded strings timed out'))
                self.log.warning(
                    f'floss decoded strings timed out for sample {request.sha256}'
                )

        if stack_out:
            sections = [[y for y in x.splitlines() if y]
                        for x in stack_out.split(b'\n\n')]
            for section in sections:
                if not section:  # skip empty
                    continue
                match = re.match(rb'FLOSS static\s+.*\s+strings', section[0])
                if match:
                    result_section = static_result(section, max_length,
                                                   st_max_size)
                    if result_section:
                        result.add_section(result_section)
                    continue
                match = re.match(rb'.*\d+ stackstring.*', section[0])
                if match:
                    result_section = stack_result(section)
                    if result_section:
                        result.add_section(result_section)
                    continue

        # Process decoded strings results
        if dec_out:
            result_section = decoded_result(dec_out)
            if result_section:
                if dec_err:
                    result_section.add_line(
                        "Flare Floss generated error messages while analyzing:"
                    )
                    result_section.add_line(safe_str(dec_err))
                result.add_section(result_section)
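
handle_process is referenced above but not shown. Based purely on how it is called (a Popen object, a remaining-time budget in seconds, and the joined command string), a minimal sketch could look like the following, written as a plain function rather than a method; the return signature (stdout, stderr, timed_out) is taken from the calls above and everything else is an assumption:

from subprocess import Popen, TimeoutExpired
from typing import Optional, Tuple


def handle_process(proc: Popen, timeout: float,
                   command: str) -> Tuple[Optional[bytes], Optional[bytes], bool]:
    """Wait up to `timeout` seconds for `proc`, killing it on timeout.

    `command` is only kept so a caller could log which invocation timed out."""
    try:
        # Guard against an already-exhausted budget; give the process at least a second
        stdout, stderr = proc.communicate(timeout=max(timeout, 1))
        return stdout, stderr, False
    except TimeoutExpired:
        proc.kill()
        # Collect whatever output was produced before the kill
        stdout, stderr = proc.communicate()
        return stdout, stderr, True
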
Example #25
0
    def execute(self, request):
        """Main Module. See README for details."""
        global imginfo
        result = Result()
        request.result = result
        self.sha = request.sha256
        infile = request.file_path
        run_steg = request.get_param('run_steg')

        # Run image-specific modules
        supported_images = re.compile('image/(bmp|gif|jpeg|jpg|png)')
        if re.match(supported_images, request.file_type):
            # Extract img info using Pillow (already available in steg.py) and determine if steg modules should be run
            if self.config['run_steg_auto'] or run_steg:
                decloak = True
            else:
                decloak = False
            try:
                imginfo = ImageInfo(infile, request, result,
                                    self.working_directory, self.log)
            except NotSupported:
                decloak = False

            # Run Tesseract on sample
            # Process the command and save the csv result in the result object
            usable_out = None
            orig_outfile = os.path.join(self.working_directory, 'outfile')
            stdout, stderr = self.tesseract_call(infile, orig_outfile)

            if stdout or stderr:
                # Assess Tesseract warnings
                if b"pix too small" in stderr:
                    # Make the image larger with convert command
                    c_outfile = os.path.join(self.working_directory,
                                             'enlrg_img')
                    c_stdout, c_stderr = self.convert_img(infile, c_outfile)
                    if c_stdout:
                        c_outfile = os.path.join(self.working_directory,
                                                 'c_outfile')
                        enlrg_infile = os.path.join(self.working_directory,
                                                    'enlrg')
                        if not c_stderr:
                            stdout, stderr = self.tesseract_call(
                                enlrg_infile, c_outfile)
                            if stdout:
                                if not stderr:
                                    outfile = c_outfile
                                else:
                                    outfile = orig_outfile
                            else:
                                outfile = orig_outfile
                        else:
                            outfile = orig_outfile
                    else:
                        outfile = orig_outfile
                else:
                    outfile = orig_outfile
                    self.log.debug(
                        "Tesseract errored/warned on sample {}. Error:{}".
                        format(self.sha, stderr))

                usable_out = self.assess_output(outfile, request)

            if usable_out:
                ores = ResultSection("OCR Engine detected strings in image",
                                     body_format=BODY_FORMAT.MEMORY_DUMP)
                ores.add_line("Text preview (up to 500 bytes):\n")
                ores.add_line("{}".format(usable_out[0:500]))
                result.add_section(ores)
            # Find attached data
            additional_content = self.find_additional_content(infile)
            if additional_content:
                ares = (ResultSection("Possible Appended Content Found",
                                      body_format=BODY_FORMAT.MEMORY_DUMP))
                ares.add_line(
                    "{} Bytes of content found at end of image file".format(
                        len(additional_content)))
                ares.add_line("Text preview (up to 500 bytes):\n")
                ares.add_line("{}".format(safe_str(additional_content)[0:500]))
                ares.set_heuristic(2)
                result.add_section(ares)
                file_name = "{}_appended_img_content".format(
                    hashlib.sha256(additional_content).hexdigest()[0:10])
                file_path = os.path.join(self.working_directory, file_name)
                with open(file_path, 'wb') as unibu_file:
                    unibu_file.write(additional_content)
                request.add_extracted(file_path, file_name,
                                      "Carved content found at end of image.")
            # Steganography modules
            if decloak:
                if request.deep_scan:
                    imginfo.decloak()
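
find_additional_content is not shown either. One common way to carve appended data is to look for the format's trailer marker and return everything after it; the sketch below takes that approach, but the marker table, the helper name and the file_type argument are assumptions rather than the service's actual implementation (BMP has no trailer marker, so it is simply skipped here):

# Trailer markers for the image formats this example claims to support
IMG_TRAILERS = {
    'image/jpeg': b'\xff\xd9',                # JPEG end-of-image marker
    'image/jpg': b'\xff\xd9',
    'image/png': b'IEND\xae\x42\x60\x82',     # PNG IEND chunk type + CRC
    'image/gif': b'\x3b',                     # GIF trailer byte
}


def find_appended_content(path: str, file_type: str) -> bytes:
    """Return any bytes found after the image's trailer marker (empty if none)."""
    trailer = IMG_TRAILERS.get(file_type)
    if trailer is None:
        return b''
    with open(path, 'rb') as f:
        data = f.read()
    # Naive heuristic: use the last occurrence of the trailer; embedded thumbnails
    # or trailer-like byte runs inside the appended data can still fool this
    end = data.rfind(trailer)
    if end == -1:
        return b''
    return data[end + len(trailer):]
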
Example #26
0
    def execute(self, request):
        # ==================================================================
        # Execute a request:
        #   Every time your service receives a new file to scan, the execute function is called
        #   This is where you should execute your processing code.
        #   For the purpose of this example, we will only generate results ...

        # You should run your code here...

        # ==================================================================
        # Check if we're scanning an embedded file
        #   This service always drops two embedded files: one generates random results and the other empty results
        #   We're making a check to see if we're scanning one of those embedded files.
        #   In a normal service this is not something you would do at all but since we are using this
        #   service in our unit tests to test all features of our report generator, we have to do this
        if request.sha256 not in [
                'd729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06'
        ]:
            # Main file results...

            # ==================================================================
            # Write the results:
            #   First, create a result object where all the result sections will be saved to
            result = Result()

            # ==================================================================
            # Standard text section: BODY_FORMAT.TEXT - DEFAULT
            #   Text sections basically just dump the text to the screen...
            #     All section scores will be summed in the service result
            #     The Result classification will be the highest classification found in the sections
            text_section = ResultSection('Example of a default section')
            # You can add lines to your section one at a time
            #   Here we will generate a random line
            text_section.add_line(get_random_phrase())
            # Or you can add them from a list
            #   Here we will generate a random number of random lines
            text_section.add_lines(
                [get_random_phrase() for _ in range(random.randint(1, 5))])
            # If the section needs to affect the score of the file you need to set a heuristic
            #   Here we will pick one at random
            #     In addition to adding a heuristic, we will associate a signature with the heuristic;
            #     we do this by adding the signature name to the heuristic. (Here we generate a random name)
            text_section.set_heuristic(random.randint(1, 4),
                                       signature=get_random_phrase(
                                           1, 4).lower().replace(" ", "_"))
            # Make sure you add your section to the result
            result.add_section(text_section)

            # ==================================================================
            # Color map Section: BODY_FORMAT.GRAPH_DATA
            #     Creates a color map bar using a minimum and maximum domain
            #     e.g. We are using this section to display the entropy distribution in some services
            cmap_min = 0
            cmap_max = 20
            color_map_data = {
                'type': 'colormap',
                'data': {
                    'domain': [cmap_min, cmap_max],
                    'values': [random.random() * cmap_max for _ in range(50)]
                }
            }
            section_color_map = ResultSection(
                "Example of colormap result section",
                body_format=BODY_FORMAT.GRAPH_DATA,
                body=json.dumps(color_map_data))
            result.add_section(section_color_map)

            # ==================================================================
            # URL section: BODY_FORMAT.URL
            #   Generate a list of clickable urls using a json encoded format
            #     As you can see here, the body of the section can be set directly instead of line by line
            random_host = get_random_host()
            url_section = ResultSection('Example of a simple url section',
                                        body_format=BODY_FORMAT.URL,
                                        body=json.dumps({
                                            "name":
                                            "Random url!",
                                            "url":
                                            f"https://{random_host}/"
                                        }))

            # Since urls are very important features we can tag those features in the system so they are easy to find
            #   Tags are defined by a type and a value
            url_section.add_tag("network.static.domain", random_host)

            # You may also want to provide a list of urls!
            #   Also, no need to provide a name; the url link will be displayed
            host1 = get_random_host()
            host2 = get_random_host()
            ip1 = get_random_ip()
            urls = [{
                "url": f"https://{host1}/"
            }, {
                "url": f"https://{host2}/"
            }, {
                "url": f"https://{ip1}/"
            }]
            url_sub_section = ResultSection(
                'Example of a url section with multiple links',
                body_format=BODY_FORMAT.URL,
                body=json.dumps(urls))
            url_sub_section.set_heuristic(random.randint(1, 4))
            url_sub_section.add_tag("network.static.ip", ip1)
            url_sub_section.add_tag("network.static.domain", host1)
            url_sub_section.add_tag("network.dynamic.domain", host2)
            # Since url_sub_section is a sub-section of url_section
            # we will add it as a sub-section of url_section not to the main result itself
            url_section.add_subsection(url_sub_section)
            result.add_section(url_section)

            # ==================================================================
            # Memory dump section: BODY_FORMAT.MEMORY_DUMP
            #     Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
            data = hexdump(
                b"This is some random text that we will format as an hexdump and you'll see "
                b"that the hexdump formatting will be preserved by the memory dump section!"
            )
            memdump_section = ResultSection(
                'Example of a memory dump section',
                body_format=BODY_FORMAT.MEMORY_DUMP,
                body=data)
            memdump_section.set_heuristic(random.randint(1, 4))
            result.add_section(memdump_section)

            # ==================================================================
            # KEY_VALUE section:
            #     This section allows the service writer to list a bunch of key/value pairs to be displayed in the UI
            #     while also providing easy-to-parse data for automated tools.
            #     NB: You should definitely use this over a JSON body type since this one will be displayed correctly
            #         in the UI for the user
            #     The body argument must be a JSON dump of a dictionary (only str, int, and booleans are allowed)
            kv_body = {
                "a_str": "Some string",
                "a_bool": False,
                "an_int": 102,
            }
            kv_section = ResultSection('Example of a KEY_VALUE section',
                                       body_format=BODY_FORMAT.KEY_VALUE,
                                       body=json.dumps(kv_body))
            result.add_section(kv_section)

            # ==================================================================
            # JSON section:
            #     Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
            #     to display a tree view of JSON results.
            #     NB: Use this sparingly! As a service developer you should do your best to include important
            #     results as their own result sections.
            #     The body argument must be a json dump of a python dictionary
            json_body = {
                "a_str": "Some string",
                "a_list": ["a", "b", "c"],
                "a_bool": False,
                "an_int": 102,
                "a_dict": {
                    "list_of_dict": [{
                        "d1_key": "val",
                        "d1_key2": "val2"
                    }, {
                        "d2_key": "val",
                        "d2_key2": "val2"
                    }],
                    "bool":
                    True
                }
            }
            json_section = ResultSection('Example of a JSON section',
                                         body_format=BODY_FORMAT.JSON,
                                         body=json.dumps(json_body))
            result.add_section(json_section)

            # ==================================================================
            # Re-Submitting files to the system
            #     Adding extracted files will have them resubmitted to the system for analysis

            # This file will generate random results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(data.encode())
            request.add_extracted(temp_path, "file.txt",
                                  "Extracted by some magic!")

            # This file will generate empty results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"EMPTY")
            request.add_extracted(temp_path, "empty.txt",
                                  "Extracted empty resulting file")

            # ==================================================================
            # Supplementary files
            #     Adding supplementary files will save them on the datastore for future
            #      reference but won't reprocess those files.
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(urls))
            request.add_supplementary(temp_path, "urls.json",
                                      "These are urls as a JSON file")
            # Like embedded files, you can add more than one supplementary file
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(json_body))
            request.add_supplementary(temp_path, "json_body.json",
                                      "This is the json_body as a JSON file")

            # ==================================================================
            # Wrap-up:
            #     Save your result object back into the request
            request.result = result

        # ==================================================================
        # Empty results file
        elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
            # Creating an empty result object
            request.result = Result()

        # ==================================================================
        # Randomized results file
        else:
            # For the randomized results file, we will completely randomize the results
            #   The content of those results does not matter since we've already shown you
            #   all the different result sections, tagging, heuristics and file upload functions
            embedded_result = Result()

            # random number of sections
            for _ in range(1, 3):
                embedded_result.add_section(self._create_random_section())

            request.result = embedded_result

    def execute(self, request):
        # ==================================================================
        # Execute a request:
        #   Every time your service receives a new file to scan, the execute function is called
        #   This is where you should execute your processing code.
        #   For the purpose of this example, we will only generate results ...

        # You should run your code here...

        # ==================================================================
        # Check if we're scanning an embedded file
        #   This service always drops three embedded files: two generate random results and the other empty results
        #   We're making a check to see if we're scanning one of those embedded files.
        #   In a normal service this is not something you would do at all but since we are using this
        #   service in our unit tests to test all features of our report generator, we have to do this
        if request.sha256 not in [
                'd729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                '5ce5ae8ef56a54af2c44415800a81ecffd49a33ae8895dfe38fc1075d3f619ec',
                'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06'
        ]:
            # Main file results...

            # ==================================================================
            # Write the results:
            #   First, create a result object where all the result sections will be saved to
            result = Result()

            # ==================================================================
            # Standard text section: BODY_FORMAT.TEXT - DEFAULT
            #   Text sections basically just dump the text to the screen...
            #     All section scores will be summed in the service result
            #     The Result classification will be the highest classification found in the sections
            text_section = ResultTextSection('Example of a default section')
            # You can add lines to your section one at a time
            #   Here we will generate a random line
            text_section.add_line(get_random_phrase())
            # Or you can add them from a list
            #   Here we will generate a random number of random lines
            text_section.add_lines(
                [get_random_phrase() for _ in range(random.randint(1, 5))])
            # You can tag data to a section; tagging is used to quickly find defining information about a file
            text_section.add_tag("attribution.implant", "ResultSample")
            # If the section needs to affect the score of the file you need to set a heuristic
            #     In addition to adding a heuristic, we will associate a signature with the heuristic;
            #     we do this by adding the signature name to the heuristic.
            text_section.set_heuristic(3, signature="sig_one")
            # You can attach attack ids to heuristics after they were defined
            text_section.heuristic.add_attack_id(
                random.choice(list(software_map.keys())))
            text_section.heuristic.add_attack_id(
                random.choice(list(attack_map.keys())))
            text_section.heuristic.add_attack_id(
                random.choice(list(group_map.keys())))
            text_section.heuristic.add_attack_id(
                random.choice(list(revoke_map.keys())))
            # Same thing for the signatures: they can be added to the heuristic after the fact and you can even say how
            #   many times the signature fired by setting its frequency. If you call add_signature_id twice with the
            #   same signature, this will effectively increase the frequency of the signature.
            text_section.heuristic.add_signature_id("sig_two",
                                                    score=20,
                                                    frequency=2)
            text_section.heuristic.add_signature_id("sig_two",
                                                    score=20,
                                                    frequency=3)
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_four", score=0)
            # The heuristic for text_section should have the following properties
            #   1. 1 attack ID: T1066
            #   2. 4 signatures: sig_one, sig_two, sig_three and sig_four
            #   3. Signature frequencies are cumulative, therefore they will be as follows:
            #      - sig_one = 1
            #      - sig_two = 5
            #      - sig_three = 2
            #      - sig_four = 1
            #   4. The score used by each heuristic is driven by the following rules: the signature_score_map has the
            #      highest priority, then the score value given to add_signature_id, and finally the default
            #      heuristic score is used. Therefore the scores used to calculate the total score for the text_section
            #      are as follows:
            #      - sig_one: 10    -> heuristic default score
            #      - sig_two: 20    -> score provided by the function add_signature_id
            #      - sig_three: 30  -> score provided by the heuristic map
            #      - sig_four: 40   -> score provided by the heuristic map because it's higher priority than the
            #                          function score
            #    5. Total section score is then: 1x10 + 5x20 + 2x30 + 1x40 = 210
            # Make sure you add your section to the result
            result.add_section(text_section)
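
            # As a quick sanity check of the arithmetic in point 5 above: with the scores and
            # cumulative frequencies listed in the comment, the section total works out to 210.
            # (These literals simply mirror the comment; they are not read back from the heuristic.)
            expected_scores = {"sig_one": 10, "sig_two": 20, "sig_three": 30, "sig_four": 40}
            expected_freqs = {"sig_one": 1, "sig_two": 5, "sig_three": 2, "sig_four": 1}
            assert sum(expected_scores[s] * expected_freqs[s] for s in expected_scores) == 210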

            # Even if the section was added to the results you can still modify it by adding a subsection for example
            ResultSection(
                "Example of sub-section without a body added later in processing",
                parent=text_section)

            # ==================================================================
            # Color map Section: BODY_FORMAT.GRAPH_DATA
            #     Creates a color map bar using a minimum and maximum domain
            #     e.g. We are using this section to display the entropy distribution in some services
            cmap_min = 0
            cmap_max = 20
            cmap_values = [random.random() * cmap_max for _ in range(50)]
            # The classification of a section can be set to any valid classification for your system
            section_color_map = ResultGraphSection(
                "Example of colormap result section",
                classification=cl_engine.RESTRICTED)
            section_color_map.set_colormap(cmap_min, cmap_max, cmap_values)
            result.add_section(section_color_map)

            # ==================================================================
            # URL section: BODY_FORMAT.URL
            #   Generate a list of clickable urls using a json encoded format
            #     As you can see here, the body of the section can be set directly instead of line by line
            random_host = get_random_host()
            url_section = ResultURLSection('Example of a simple url section')
            url_section.add_url(f"https://{random_host}/", name="Random url!")

            # Since urls are very important features we can tag those features in the system so they are easy to find
            #   Tags are defined by a type and a value
            url_section.add_tag("network.static.domain", random_host)

            # You may also want to provide a list of urls!
            #   Also, no need to provide a name; the url link will be displayed
            hosts = [get_random_host() for _ in range(2)]

            # A heuristic can fire more than once without being associated with a signature
            url_heuristic = Heuristic(4, frequency=len(hosts))

            url_sub_section = ResultURLSection(
                'Example of a url sub-section with multiple links',
                heuristic=url_heuristic,
                classification=cl_engine.RESTRICTED)
            for host in hosts:
                url_sub_section.add_url(f"https://{host}/")
                url_sub_section.add_tag("network.static.domain", host)

            # You can keep nesting sections if you really need to
            ips = [get_random_ip() for _ in range(3)]
            url_sub_sub_section = ResultURLSection(
                'Example of a two-level deep sub-section')
            for ip in ips:
                url_sub_sub_section.add_url(f"https://{ip}/")
                url_sub_sub_section.add_tag("network.static.ip", ip)

            # Since url_sub_sub_section is a sub-section of url_sub_section
            # we will add it as a sub-section of url_sub_section not to the main result itself
            url_sub_section.add_subsection(url_sub_sub_section)

            # Invalid sections will be ignored, and an error will appear in the logs
            # Sub-sections of invalid sections will be ignored too
            invalid_section = ResultSection("")
            ResultSection(
                "I won't make it to the report because my parent is invalid :(",
                parent=invalid_section)
            url_sub_section.add_subsection(invalid_section)

            # Since url_sub_section is a sub-section of url_section
            # we will add it as a sub-section of url_section not to the main result itself
            url_section.add_subsection(url_sub_section)

            result.add_section(url_section)

            # ==================================================================
            # Memory dump section: BODY_FORMAT.MEMORY_DUMP
            #     Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
            data = hexdump(
                b"This is some random text that we will format as an hexdump and you'll see "
                b"that the hexdump formatting will be preserved by the memory dump section!"
            )
            memdump_section = ResultMemoryDumpSection(
                'Example of a memory dump section', body=data)
            memdump_section.set_heuristic(random.randint(1, 4))
            result.add_section(memdump_section)

            # ==================================================================
            # KEY_VALUE section:
            #     This section allows the service writer to list a bunch of key/value pairs to be displayed in the UI
            #     while also providing easy-to-parse data for automated tools.
            #     NB: You should definitely use this over a JSON body type since this one will be displayed correctly
            #         in the UI for the user
            #     The body argument must be a dictionary (only str, int, and booleans are allowed)
            kv_section = ResultKeyValueSection(
                'Example of a KEY_VALUE section')
            # You can add items individually
            kv_section.set_item('key', "value")
            # Or simply add them in bulk
            kv_section.update_items({
                "a_str": "Some string",
                "a_bool": False,
                "an_int": 102,
            })
            result.add_section(kv_section)

            # ==================================================================
            # ORDERED_KEY_VALUE section:
            #     This section provides the same functionality as the KEY_VALUE section except that the fields
            #     are guaranteed to be preserved in the order in which they are added to the section. Also with
            #     this section, you can repeat the same key name multiple times
            ordered_kv_section = ResultOrderedKeyValueSection(
                'Example of an ORDERED_KEY_VALUE section')
            # You can add items individually
            for x in range(random.randint(3, 6)):
                ordered_kv_section.add_item(f'key{x}', f"value{x}")

            result.add_section(ordered_kv_section)

            # ==================================================================
            # JSON section:
            #     Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
            #     to display a tree view of JSON results.
            #     NB: Use this sparingly! As a service developer you should do your best to include important
            #     results as their own result sections.
            #     The body argument must be a python dictionary
            json_body = {
                "a_str": "Some string",
                "a_list": ["a", "b", "c"],
                "a_bool": False,
                "an_int": 102,
                "a_dict": {
                    "list_of_dict": [{
                        "d1_key": "val",
                        "d1_key2": "val2"
                    }, {
                        "d2_key": "val",
                        "d2_key2": "val2"
                    }],
                    "bool":
                    True
                }
            }
            json_section = ResultJSONSection('Example of a JSON section')
            # You can set the json result to a specific value
            json_section.set_json(json_body)
            # You can also update specific parts after the fact
            json_section.update_json({
                'an_int': 1000,
                'updated_key': 'updated_value'
            })

            result.add_section(json_section)

            # ==================================================================
            # PROCESS_TREE section:
            #     This section allows the service writer to list a bunch of dictionary objects that have nested lists
            #     of dictionaries to be displayed in the UI. Each dictionary object represents a process, and therefore
            #     each dictionary must be of the following format:
            #     {
            #       "process_pid": int,
            #       "process_name": str,
            #       "command_line": str,
            #       "signatures": {}  This dict has the signature name as a key and the score as it's value
            #       "children": []    NB: This list either is empty or contains more dictionaries that have the same
            #                             structure
            #     }
            process_tree_section = ResultProcessTreeSection(
                'Example of a PROCESS_TREE section')
            # You can use the ProcessItem class to create the processes to add to the result section
            evil_process = ProcessItem(123, "evil.exe", "c:\\evil.exe")
            evil_process_child_1 = ProcessItem(
                321, "takeovercomputer.exe",
                "C:\\Temp\\takeovercomputer.exe -f do_bad_stuff")
            # You can add child processes to the ProcessItem objects
            evil_process_child_1.add_child_process(
                ProcessItem(
                    456,
                    "evenworsethanbefore.exe",
                    "C:\\Temp\\evenworsethanbefore.exe -f change_reg_key_cuz_im_bad",
                    signatures={
                        "one": 10,
                        "two": 10,
                        "three": 10
                    }))
            evil_process_child_1.add_child_process(
                ProcessItem(234,
                            "badfile.exe",
                            "C:\\badfile.exe -k nothing_to_see_here",
                            signatures={
                                "one": 1000,
                                "two": 10,
                                "three": 10,
                                "four": 10,
                                "five": 10
                            }))

            # You can add signatures that hit on a ProcessItem Object
            evil_process_child_1.add_signature('one', 250)

            # Or even directly create the ProcessItem object with the signature in it
            evil_process_child_2 = ProcessItem(
                345,
                "benignexe.exe",
                "C:\\benignexe.exe -f \"just kidding, i'm evil\"",
                signatures={"one": 2000})

            # You can also add counts for network, file and registry events to a ProcessItem object
            evil_process_child_2.add_network_events(4)
            evil_process_child_2.add_file_events(7000)
            evil_process_child_2.add_registry_events(10)

            # You can also indicate if the process tree item has been safelisted
            benign_process = ProcessItem(678, "trustme.exe", "C:\\trustme.exe")
            benign_process.safelist()

            evil_process.add_child_process(evil_process_child_1)
            evil_process.add_child_process(evil_process_child_2)

            # Add your processes to the result section via the add_process function
            process_tree_section.add_process(evil_process)
            process_tree_section.add_process(
                ProcessItem(987, "runzeroday.exe",
                            "C:\\runzeroday.exe -f insert_bad_spelling"))
            process_tree_section.add_process(benign_process)

            result.add_section(process_tree_section)
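
            # For reference, one entry in the raw dict format described above would look like
            # (illustrative only; the ProcessItem objects build this structure for you):
            #   {
            #       "process_pid": 678,
            #       "process_name": "trustme.exe",
            #       "command_line": "C:\\trustme.exe",
            #       "signatures": {},
            #       "children": []
            #   }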

            # ==================================================================
            # TABLE section:
            #     This section allows the service writer to have their content displayed in a table format in the UI
            #     The body argument must be a list [] of dict {} objects. A dict object can have a key value pair
            #     where the value is a flat nested dictionary, and this nested dictionary will be displayed as a nested
            #     table within a cell.
            table_section = ResultTableSection('Example of a TABLE section')
            # Use the TableRow class to help adding row to the Table section
            table_section.add_row(
                TableRow(a_str="Some string1",
                         extra_column_here="confirmed",
                         a_bool=False,
                         an_int=101))
            table_section.add_row(
                TableRow(
                    {
                        "a_str": "Some string2",
                        "a_bool": True,
                        "an_int": "to_be_overriden_by_kwargs"
                    },
                    an_int=102))
            table_section.add_row(
                TableRow(a_str="Some string3", a_bool=False, an_int=103))
            # Valid values for the items in the TableRow are: str, int, bool, None, or dict of those values
            table_section.add_row(
                TableRow(
                    {
                        "a_str": "Some string4",
                        "a_bool": None,
                        "an_int": -1000000000000000000
                    }, {
                        "extra_column_there": "confirmed",
                        "nested_key_value_pair": {
                            "a_str": "Some string3",
                            "a_bool": False,
                            "nested_kv_thats_too_deep": {
                                "a_str": "Some string3",
                                "a_bool": False,
                                "an_int": 103,
                            },
                        }
                    }))
            result.add_section(table_section)

            # ==================================================================
            # Re-Submitting files to the system
            #     Adding extracted files will have them resubmitted to the system for analysis

            # This file will generate random results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(data.encode())
            request.add_extracted(temp_path, "file.txt",
                                  "Extracted by some magic!")

            # Embedded files can also have their own classification!
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"CLASSIFIED!!!__" + data.encode())
            request.add_extracted(temp_path,
                                  "classified.doc",
                                  "Classified file ... don't look",
                                  classification=cl_engine.RESTRICTED)

            # This file will generate empty results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"EMPTY")
            request.add_extracted(temp_path, "empty.txt",
                                  "Extracted empty resulting file")

            # ==================================================================
            # Supplementary files
            #     Adding supplementary files will save them on the datastore for future
            #      reference but won't reprocess those files.
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(url_sub_section.body)
            request.add_supplementary(temp_path, "urls.json",
                                      "These are urls as a JSON file")
            # Like embedded files, you can add more than one supplementary file
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(json_body))
            request.add_supplementary(temp_path, "json_body.json",
                                      "This is the json_body as a JSON file")

            # ==================================================================
            # Zeroize on safe tags
            #     When this feature is turned on, the section will get its score set to zero if all its tags
            #     were safelisted by the safelisting engine
            zero_section = ResultSection('Example of zeroize-able section',
                                         zeroize_on_tag_safe=True)
            zero_section.set_heuristic(2)
            zero_section.add_line(
                "This section will have a zero score if all tags are safelisted."
            )
            zero_section.add_tag('network.static.ip', '127.0.0.1')
            result.add_section(zero_section)

            # ==================================================================
            # Auto-collapse
            #     When this feature is turned on, the section will be collapsed when first displayed
            collapse_section = ResultSection(
                'Example of auto-collapse section', auto_collapse=True)
            collapse_section.set_heuristic(2)
            collapse_section.add_line(
                "This section was collapsed when first loaded in the UI")
            result.add_section(collapse_section)

            # ==================================================================
            # Image Section
            #     This type of section allows the service writer to display images to the user
            image_section = ResultImageSection(request,
                                               'Example of Image section')
            for x in range(6):
                image_section.add_image(f'data/000{x+1}.jpg',
                                        f'000{x+1}.jpg',
                                        f'ResultSample screenshot 000{x+1}',
                                        ocr_heuristic_id=6)
            result.add_section(image_section)

            # ==================================================================
            # Multi Section
            #     This type of section allows the service writer to display multiple section types
            #     in the same result section. Here's a concrete example of this:
            multi_section = ResultMultiSection(
                'Example of Multi-typed section')
            multi_section.add_section_part(
                TextSectionBody(
                    body="We have detected very high entropy multiple sections "
                    "of your file, this section is most-likely packed or "
                    "encrypted.\n\nHere are affected sections:"))
            section_count = random.randint(1, 4)
            for x in range(section_count):
                multi_section.add_section_part(
                    KVSectionBody(section_name=f".UPX{x}",
                                  offset=f'0x00{8+x}000',
                                  size='4196 bytes'))
                graph_part = GraphSectionBody()
                graph_part.set_colormap(
                    0, 8, [7 + random.random() for _ in range(20)])
                multi_section.add_section_part(graph_part)
                if x != section_count - 1:
                    multi_section.add_section_part(DividerSectionBody())
                multi_section.add_tag("file.pe.sections.name", f".UPX{x}")

            multi_section.set_heuristic(5)
            result.add_section(multi_section)

            # ==================================================================
            # Propagate temporary submission data to other services
            #   Sometimes two services can work in tandem where one extracts some piece of information that the
            #   other one uses to do its work. This is how a service can set temporary data that other
            #   services that subscribe to it can use.
            request.temp_submission_data['kv_section'] = kv_section.body
            request.temp_submission_data[
                'process_tree_section'] = process_tree_section.body
            request.temp_submission_data['url_section'] = url_sub_section.body

            # ==================================================================
            # Wrap-up:
            #     Save your result object back into the request
            request.result = result

        # ==================================================================
        # Empty results file
        elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
            # Creating an empty result object
            request.result = Result()

        # ==================================================================
        # Randomized results file
        else:
            # For the randomized results file, we will completely randomize the results
            #   The content of those results does not matter since we've already shown you
            #   all the different result sections, tagging, heuristics and file upload functions
            embedded_result = Result()

            # random number of sections
            for _ in range(1, 3):
                embedded_result.add_section(self._create_random_section())

            request.result = embedded_result