Example #1
    def handle_artefacts(artefact_list: list,
                         request: ServiceRequest) -> Optional[ResultSection]:
        """
        Goes through each artefact in artefact_list, uploading them and adding result sections accordingly

        Positional arguments:
        artefact_list -- list of dictionaries that each represent an artefact

        Returns:
        A ResultSection if any artefact subsections were created, otherwise None
        """

        validated_artefacts = SandboxOntology._validate_artefacts(
            artefact_list)

        artefacts_result_section = ResultSection("Sandbox Artefacts")

        for artefact in validated_artefacts:
            SandboxOntology._handle_artefact(artefact,
                                             artefacts_result_section)

            if artefact.to_be_extracted:
                try:
                    request.add_extracted(artefact.path, artefact.name,
                                          artefact.description)
                except MaxExtractedExceeded:
                    # To avoid errors from being raised when too many files have been extracted
                    pass
            else:
                request.add_supplementary(artefact.path, artefact.name,
                                          artefact.description)

        return artefacts_result_section if artefacts_result_section.subsections else None
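The artefact dictionaries are only accessed through a handful of fields. Below is a minimal sketch of an artefact_list entry, assuming SandboxOntology._validate_artefacts accepts dictionaries with the keys implied by the attribute accesses above (the values are illustrative only):

    hypothetical_artefacts = [
        {
            "name": "memdump.bin",
            "path": "/tmp/working/memdump.bin",
            "description": "Memory dump collected by the sandbox",
            "to_be_extracted": True,   # resubmitted for analysis via add_extracted
        },
        {
            "name": "console.log",
            "path": "/tmp/working/console.log",
            "description": "Sandbox console output",
            "to_be_extracted": False,  # attached via add_supplementary only
        },
    ]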
Example #2
    def execute(self, request: ServiceRequest) -> None:
        # Check that the current node has a version map
        while True:
            if self.nodes[self.current_node]['engine_count'] == 0:
                self._get_version_map(self.current_node)
                self.log.info("Getting version map from execute() function")
                if self.nodes[self.current_node]['engine_count'] == 0:
                    self.new_node(force=True)
            else:
                break

        filename = request.file_path
        try:
            response = self.scan_file(filename)
        except RecoverableError:
            response = self.scan_file(filename)
        result = self.parse_results(response)
        request.result = result
        request.set_service_context(
            f"Definition Time Range: {self.nodes[self.current_node]['oldest_dat']} - "
            f"{self.nodes[self.current_node]['newest_dat']}")

        # Compare queue time of current node with new random node after a minimum run time on current node
        elapsed_time = time.time() - self.start_time
        if elapsed_time >= self.config.get("max_node_time"):
            self.new_node(force=True)
        elif elapsed_time >= self.config.get("min_node_time"):
            self.new_node(force=False)
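The rotation at the end of execute() is driven by two timing values read from the service configuration. A sketch of what that configuration might look like, with key names taken from the self.config.get(...) calls above and purely illustrative values:

    example_config = {
        "min_node_time": 60,   # seconds before an opportunistic node switch is considered
        "max_node_time": 300,  # seconds after which a node switch is forced
    }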
Example #3
    def execute(self, request: ServiceRequest) -> None:
        sha256 = request.sha256
        result = Result()

        # First, let's get the analysis metadata, if it exists on the system
        main_api_result = self._get_analysis_metadata(
            request.get_param('analysis_id'), sha256)

        if not main_api_result:
            self.log.debug(f"SHA256 {sha256} is not on the system.")
            request.result = result
            return

        if main_api_result.get("verdict") in Verdicts.NOT_SUPPORTED_VERDICTS.value:
            self.log.debug(f"Unsupported file type: {request.file_type}")
            request.result = result
            return
        elif main_api_result.get("verdict") == AnalysisStatusCode.FAILED.value:
            self.log.warning("The Intezer server is not feeling well :(")
            request.result = result
            return

        analysis_id = main_api_result["analysis_id"]

        # Setup the main result section
        main_kv_section = ResultKeyValueSection(
            "IntezerStatic analysis report")
        processed_main_api_result = self._process_details(
            main_api_result.copy(), UNINTERESTING_ANALYSIS_KEYS)
        main_kv_section.update_items(processed_main_api_result)
        if "family_name" in main_api_result:
            main_kv_section.add_tag("attribution.family",
                                    main_api_result["family_name"])

        # This file-verdict map will be used later on to assign heuristics to sub-analyses
        file_verdict_map = {}
        self._process_iocs(analysis_id, file_verdict_map, main_kv_section)
        if not self.config["is_on_premise"]:
            self._process_ttps(analysis_id, main_kv_section)
        self._handle_subanalyses(request, sha256, analysis_id,
                                 file_verdict_map, main_kv_section)

        # Setting heuristic here to avoid FPs
        if main_kv_section.subsections:
            self._set_heuristic_by_verdict(main_kv_section,
                                           main_api_result["verdict"])

        if main_kv_section.subsections or main_kv_section.heuristic:
            result.add_section(main_kv_section)
        request.result = result
Example #4
    def gen_results(self, api_response):
        procr = self.upmal.process_results(api_response, self.upm)
        result = Result()
        # sample1 is assumed to be a module-level sample fixture
        service_task = ServiceTask(sample1)
        task = Task(service_task)
        request = ServiceRequest(task)
        self.upmal.generate_results(procr, result, api_response, request)
Example #5
    def test_execute(sample, metadefender_class_instance, mocker):
        from assemblyline_v4_service.common.task import Task
        from assemblyline_v4_service.common.result import Result
        from assemblyline.odm.messages.task import Task as ServiceTask
        from assemblyline_v4_service.common.request import ServiceRequest
        import json
        metadefender_class_instance.nodes["blah"] = {
            "engine_count": 1,
            "oldest_dat": 1,
            "newest_dat": 1
        }
        mocker.patch.object(metadefender_class_instance, "_get_version_map")
        metadefender_class_instance.start()

        service_task = ServiceTask(sample)
        task = Task(service_task)
        metadefender_class_instance._task = task
        service_request = ServiceRequest(task)

        mocker.patch.object(metadefender_class_instance, "scan_file")
        mocker.patch.object(metadefender_class_instance, "new_node")
        mocker.patch.object(metadefender_class_instance,
                            "parse_results",
                            return_value=Result())

        # Actually executing the sample
        metadefender_class_instance.execute(service_request)

        # For coverage
        metadefender_class_instance.config["max_node_time"] = 0
        metadefender_class_instance.execute(service_request)

        metadefender_class_instance.config["max_node_time"] = 1000
        metadefender_class_instance.config["min_node_time"] = 0
        metadefender_class_instance.execute(service_request)
Example #6
    def test_service(sample):
        overwrite_results = False  # Used temporarily to mass-correct tests

        cls = ViperMonkey()
        cls.start()

        task = Task(create_service_task(sample=sample))
        service_request = ServiceRequest(task)

        cls.execute(service_request)

        # Get the result of execute() from the test method
        test_result = task.get_service_result()

        # Get the assumed "correct" result of the sample
        correct_path = os.path.join(SELF_LOCATION, "tests", "results",
                                    f"{sample}.json")
        with open(correct_path, "r") as f:
            correct_result = json.load(f)

        test_result = generalize_result(test_result)

        if overwrite_results:
            if test_result != correct_result:
                with open(correct_path, "w") as f:
                    json.dump(test_result, f)
        else:
            assert test_result == correct_result
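generalize_result is imported by this test but not shown here. A hypothetical sketch of what such a helper might do, stripping fields that vary between runs so stored results compare cleanly (the field names are assumptions, not the real helper's behaviour):

    def generalize_result_sketch(result: dict) -> dict:
        response = result.get("response", {})
        for volatile_key in ("milestones", "service_version", "service_context"):
            response.pop(volatile_key, None)
        return result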
Example #7
    def test_execute(class_instance, sample):
        # Imports required to execute the sample
        from assemblyline_v4_service.common.task import Task
        from assemblyline.odm.messages.task import Task as ServiceTask
        from assemblyline_v4_service.common.request import ServiceRequest

        # Creating the required objects for execution
        service_task = ServiceTask(sample)
        task = Task(service_task)
        class_instance._task = task
        service_request = ServiceRequest(task)

        # Actually executing the sample
        class_instance.execute(service_request)

        # Get the result of execute() from the test method
        test_result = task.get_service_result()

        # Get the assumed "correct" result of the sample
        correct_result_path = os.path.join(TEST_DIR, "results", task.file_name + ".json")
        with open(correct_result_path, "r") as f:
            correct_result = json.loads(f.read())

        # Assert that the appropriate sections of the dict are equal

        # Avoiding date in the response
        test_result_response = test_result.pop("response")
        correct_result_response = correct_result.pop("response")
        assert test_result == correct_result

        # Comparing everything in the response except for the date
        test_result_response.pop("milestones")
        correct_result_response.pop("milestones")
        assert test_result_response == correct_result_response
Example #8
    def handle_task(self, task: ServiceTask) -> None:
        try:
            self._task = Task(task)
            self.log.info(
                f"[{self._task.sid}] Starting task for file: {self._task.sha256} ({self._task.type})"
            )
            self._task.start(
                self.service_attributes.default_result_classification,
                self.service_attributes.version, self.get_tool_version())
            self.ontologies = defaultdict(list)
            request = ServiceRequest(self._task)
            self.execute(request)
            self._attach_service_meta_ontology(request)
            self._success()
        except RuntimeError as re:
            if is_recoverable_runtime_error(re):
                new_ex = exceptions.RecoverableError(
                    "Service trying to use a threadpool during shutdown")
                self._handle_execute_failure(
                    new_ex, exceptions.get_stacktrace_info(re))
            else:
                self._handle_execute_failure(
                    re, exceptions.get_stacktrace_info(re))
        except Exception as ex:
            self._handle_execute_failure(ex,
                                         exceptions.get_stacktrace_info(ex))
        finally:
            self._cleanup()
Example #9
    def execute(self, request: ServiceRequest):
        try:
            self.client = Client(
                apikey=self.config.get("api_key", request.get_param("api_key")),
                proxy=self.config.get('proxy') or None)
        except Exception as e:
            self.log.error("No API key found for VirusTotal")
            raise e

        if request.task.metadata.get('submitted_url') and request.task.depth == 0:
            response = self.scan_url(request)
        else:
            response = self.scan_file(request)
        if response:
            result = self.parse_results(response)
            request.result = result
        else:
            request.result = Result()
Example #10
    def execute(self, request: ServiceRequest) -> None:
        """ Main Module. See README for details."""
        request.result = Result()
        patterns = PatternMatch()
        self.sample_type = request.file_type
        self.excess_extracted = 0
        # Filters for submission modes. Listed in order of use.
        if request.deep_scan:
            # Maximum size of submitted file to run this service:
            max_size = 8000000
            # String length maximum
            # Used in basic ASCII and UNICODE modules:
            max_length = 1000000
            # String list maximum size
            # List produced by basic ASCII and UNICODE module results and will determine
            # if patterns.py will only evaluate network IOC patterns:
            st_max_size = 1000000
            # BBcrack maximum size of submitted file to run module:
            bb_max_size = 200000
        else:
            max_size = self.config.get('max_size', 3000000)
            max_length = self.config.get('max_length', 5000)
            st_max_size = self.config.get('st_max_size', 0)
            bb_max_size = self.config.get('bb_max_size', 85000)

        # Begin analysis
        if len(request.file_contents) >= max_size or self.sample_type.startswith("archive/"):
            # No analysis is done if the file is an archive or too large
            return

        self.ascii_results(request, patterns, max_length, st_max_size)
        self.embedded_pe_results(request)

        # Possible encoded strings -- all sample types except code/* (code is handled by deobfuscripter service)
        if not self.sample_type.startswith('code'):
            self.base64_results(request, patterns)
            if len(request.file_contents) < bb_max_size:
                self.bbcrack_results(request)

        # Other possible encoded strings -- all sample types but code and executables
        if not self.sample_type.split('/', 1)[0] in ['executable', 'code']:
            self.unicode_results(request, patterns)
            # Go over again, looking for long ASCII-HEX character strings
            if not self.sample_type.startswith('document/office'):
                self.hex_results(request, patterns)

        if self.excess_extracted:
            self.log.warning(
                f"Too many files extracted from {request.sha256}, "
                f"{self.excess_extracted} files were not extracted")
            request.result.add_section(
                ResultSection(
                    f"Over extraction limit: "
                    f"{self.excess_extracted} files were not extracted"))
Example #11
    def test_service(sample):
        config = helper.get_service_attributes().config

        cls = emlparser.emlparser.EmlParser(config=config)
        cls.start()

        task = Task(create_service_task(sample=sample))
        service_request = ServiceRequest(task)
        cls.execute(service_request)

        # Get the result of execute() from the test method
        test_result = task.get_service_result()

        assert "0766" in test_result["temp_submission_data"]["email_body"]
Example #12
    def extract_powershell(self, parameter: str, section: ResultSection,
                           request: ServiceRequest) -> None:
        """Searches parameter for PowerShell, adds as extracted if found

        Args:
            parameter: String to be searched
            section: Section to be modified if PowerShell found
            request: ServiceRequest used to register the extracted PowerShell file
        """

        matches = find_powershell_strings(parameter.encode())

        if not matches:
            return

        self.found_powershell = True

        for match in matches:
            powershell_command = get_powershell_command(match.value)
            sha256hash = hashlib.sha256(powershell_command).hexdigest()
            # Add PowerShell code as extracted, account for duplicates
            if sha256hash not in self.file_hashes:
                powershell_filename = f"{sha256hash[0:10]}.ps1"
                ResultSection(
                    "Discovered PowerShell code in parameter.",
                    parent=section,
                    body=powershell_command[:100].decode() +
                    f"... see [{powershell_filename}]",
                )
                powershell_file_path = os.path.join(self.working_directory,
                                                    powershell_filename)
                with open(powershell_file_path, "wb") as f:
                    f.write(powershell_command)
                request.add_extracted(
                    powershell_file_path, powershell_filename,
                    "Discovered PowerShell code in parameter")
                self.file_hashes.append(sha256hash)
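The SHA256-based naming above doubles as duplicate suppression: identical PowerShell payloads hash to the same name and are extracted only once. A standalone sketch of that scheme (the helper name is hypothetical):

    import hashlib
    from typing import List, Optional

    def dedup_filename(payload: bytes, seen_hashes: List[str]) -> Optional[str]:
        sha256hash = hashlib.sha256(payload).hexdigest()
        if sha256hash in seen_hashes:
            return None  # duplicate payload, nothing new to extract
        seen_hashes.append(sha256hash)
        return f"{sha256hash[0:10]}.ps1"

    seen: List[str] = []
    assert dedup_filename(b"IEX(New-Object Net.WebClient)", seen) is not None
    assert dedup_filename(b"IEX(New-Object Net.WebClient)", seen) is None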
Example #13
    def handle_task(self, task: ServiceTask) -> None:
        try:
            self._task = Task(task)
            self.log.info(f"Starting task: {self._task.sid}/{self._task.sha256} ({self._task.type})")
            self._task.start(self.service_attributes.default_result_classification,
                             self.service_attributes.version, self.get_tool_version())

            request = ServiceRequest(self._task)
            self.execute(request)

            self._success()
        except Exception as ex:
            self._handle_execute_failure(ex, exceptions.get_stacktrace_info(ex))
        finally:
            self._cleanup()
Example #14
    def execute(self, request: ServiceRequest) -> None:
        result = Result()
        request.result = result

        # Get AV labels from previous services
        av_labels = request.task.tags.get('av.virus_name')
        if not av_labels:
            return

        # Extract AVclass tags
        av_tags = self._get_avclass_tags(request.md5, request.sha1,
                                         request.sha256, av_labels)
        if av_tags is None:
            return

        # Build results
        section = self._get_result_section(av_tags.family, av_tags.is_pup)
        for tag_section in self._get_category_sections(av_tags.tags):
            section.add_subsection(tag_section)

        result.add_section(section)
Example #15
    def execute(self, request: ServiceRequest) -> None:
        result = Result()
        self.hits = {}  # clear the hits dict
        path = request.file_path
        file_name = request.file_name
        self.log.info(f"Executing {file_name}")
        self.log.info(f"Number of rules: {len(self.sigma_parser.rules)}")
        self.sigma_parser.register_callback(self.sigma_hit)
        self.sigma_parser.check_logfile(path)
        if len(self.hits) > 0:
            hit_section = ResultSection('Events detected as suspicious')
            # group alerts together
            for rule_id, events in self.hits.items():
                title = self.sigma_parser.rules[rule_id].title
                section = SigmaHitSection(title, events)
                tags = self.sigma_parser.rules[rule_id].tags
                # Reset for each rule so a previous rule's ATT&CK ID is not reused
                attack_id = None
                if tags:
                    for tag in tags:
                        name = tag[7:]  # strip the "attack." prefix
                        if name.startswith(('t', 'g', 's')):
                            attack_id = name.upper()
                source = events[0]['signature_source']
                if attack_id:
                    section.set_heuristic(get_heur_id(events[0]['score']),
                                          attack_id=attack_id,
                                          signature=f"{source}.{title}")
                else:
                    section.set_heuristic(get_heur_id(events[0]['score']),
                                          signature=f"{source}.{title}")
                section.add_tag(f"file.rule.{source}", f"{source}.{title}")
                for event in events:
                    # add the event data as a subsection
                    section.add_subsection(EventDataSection(event))
                hit_section.add_subsection(section)
            result.add_section(hit_section)
        request.result = result
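sigma_hit, registered as the callback above, is not shown; the loop only assumes that self.hits maps a rule ID to a list of events that each carry at least a score and a signature_source. A sketch of that assumed shape (field names beyond those two, and all values, are illustrative):

    example_hits = {
        "rule-uuid-1234": [
            {"score": "critical", "signature_source": "sigma", "Image": "C:\\evil.exe"},
        ],
    }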
Example #16
    def test_execute(sample, target):
        # TODO: Break down the execute method to make it easily testable
        from assemblyline_v4_service.common.task import Task
        from assemblyline.odm.messages.task import Task as ServiceTask
        from assemblyline_v4_service.common.request import ServiceRequest

        service_task = ServiceTask(sample)
        task = Task(service_task)
        target._task = task
        service_request = ServiceRequest(task)

        # Actually executing the sample
        target.execute(service_request)

        # Get the result of execute() from the test method
        test_result = task.get_service_result()

        # Get the assumed "correct" result of the sample
        correct_result_path = os.path.join(TEST_DIR, "results",
                                           task.file_name + ".json")
        with open(correct_result_path, "r") as f:
            correct_result = json.loads(f.read())
        f.close()

        # Assert that the appropriate sections of the dict are equal

        # Avoiding unique items in the response
        test_result_response = test_result.pop("response")
        correct_result_response = correct_result.pop("response")
        assert test_result == correct_result

        # Comparing everything in the response except for the service_completed and the output.json supplementary
        test_result_response["milestones"].pop("service_completed")
        correct_result_response["milestones"].pop("service_completed")
        correct_result_response.pop("supplementary")
        test_result_response.pop("supplementary")
        assert test_result_response == correct_result_response
Example #17
    def dexray(self, request: ServiceRequest, local: str):
        """Iterate through quarantine decrypt methods.
        Args:
            request: AL request object.
            local: File path of AL sample.
        Returns:
            Metadata dict produced by the successful extraction method (empty if none succeeded).
        """
        encoding = request.file_type.replace("quarantine/", "")
        extracted = []
        metadata = {}

        # Try all extracting methods
        for extract_method in self.extract_methods:
            # noinspection PyArgumentList
            extracted, metadata = extract_method(local, self.sha,
                                                 self.working_directory,
                                                 encoding)
            if extracted or metadata:
                break

        extracted_count = len(extracted)
        # safe_str the file name (fn)
        extracted = [[fp, safe_str(fn), e] for fp, fn, e in extracted]
        for child in extracted:
            try:
                # If the file is not successfully added as extracted, then decrease the extracted file counter
                if not request.add_extracted(*child):
                    extracted_count -= 1
            except MaxExtractedExceeded:
                raise MaxExtractedExceeded(
                    f"This file contains {extracted_count} extracted files, exceeding the "
                    f"maximum of {request.max_extracted} extracted files allowed. "
                    "None of the files were extracted.")

        return metadata
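Each entry in self.extract_methods is called with the same four arguments, so they presumably share a contract. A hedged stub of that contract, inferred from the call site above (the stub name is hypothetical):

    def extract_method_stub(local: str, sha: str, working_directory: str,
                            encoding: str):
        extracted = []  # list of (file_path, file_name, description) triples
        metadata = {}   # decoder-specific details for the quarantine format
        return extracted, metadata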
Example #18
    def execute(self, request: ServiceRequest) -> None:
        request.result = Result()

        # 1. Calculate entropy map
        with open(request.file_path, "rb") as fin:
            (entropy, part_entropies) = calculate_partition_entropy(fin)

        entropy_graph_data = {"type": "colormap", "data": {"domain": [0, 8], "values": part_entropies}}

        ResultSection(
            f"File entropy: {round(entropy, 3)}",
            parent=request.result,
            body_format=BODY_FORMAT.GRAPH_DATA,
            body=json.dumps(entropy_graph_data, allow_nan=False),
        )

        if request.file_type != "shortcut/windows":
            # 2. Get hachoir metadata
            parser = createParser(request.file_path)
            if parser is not None:
                with parser:
                    parser_tags = parser.getParserTags()
                    parser_id = parser_tags.get("id", "unknown")

                    # Do basic metadata extraction
                    metadata = extractMetadata(parser, 1)

                    if metadata:
                        kv_body: Dict[str, Union[str, List[str]]] = {}
                        tags: List[Tuple[str, str]] = []
                        for m in metadata:
                            if m.key == "comment":
                                for v in m.values:
                                    key, val = get_type_val(v.text, "comment")
                                    if not val:
                                        continue

                                    kv_body[key] = val

                                    tag_type = TAG_MAP.get(parser_id, {}).get(key, None) or TAG_MAP.get(None, {}).get(
                                        key, None
                                    )
                                    if tag_type is not None:
                                        tags.append((tag_type, val))
                            elif m.key in ["mime_type"]:
                                pass
                            else:
                                values = [v.text for v in m.values]
                                if len(values) == 1 and values[0]:
                                    kv_body[m.key] = values[0]
                                elif values:
                                    kv_body[m.key] = values

                                for v in values:
                                    tag_type = TAG_MAP.get(parser_id, {}).get(m.key, None) or TAG_MAP.get(None, {}).get(
                                        m.key, None
                                    )
                                    if tag_type is not None:
                                        tags.append((tag_type, v))

                        if kv_body:
                            res = ResultSection(
                                f"Metadata extracted by hachoir-metadata [Parser: {parser_id}]",
                                body=json.dumps(kv_body, allow_nan=False),
                                body_format=BODY_FORMAT.KEY_VALUE,
                                parent=request.result,
                            )

                            for t_type, t_val in tags:
                                res.add_tag(t_type, t_val)

        # 3. Get Exiftool Metadata
        exif = subprocess.run(["exiftool", "-j", request.file_path], capture_output=True, check=False)
        if exif.stdout:
            exif_data = json.loads(exif.stdout.decode("utf-8", errors="ignore"))
            res_data = exif_data[0]
            if "Error" not in res_data:
                exif_body = {}
                for k, v in res_data.items():
                    if v and k not in [
                        "SourceFile",
                        "ExifToolVersion",
                        "FileName",
                        "Directory",
                        "FileSize",
                        "FileModifyDate",
                        "FileAccessDate",
                        "FileInodeChangeDate",
                        "FilePermissions",
                        "FileType",
                        "FileTypeExtension",
                        "MIMEType",
                        "Warning",
                    ]:
                        if v in [float("inf"), -float("inf"), float("nan")]:
                            exif = subprocess.run(
                                ["exiftool", f"-{k}", "-T", request.file_path], capture_output=True, check=False
                            )
                            v = exif.stdout.decode("utf-8", errors="ignore").strip()
                        exif_body[build_key(k)] = v
                if exif_body:
                    e_res = ResultSection(
                        "Metadata extracted by ExifTool",
                        body=json.dumps(exif_body, allow_nan=False),
                        body_format=BODY_FORMAT.KEY_VALUE,
                        parent=request.result,
                    )
                    for k, v in exif_body.items():
                        tag_type = TAG_MAP.get(res_data.get("FileTypeExtension", "UNK").upper(), {}).get(
                            k, None
                        ) or TAG_MAP.get(None, {}).get(k, None)
                        if tag_type:
                            e_res.add_tag(tag_type, v)

        # 4. Lnk management.
        if request.file_type == "shortcut/windows":
            with open(request.file_path, "rb") as indata:
                lnk = LnkParse3.lnk_file(indata)

            features = lnk.get_json(get_all=True)

            lnk_result_section = ResultSection(
                "Extra metadata extracted by LnkParse3",
                parent=request.result,
            )

            heur_1_items = {}
            risky_executable = ["rundll32.exe", "powershell.exe", "cmd.exe", "mshta.exe"]

            if "command_line_arguments" in features["data"]:
                if any(x in features["data"]["command_line_arguments"].lower() for x in risky_executable):
                    heur_1_items["command_line_arguments"] = features["data"]["command_line_arguments"]
                elif " && " in features["data"]["command_line_arguments"]:
                    heur_1_items["command_line_arguments"] = features["data"]["command_line_arguments"]

            lbp = ""
            if "local_base_path" in features["link_info"]:
                lbp = features["link_info"]["local_base_path"]
                if "common_path_suffix" in features["link_info"]:
                    lbp = f"{lbp}{features['link_info']['common_path_suffix']}"
                if any(x in lbp.lower() for x in risky_executable):
                    heur_1_items["local_base_path"] = features["link_info"]["local_base_path"]

            if "relative_path" in features["data"]:
                if any(x in features["data"]["relative_path"].lower() for x in risky_executable):
                    heur_1_items["relative_path"] = features["data"]["relative_path"]

            target = ""
            if "target" in features:
                import ntpath

                if "items" in features["target"]:
                    last_item = None
                    for item in features["target"]["items"]:
                        if "primary_name" in item:
                            last_item = item
                            target = ntpath.join(target, item["primary_name"])

                    if last_item and last_item["flags"] == "Is directory":
                        target = ""

                    if any(x in target.lower() for x in risky_executable):
                        heur_1_items["target_file_dosname"] = target

            if "icon_location" in features["data"]:
                deceptive_icons = ["wordpad.exe", "shell32.dll"]

                lnk_result_section.add_tag(
                    tag_type="file.shortcut.icon_location", value=features["data"]["icon_location"]
                )
                if any(
                    features["data"]["icon_location"].lower().strip('"').strip("'").endswith(x) for x in deceptive_icons
                ):
                    heur = Heuristic(4)
                    heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                    heur_section.set_item("icon_location", features["data"]["icon_location"])

            timestamps = []
            if features["header"]["creation_time"]:
                timestamps.append(("creation_time", features["header"]["creation_time"]))
            if features["header"]["modified_time"]:
                timestamps.append(("modified_time", features["header"]["modified_time"]))

            if request.task.depth != 0:
                heur2_earliest_ts = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
                    days=self.config.get("heur2_flag_more_recent_than_days", 3)
                )
                heur2_latest_ts = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=2)
                recent_timestamps = []
                future_timestamps = []
                for k, timestamp in timestamps:
                    if timestamp < heur2_earliest_ts:
                        continue
                    if timestamp > heur2_latest_ts:
                        future_timestamps.append((k, timestamp))
                        continue
                    recent_timestamps.append((k, timestamp))

                if recent_timestamps:
                    heur = Heuristic(2)
                    heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                    for k, timestamp in recent_timestamps:
                        heur_section.set_item(k, timestamp.isoformat())
                if future_timestamps:
                    heur = Heuristic(3)
                    heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                    for k, timestamp in future_timestamps:
                        heur_section.set_item(k, timestamp.isoformat())

            if "DISTRIBUTED_LINK_TRACKER_BLOCK" in features["extra"]:
                if "machine_identifier" in features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]:
                    machine_id = features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]["machine_identifier"]
                    lnk_result_section.add_tag("file.shortcut.machine_id", machine_id)
                    if machine_id.lower().startswith("desktop-"):
                        heur = Heuristic(5)
                        heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                        heur_section.set_item("machine_identifier", machine_id)
                if "droid_file_identifier" in features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]:
                    mac = features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]["droid_file_identifier"][-12:]
                    mac = ":".join(a + b for a, b in zip(mac[::2], mac[1::2]))
                    lnk_result_section.add_tag("file.shortcut.tracker_mac", mac)
                elif "birth_droid_file_identifier" in features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]:
                    mac = features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]["birth_droid_file_identifier"][-12:]
                    mac = ":".join(a + b for a, b in zip(mac[::2], mac[1::2]))
                    lnk_result_section.add_tag("file.shortcut.tracker_mac", mac)

            # Adapted code from previous logic. May be best replaced by new heuristics and logic.
            bp = str(lbp).strip()
            rp = str(features["data"].get("relative_path", "")).strip()
            nn = str(features["data"].get("net_name", "")).strip()
            t = str(target).strip().rsplit("\\")[-1].strip()
            cla = str(features["data"].get("command_line_arguments", "")).strip()

            filename_extracted = (bp or rp or t or nn).rsplit("\\")[-1].strip()
            if filename_extracted:
                lnk_result_section.add_tag(tag_type="file.name.extracted", value=(bp or rp or t or nn).rsplit("\\")[-1])

            process_cmdline = f"{(rp or bp or t or nn)} {cla}".strip()
            if process_cmdline:
                lnk_result_section.add_tag(tag_type="file.shortcut.command_line", value=process_cmdline)

            cmd_code = None
            if filename_extracted in ["cmd", "cmd.exe"]:
                cmd_code = (get_cmd_command(f"{filename_extracted} {cla}".encode()), "bat")
                if "rundll32 " in cla:  # We are already checking for rundll32.exe as part of risky_executable
                    heur_1_items["command_line_arguments"] = features["data"]["command_line_arguments"]
            elif filename_extracted in ["powershell", "powershell.exe"]:
                cmd_code = (get_powershell_command(f"{filename_extracted} {cla}".encode()), "ps1")

            if heur_1_items:
                heur = Heuristic(1)
                heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                heur_section.update_items(heur_1_items)

            if cmd_code:
                sha256hash = hashlib.sha256(cmd_code[0]).hexdigest()
                cmd_filename = f"{sha256hash[0:10]}.{cmd_code[1]}"
                cmd_file_path = os.path.join(self.working_directory, cmd_filename)
                with open(cmd_file_path, "wb") as cmd_f:
                    cmd_f.write(cmd_code[0])
                request.add_extracted(
                    cmd_file_path,
                    cmd_filename,
                    "Extracted LNK execution code",
                )

            def _datetime_to_str(obj):
                if isinstance(obj, datetime.datetime):
                    return obj.isoformat()
                return obj

            temp_path = os.path.join(self.working_directory, "features.json")
            with open(temp_path, "w") as f:
                json.dump(features, f, default=_datetime_to_str)
            request.add_supplementary(temp_path, "features.json", "Features extracted from the LNK file")

            if lnk.appended_data:
                sha256hash = hashlib.sha256(lnk.appended_data).hexdigest()
                appended_data_path = os.path.join(self.working_directory, sha256hash)
                with open(appended_data_path, "wb") as appended_data_f:
                    appended_data_f.write(lnk.appended_data)
                request.add_extracted(
                    appended_data_path,
                    sha256hash,
                    "Additional data at the end of the LNK file",
                )
                heur = Heuristic(6)
                heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                heur_section.set_item("Length", len(lnk.appended_data))

        # 5. URL file management
        if request.file_type == "shortcut/web":
            config = ConfigParser()
            config.read(request.file_path)

            res = ResultKeyValueSection("Metadata extracted by Ini Reader", parent=request.result)
            for k, v in config["InternetShortcut"].items():
                res.set_item(k, v)

                if k == "url":
                    if v.startswith("http://") or v.startswith("https://"):
                        res.add_tag("network.static.uri", v)
                    elif v.startswith("file:"):
                        heur = Heuristic(1)
                        heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=res)
                        heur_section.set_item("url", v)

            config.pop("InternetShortcut", None)
            if config.sections():
                extra_res = ResultKeyValueSection("Extra sections", parent=res)
                extra_res.set_item("Names", ", ".join(config.sections()))
Example #19
    def _attach_service_meta_ontology(self, request: ServiceRequest) -> None:

        heuristics = helper.get_heuristics()

        def preprocess_result_for_dump(sections, current_max, heur_tag_map,
                                       tag_map):
            for section in sections:
                # Determine max classification of the overall result
                current_max = forge.get_classification().max_classification(
                    section.classification, current_max)

                # Cleanup invalid tagging from service results
                def validate_tags(tag_map):
                    tag_map, _ = construct_safe(Tagging, unflatten(tag_map))
                    tag_map = flatten(tag_map.as_primitives(strip_null=True))
                    return tag_map

                # Merge tags
                def merge_tags(tag_a, tag_b):
                    if not tag_a:
                        return tag_b

                    elif not tag_b:
                        return tag_a

                    all_keys = list(tag_a.keys()) + list(tag_b.keys())
                    return {
                        key: list(set(tag_a.get(key, []) + tag_b.get(key, [])))
                        for key in all_keys
                    }

                # Append tags raised by the service, if any
                section_tags = validate_tags(section.tags)
                if section_tags:
                    tag_map.update(section_tags)

                # Append tags associated to heuristics raised by the service, if any
                if section.heuristic:
                    heur = heuristics[section.heuristic.heur_id]
                    key = f'{self.name.upper()}_{heur.heur_id}'
                    update_value = {"name": heur.name, "tags": {}}
                    if section_tags:
                        update_value = \
                            {
                                "name": heur.name,
                                "tags": merge_tags(heur_tag_map[key]["tags"], section_tags)
                            }
                    heur_tag_map[key].update(update_value)

                # Recurse through subsections
                if section.subsections:
                    current_max, heur_tag_map, tag_map = preprocess_result_for_dump(
                        section.subsections, current_max, heur_tag_map,
                        tag_map)

            return current_max, heur_tag_map, tag_map

        if not request.result or not request.result.sections:
            # No service results, therefore no ontological output
            return

        max_result_classification, heur_tag_map, tag_map = preprocess_result_for_dump(
            request.result.sections,
            request.task.service_default_result_classification,
            defaultdict(lambda: {"tags": dict()}), defaultdict(list))

        if not tag_map and not self.ontologies:
            # No tagging or ontologies found, therefore informational results
            return

        ontology = {
            'header': {
                'md5': request.md5,
                'sha1': request.sha1,
                'sha256': request.sha256,
                'type': request.file_type,
                'size': request.file_size,
                'classification': max_result_classification,
                'service_name': request.task.service_name,
                'service_version': request.task.service_version,
                'service_tool_version': request.task.service_tool_version,
                'tags': tag_map,
                'heuristics': heur_tag_map
            }
        }
        # Include Ontological data
        ontology.update(
            {otype.lower(): data for otype, data in self.ontologies.items()})

        ontology_suffix = f"{request.sha256}.ontology"
        ontology_path = os.path.join(self.working_directory, ontology_suffix)
        try:
            with open(ontology_path, 'w') as f:
                f.write(
                    json.dumps(
                        ResultOntology(ontology).as_primitives(strip_null=True)))
            attachment_name = f'{request.task.service_name}_{ontology_suffix}'.lower()
            request.add_supplementary(
                path=ontology_path,
                name=attachment_name,
                description=f"Result Ontology from {request.task.service_name}",
                classification=max_result_classification)
        except ValueError as e:
            self.log.error(f"Problem with generating ontology: {e}")
Example #20
    def test_service(sample):
        overwrite_results = False  # Used temporarily to mass-correct tests

        cls = Characterize()
        cls.start()

        task = Task(create_service_task(sample=sample))
        service_request = ServiceRequest(task)

        cls.execute(service_request)

        result_dir_files = [
            os.path.basename(x) for x in glob.glob(
                os.path.join(SELF_LOCATION, "tests", "results", sample, "*"))
        ]

        correct_path = os.path.join(SELF_LOCATION, "tests", "results", sample,
                                    "features.json")
        if os.path.exists(correct_path):
            result_dir_files.remove("features.json")
            with open(correct_path, "r") as f:
                correct_result = json.load(f)

            test_path = os.path.join(cls.working_directory, "features.json")
            with open(test_path, "r") as f:
                test_result = json.load(f)

            if overwrite_results:
                if test_result != correct_result:
                    with open(correct_path, "w") as f:
                        json.dump(test_result, f)
            else:
                assert test_result == correct_result

        # Get the result of execute() from the test method
        test_result = task.get_service_result()

        result_dir_files.remove("result.json")
        # Get the assumed "correct" result of the sample
        correct_path = os.path.join(SELF_LOCATION, "tests", "results", sample,
                                    "result.json")
        with open(correct_path, "r") as f:
            correct_result = json.load(f)

        test_result = generalize_result(test_result)

        if overwrite_results:
            if test_result != correct_result:
                with open(correct_path, "w") as f:
                    json.dump(test_result, f)
        else:
            assert test_result == correct_result

        for extracted_file in test_result["response"]["extracted"]:
            if not overwrite_results or extracted_file["name"] in result_dir_files:
                result_dir_files.remove(extracted_file["name"])

            correct_path = os.path.join(SELF_LOCATION, "tests", "results",
                                        sample, extracted_file["name"])
            with open(correct_path, "rb") as f:
                correct_result = f.read()

            test_path = os.path.join(cls.working_directory,
                                     extracted_file["name"])
            with open(test_path, "rb") as f:
                test_result = f.read()

            if overwrite_results:
                if test_result != correct_result:
                    with open(correct_path, "wb") as f:
                        f.write(test_result)
            else:
                assert test_result == correct_result

        assert not result_dir_files
Example #21
    def test_execute(sample, intezer_static_class_instance,
                     dummy_api_interface_class, dummy_get_response_class,
                     mocker):
        from assemblyline_v4_service.common.task import Task
        from assemblyline.odm.messages.task import Task as ServiceTask
        from assemblyline_v4_service.common.request import ServiceRequest
        from json import loads
        from intezer_static import ALIntezerApi

        mocker.patch.object(intezer_static_class_instance,
                            "get_api_interface",
                            return_value=dummy_api_interface_class)
        intezer_static_class_instance.start()

        service_task = ServiceTask(sample)
        task = Task(service_task)
        task.service_config = {
            "analysis_id": "",
        }
        intezer_static_class_instance._task = task
        service_request = ServiceRequest(task)
        intezer_static_class_instance.config["private_only"] = False

        mocker.patch.object(ALIntezerApi,
                            "get_latest_analysis",
                            return_value={"analysis_id": "blah"})
        mocker.patch.object(ALIntezerApi,
                            "analyze_by_file",
                            return_value="blah")
        mocker.patch.object(ALIntezerApi,
                            "get_iocs",
                            return_value={
                                "files": [],
                                "network": []
                            })
        mocker.patch.object(ALIntezerApi, "get_dynamic_ttps", return_value=[])
        mocker.patch.object(ALIntezerApi,
                            "get_sub_analyses_by_id",
                            return_value=[])

        # Actually executing the sample
        intezer_static_class_instance.execute(service_request)

        # Get the result of execute() from the test method
        test_result = task.get_service_result()

        # Get the assumed "correct" result of the sample
        correct_result_path = os.path.join(TEST_DIR, "results",
                                           task.file_name + ".json")
        with open(correct_result_path, "r") as f:
            correct_result = loads(f.read())
        f.close()

        # Assert that the appropriate sections of the dict are equal

        # Avoiding unique items in the response
        test_result_response = test_result.pop("response")
        correct_result_response = correct_result.pop("response")
        assert test_result == correct_result

        # Comparing everything in the response except for the service_completed and the output.json supplementary
        test_result_response["milestones"].pop("service_completed")
        correct_result_response["milestones"].pop("service_completed")
        correct_result_response.pop("supplementary")
        test_result_response.pop("supplementary")
        correct_result_response.pop("service_context")
        test_result_response.pop("service_context")
        assert test_result_response == correct_result_response

        # Code coverage
        task.service_config = {
            "analysis_id": "blah",
        }
        intezer_static_class_instance._task = task
        service_request = ServiceRequest(task)
        intezer_static_class_instance.execute(service_request)

        task.service_config = {"analysis_id": ""}
        intezer_static_class_instance.config["is_on_premise"] = False
        mocker.patch.object(ALIntezerApi,
                            "get_latest_analysis",
                            return_value={"verdict": "not_supported"})
        mocker.patch.object(ALIntezerApi, "get_dynamic_ttps", return_value=[])
        intezer_static_class_instance.execute(service_request)

        mocker.patch.object(ALIntezerApi,
                            "get_latest_analysis",
                            return_value={"verdict": "failed"})
        intezer_static_class_instance.execute(service_request)
Example #22
    def execute(self, request: ServiceRequest):
        filename = posixpath.basename(request.file_name)
        request.result = self.check_file_name_anomalies(filename)
Example #23
    def _handle_subanalyses(self, request: ServiceRequest, sha256: str,
                            analysis_id: str, file_verdict_map: Dict[str, str],
                            parent_section: ResultSection) -> None:
        """
        This method handles the subanalyses for a given analysis ID
        :param request: The service request object
        :param sha256: The hash of the given file
        :param analysis_id: The ID for the analysis which we will be retrieving
        :param file_verdict_map: A map of sha256s representing a file's
        contents, and the verdict for that file
        :param parent_section: The result section that the network
        result section will be added to, if applicable
        :return: None
        """
        so = SandboxOntology()

        # This boolean is used to determine if we should try to download another file
        can_we_download_files = True

        # These sets will be used as we work through the process trees
        process_path_set = set()
        command_line_set = set()

        # Now let's get into the subanalyses for this sample
        sub_analyses = self.client.get_sub_analyses_by_id(analysis_id)

        for sub in sub_analyses:
            sub_analysis_id = sub["sub_analysis_id"]

            # Get the extraction info, which is basically the details of how the subanalysis object came to be
            extraction_info = sub.pop("extraction_info", None)

            # Processes is only present when the sample has undergone dynamic execution
            if extraction_info and "processes" not in extraction_info:
                extraction_info = None

            code_reuse = self.client.get_sub_analysis_code_reuse_by_id(
                analysis_id, sub_analysis_id)

            if code_reuse:
                families = code_reuse.pop("families", [])
            else:
                families = []

            if not families and not extraction_info:
                # Otherwise, boring!
                continue

            if families and not any(family["reused_gene_count"] > 1
                                    for family in families):
                # Most likely a false positive
                continue

            ###
            # If we have gotten to this point, then the sub analysis is worth reporting
            ###

            extraction_method = sub["source"].replace("_", " ")

            if extraction_method != "root":
                sub_kv_section = ResultKeyValueSection(
                    f"Subanalysis report for {sub['sha256']}, extracted via {extraction_method}"
                )
            else:
                sub_kv_section = ResultKeyValueSection(
                    f"Subanalysis report for {sub['sha256']}")

            metadata = self.client.get_sub_analysis_metadata_by_id(
                analysis_id, sub_analysis_id)
            processed_subanalysis = self._process_details(
                metadata.copy(), UNINTERESTING_SUBANALYSIS_KEYS)
            sub_kv_section.update_items(processed_subanalysis)
            parent_section.add_subsection(sub_kv_section)

            if code_reuse:
                code_reuse_kv_section = ResultKeyValueSection(
                    "Code reuse detected")
                code_reuse_kv_section.update_items(code_reuse)
                sub_kv_section.add_subsection(code_reuse_kv_section)

            sub_sha256 = sub["sha256"]
            if families:
                self._process_families(families, sub_sha256, file_verdict_map,
                                       sub_kv_section)

            if extraction_info:
                self._process_extraction_info(extraction_info["processes"],
                                              process_path_set,
                                              command_line_set, so)

            # Setting a heuristic here or downloading the file would be redundant if the hash matched the original file
            if sub_sha256 != sha256:
                self._set_heuristic_by_verdict(
                    sub_kv_section, file_verdict_map.get(sub_sha256))

                if can_we_download_files:
                    file_was_downloaded = self.client.download_file_by_sha256(
                        sub_sha256, self.working_directory)
                    if file_was_downloaded:
                        path = f"{self.working_directory}/{sub_sha256}.sample"
                        request.add_extracted(
                            path,
                            f"{sub_sha256}.sample",
                            f"Extracted via {extraction_method}",
                        )
                        self.log.debug(
                            f"Added {sub_sha256}.sample as an extracted file.")
                    else:
                        can_we_download_files = False

        process_tree_section = so.get_process_tree_result_section()
        for process_path in process_path_set:
            process_tree_section.add_tag("dynamic.process.file_name",
                                         process_path)
        for command_line in command_line_set:
            process_tree_section.add_tag("dynamic.process.command_line",
                                         command_line)
        if process_tree_section.body:
            parent_section.add_subsection(process_tree_section)
Example #24
    def check_for_b64(self, data: str, section: ResultSection,
                      request: ServiceRequest, file_contents: bytes) -> bool:
        """Search and decode base64 strings in sample data.

        Args:
            data: Data to be parsed
            section: base64 subsection, must have heuristic set
            request: ServiceRequest used to register extracted files
            file_contents: Original file contents, used to skip base64 already present in the sample

        Returns:
            decoded: Boolean which is true if base64 found
        """
        assert section.heuristic

        decoded_param = data
        decoded = False

        encoded_data = data.encode()
        for content, start, end in find_base64(encoded_data):
            if encoded_data[start:end] in file_contents:
                # Present in original file, not an intermediate IoC
                continue
            try:
                # Powershell base64 will be utf-16
                content = content.decode("utf-16").encode()
            except UnicodeDecodeError:
                pass
            try:
                if len(content) < FILE_PARAMETER_SIZE:
                    decoded_param = (decoded_param[:start] + " " +
                                     content.decode(errors="ignore") +
                                     decoded_param[end:])
                else:
                    b64hash = ""
                    pe_files = find_pe_files(content)
                    for pe_file in pe_files:
                        b64hash = hashlib.sha256(pe_file).hexdigest()
                        pe_path = os.path.join(self.working_directory, b64hash)
                        with open(pe_path, "wb") as f:
                            f.write(pe_file)
                        request.add_extracted(
                            pe_path, b64hash,
                            "PE file found in base64 encoded parameter")
                        section.heuristic.add_signature_id("pe_file")
                    if not pe_files:
                        b64hash = hashlib.sha256(content).hexdigest()
                        content_path = os.path.join(self.working_directory,
                                                    b64hash)
                        with open(content_path, "wb") as f:
                            f.write(content)
                        request.add_extracted(
                            content_path, b64hash,
                            "Large base64 encoded parameter")
                        section.heuristic.add_signature_id("possible_file")
                    decoded_param = (decoded_param[:start] +
                                     f"[See extracted file {b64hash}]" +
                                     decoded_param[end:])
                decoded = True
            except Exception:
                # Not valid base64 content after all; skip this candidate
                # and keep scanning
                pass

        if decoded:
            section.heuristic.increment_frequency()
            section.add_line(
                f"Possible Base64 {truncate(data)} decoded: {decoded_param}")
            self.find_ip(decoded_param)

        return decoded
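
    # Why check_for_b64() tries utf-16 first: PowerShell's -EncodedCommand
    # takes base64 over UTF-16LE text. A minimal sketch with a hypothetical
    # sample, standard library only:
    #   import base64
    #   cmd = "Write-Host hi"
    #   enc = base64.b64encode(cmd.encode("utf-16-le"))
    #   assert base64.b64decode(enc).decode("utf-16-le") == cmd
    # Decoding the same bytes as utf-8 would leave interleaved NUL bytes.
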
    def execute(self, request: ServiceRequest) -> None:
        self.result = Result()
        request.result = self.result

        self.ip_list = []
        self.url_list = []
        self.found_powershell = False
        self.file_hashes = []

        vmonkey_err = False
        actions: List[str] = []
        external_functions: List[str] = []
        tmp_iocs: List[str] = []
        output_results: Dict[str, Any] = {}
        potential_base64: Set[str] = set()

        # Running ViperMonkey
        try:
            file_contents = request.file_contents
            input_file: str = request.file_path
            input_file_obj: Optional[IO] = None
            # XML files typically start with "<?"; if this one doesn't,
            # check for a BOM that may be hiding the declaration
            if not file_contents.startswith(
                    b"<?") and request.file_type == "code/xml":
                # Default encoding/decoding if BOM not found
                encoding: Optional[str] = None
                decoding: Optional[str] = None
                # Remove potential BOMs from contents
                if file_contents.startswith(BOM_UTF8):
                    encoding = "utf-8"
                    decoding = "utf-8-sig"
                elif file_contents.startswith(BOM_UTF16):
                    encoding = "utf-16"
                    decoding = "utf-16"
                if encoding and decoding:
                    input_file_obj = tempfile.NamedTemporaryFile(
                        "w+", encoding=encoding)
                    input_file_obj.write(
                        file_contents.decode(decoding, errors="ignore"))
                    input_file = input_file_obj.name
                else:
                    # The type was detected as XML but no known BOM was
                    # found; the XML is likely buried inside another format
                    # that ViperMonkey can't process, so give no response
                    return
            cmd = " ".join([
                PYTHON2_INTERPRETER,
                os.path.join(os.path.dirname(__file__),
                             "vipermonkey_compat.py2"),
                input_file,
                self.working_directory,
            ])
            p = subprocess.run(cmd, capture_output=True, shell=True)
            stdout = p.stdout

            # Close file
            if input_file_obj and os.path.exists(input_file_obj.name):
                input_file_obj.close()

            # Add artifacts
            artifact_dir = os.path.join(
                self.working_directory,
                os.path.basename(input_file) + "_artifacts")
            if os.path.exists(artifact_dir):
                for file in os.listdir(artifact_dir):
                    try:
                        file_path = os.path.join(artifact_dir, file)
                        if os.path.isfile(file_path) and os.path.getsize(
                                file_path):
                            request.add_extracted(
                                file_path, file,
                                "File extracted by ViperMonkey during analysis"
                            )
                    except OSError as e:
                        self.log.warning(e)

            # Read output
            if stdout:
                for line in stdout.splitlines():
                    if line.startswith(b"{") and line.endswith(b"}"):
                        try:
                            output_results = json.loads(line)
                        except UnicodeDecodeError:
                            output_results = json.loads(
                                line.decode("utf-8", "replace"))
                        break

                # Guard against vmonkey returning None: with no macros the
                # return is [][][]; on error it is None. vmonkey_err can be
                # set even when values came back, so log it as a warning
                # rather than an error
                if isinstance(output_results.get("vmonkey_values"), dict):
                    """
                    Structure of variable "actions" is as follows:
                    [action, parameters, description]
                    action: 'Found Entry Point', 'Execute Command', etc...
                    parameters: Parameters for function
                    description: 'Shell Function', etc...

                    external_functions is a list of built-in VBA functions
                    that were called
                    """
                    actions = output_results["vmonkey_values"]["actions"]
                    external_functions = output_results["vmonkey_values"][
                        "external_funcs"]
                    tmp_iocs = output_results["vmonkey_values"]["tmp_iocs"]
                    if output_results["vmonkey_err"]:
                        vmonkey_err = True
                        self.log.warning(output_results["vmonkey_err"])
                else:
                    vmonkey_err = True
            else:
                vmonkey_err = True

        except Exception:
            self.log.exception(
                f"ViperMonkey failed to analyze file {request.sha256}")

        if actions:
            # Creating action section
            action_section = ResultSection("Recorded Actions:",
                                           parent=self.result)
            action_section.add_tag("technique.macro", "Contains VBA Macro(s)")
            sub_action_sections: Dict[str, ResultSection] = {}
            for action, parameters, description in actions:  # Creating action sub-sections for each action
                if not description:  # For actions with no description, just use the type of action
                    description = action

                if description not in sub_action_sections:
                    # Action's description will be the sub-section name
                    sub_action_section = ResultSection(description,
                                                       parent=action_section)
                    sub_action_sections[description] = sub_action_section
                    if description == "Shell function":
                        sub_action_section.set_heuristic(2)
                else:
                    # Reuse existing section
                    sub_action_section = sub_action_sections[description]
                    if sub_action_section.heuristic:
                        sub_action_section.heuristic.increment_frequency()

                # Parameters are sometimes stored as a list, account for this
                if isinstance(parameters, list):
                    for item in parameters:
                        # Parameters can include non-strings (booleans, for example)
                        if isinstance(item, str):
                            # Check for PowerShell
                            self.extract_powershell(item, sub_action_section,
                                                    request)
                    # Join list items into single string
                    param = ", ".join(str(p) for p in parameters)

                else:
                    param = parameters
                    # Parameters can include non-strings (booleans, for example)
                    if isinstance(param, str):
                        self.extract_powershell(param, sub_action_section,
                                                request)

                # If the description field was empty, re-organize result section for this case
                if description == action:
                    sub_action_section.add_line(param)
                else:
                    sub_action_section.add_line(
                        f"Action: {action}, Parameters: {param}")

                # Check later for base64
                potential_base64.add(param)

                # Add urls/ips found in parameter to respective lists
                self.find_ip(param)
        # Check tmp_iocs
        res_temp_iocs = ResultSection("Runtime temporary IOCs")
        for ioc in tmp_iocs:
            self.extract_powershell(ioc, res_temp_iocs, request)
            potential_base64.add(ioc)
            self.find_ip(ioc)

        if res_temp_iocs.subsections or res_temp_iocs.body:
            self.result.add_section(res_temp_iocs)

        # Add PowerShell score/tag if found
        if self.found_powershell:
            ResultSection("Discovered PowerShell code in file",
                          parent=self.result,
                          heuristic=Heuristic(3))

        # Check parameters and temp_iocs for base64
        base64_section = ResultSection("Possible Base64 found",
                                       heuristic=Heuristic(5, frequency=0))
        for param in potential_base64:
            self.check_for_b64(param, base64_section, request,
                               request.file_contents)
        if base64_section.body:
            self.result.add_section(base64_section)

        # Add url/ip tags
        self.add_ip_tags()

        # Create section for built-in VBA functions called
        if len(external_functions) > 0:
            external_func_section = ResultSection(
                "VBA functions called",
                body_format=BODY_FORMAT.MEMORY_DUMP,
                parent=self.result)
            for func in external_functions:
                if func in vba_builtins:
                    external_func_section.add_line(func + ": " +
                                                   vba_builtins[func])
                else:
                    external_func_section.add_line(func)

        # Add vmonkey log as a supplemental file if we have results
        if "stdout" in output_results and (vmonkey_err
                                           or request.result.sections):
            temp_log_copy = os.path.join(
                tempfile.gettempdir(), f"{request.sid}_vipermonkey_output.log")
            with open(temp_log_copy, "w") as temp_log_file:
                temp_log_file.write(output_results["stdout"])

            request.add_supplementary(temp_log_copy, "vipermonkey_output.log",
                                      "ViperMonkey log output")
            if vmonkey_err:
                ResultSection(
                    'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
                    parent=self.result,
                    heuristic=Heuristic(1),
                )
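
# A minimal, standalone sketch of the parent/child protocol used above
# (helper name hypothetical): the Python 2 helper prints a single JSON
# object on stdout, and the service scans stdout line-by-line for it.
import json
import subprocess

def run_py2_helper(interpreter: str, script: str, *args: str) -> dict:
    proc = subprocess.run([interpreter, script, *args], capture_output=True)
    for line in proc.stdout.splitlines():
        if line.startswith(b"{") and line.endswith(b"}"):
            return json.loads(line)
    return {}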
Example #26
    def execute(self, request: ServiceRequest) -> None:
        """ Main module see README for details. """
        start = time.time()

        result = Result()
        request.result = result
        file_path = request.file_path
        if request.deep_scan:
            # Maximum size of submitted file to run this service:
            max_size = 200000
            # String length maximum, used in basic ASCII and UNICODE modules:
            max_length = 1000000
            # String list maximum size
            # List produced by basic ASCII and UNICODE module results and will determine
            # if patterns.py will only evaluate network IOC patterns:
            st_max_size = 100000
            # Minimum string size for encoded/stacked string modules:
            enc_min_length = 7
            stack_min_length = 7
        else:
            max_size = self.config.get('max_size', 85000)
            max_length = self.config.get('max_length', 5000)
            st_max_size = self.config.get('st_max_size', 0)
            enc_min_length = self.config.get('enc_min_length', 7)
            stack_min_length = self.config.get('stack_min_length', 7)
        timeout = self.service_attributes.timeout - 50

        if len(request.file_contents) > max_size:
            return

        stack_args = [
            FLOSS, '-n', str(stack_min_length), '--no-decoded-strings',
            file_path
        ]
        decode_args = [
            FLOSS, '-n', str(enc_min_length), '-x', '--no-static-strings',
            '--no-stack-strings', file_path
        ]
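        # Each flag and its value is now a separate argv token, e.g.:
        #   [FLOSS, '-n', '7', '--no-decoded-strings', file_path]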

        with Popen(stack_args, stdout=PIPE, stderr=PIPE) as stack, \
                Popen(decode_args, stdout=PIPE, stderr=PIPE) as decode:
            stack_out, _, timed_out = self.handle_process(
                stack, timeout + start - time.time(), ' '.join(stack_args))
            if timed_out:
                result.add_section(
                    ResultSection('FLARE FLOSS stacked strings timed out'))
                self.log.warning(
                    f'floss stacked strings timed out for sample {request.sha256}'
                )

            dec_out, dec_err, timed_out = self.handle_process(
                decode, timeout + start - time.time(), ' '.join(decode_args))
            if timed_out:
                result.add_section(
                    ResultSection('FLARE FLOSS decoded strings timed out'))
                self.log.warning(
                    f'floss decoded strings timed out for sample {request.sha256}'
                )

        if stack_out:
            sections = [[y for y in x.splitlines() if y]
                        for x in stack_out.split(b'\n\n')]
            for section in sections:
                if not section:  # skip empty
                    continue
                match = re.match(rb'FLOSS static\s+.*\s+strings', section[0])
                if match:
                    result_section = static_result(section, max_length,
                                                   st_max_size)
                    if result_section:
                        result.add_section(result_section)
                    continue
                match = re.match(rb'.*\d+ stackstring.*', section[0])
                if match:
                    result_section = stack_result(section)
                    if result_section:
                        result.add_section(result_section)
                    continue

        # Process decoded strings results
        if dec_out:
            result_section = decoded_result(dec_out)
            if result_section:
                if dec_err:
                    result_section.add_line(
                        "Flare Floss generated error messages while analyzing:"
                    )
                    result_section.add_line(safe_str(dec_err))
                result.add_section(result_section)
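
# handle_process() is not shown in this listing. A minimal sketch of the
# contract the code above relies on, assuming it returns stdout, stderr
# and a timed-out flag:
from subprocess import Popen, TimeoutExpired

def handle_process_sketch(proc: Popen, timeout: float, cmd: str):
    # cmd would feed log messages in the real service; unused in this sketch
    try:
        out, err = proc.communicate(timeout=max(timeout, 0))
        return out, err, False
    except TimeoutExpired:
        proc.kill()
        out, err = proc.communicate()
        return out, err, True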
Example #27
    def execute(self, request: ServiceRequest) -> None:
        # --- Setup ----------------------------------------------------------------------------------------------
        request.result = Result()
        patterns = PatternMatch()

        if request.deep_scan:
            max_attempts = 100
        else:
            max_attempts = 10

        self.files_extracted = set()
        self.hashes = set()

        # --- Pre-Processing --------------------------------------------------------------------------------------
        # Get all IOCs prior to de-obfuscation
        pat_values = patterns.ioc_match(request.file_contents,
                                        bogon_ip=True,
                                        just_network=False)
        if pat_values and request.get_param('extract_original_iocs'):
            ioc_res = ResultSection(
                "The following IOCs were found in the original file",
                parent=request.result,
                body_format=BODY_FORMAT.MEMORY_DUMP)
            for k, val in pat_values.items():
                for v in val:
                    ioc_res.add_line(
                        f"Found {k.upper().replace('.', ' ')}: {safe_str(v)}")
                    ioc_res.add_tag(k, v)

        # --- Prepare Techniques ----------------------------------------------------------------------------------
        techniques = [
            ('MSOffice Embedded script', self.msoffice_embedded_script_string),
            ('CHR and CHRB decode', self.chr_decode),
            ('String replace', self.string_replace),
            ('Powershell carets', self.powershell_carets),
            ('Array of strings', self.array_of_strings),
            ('Fake array vars', self.vars_of_fake_arrays),
            ('Reverse strings', self.str_reverse),
            ('B64 Decode', self.b64decode_str),
            ('Simple XOR function', self.simple_xor_function),
        ]
        second_pass = [('Concat strings', self.concat_strings),
                       ('MSWord macro vars', self.mswordmacro_vars),
                       ('Powershell vars', self.powershell_vars),
                       ('Charcode hex', self.charcode_hex)]
        final_pass = [
            ('Charcode', self.charcode),
        ]

        code_extracts = [('.*html.*', "HTML scripts extraction",
                          self.extract_htmlscript)]
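
        # Each technique is assumed to be a callable that takes the current
        # layer (bytes) and returns the transformed bytes, or a falsy value
        # when it could not simplify the layer further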

        layers_list: List[Tuple[str, bytes]] = []
        layer = request.file_contents

        # --- Stage 1: Script Extraction --------------------------------------------------------------------------
        for pattern, name, func in code_extracts:
            if regex.match(pattern, request.task.file_type):
                extracted_parts = func(request.file_contents)
                layer = b"\n".join(extracted_parts).strip()
                layers_list.append((name, layer))
                break

        # --- Stage 2: De-obfuscation -----------------------------------------------------------------------------
        idx = 0
        first_pass_len = len(techniques)
        layers_count = len(layers_list)
        while True:
            if idx > max_attempts:
                final_pass.extend(techniques)
                for name, technique in final_pass:
                    res = technique(layer)
                    if res:
                        layers_list.append((name, res))
                break
            with ThreadPoolExecutor() as executor:
                threads = [
                    executor.submit(technique, layer)
                    for name, technique in techniques
                ]
                results = [thread.result() for thread in threads]
                for (name, _), result in zip(techniques, results):
                    if result:
                        layers_list.append((name, result))
                        # Looks like it worked; restart with the new layer
                        layer = result
            # If the layers haven't changed in a pass, break
            if layers_count == len(layers_list):
                if len(techniques) != first_pass_len:
                    final_pass.extend(techniques)
                    with ThreadPoolExecutor() as executor:
                        threads = [
                            executor.submit(technique, layer)
                            for name, technique in final_pass
                        ]
                        results = [thread.result() for thread in threads]
                        # Index final_pass here, not techniques: the two
                        # lists differ at this point
                        for (name, _), result in zip(final_pass, results):
                            if result:
                                layers_list.append((name, result))
                    break
                for x in second_pass:
                    techniques.insert(0, x)
            layers_count = len(layers_list)
            idx += 1

        # --- Compiling results ----------------------------------------------------------------------------------
        if len(layers_list) > 0:
            extract_file = False
            num_layers = len(layers_list)

            # Compute heuristic
            if num_layers < 5:
                heur_id = 1
            elif num_layers < 10:
                heur_id = 2
            elif num_layers < 50:
                heur_id = 3
            elif num_layers < 100:
                heur_id = 4
            else:  # num_layers >= 100
                heur_id = 5

            # Cleanup final layer
            clean = self.clean_up_final_layer(layers_list[-1][1])
            if clean != request.file_contents:
                # Check for new IOCs
                pat_values = patterns.ioc_match(clean,
                                                bogon_ip=True,
                                                just_network=False)
                diff_tags: Dict[str, List[bytes]] = {}

                for uri in pat_values.get('network.static.uri', []):
                    # Compare URIs without query string
                    uri = uri.split(b'?', 1)[0]
                    if uri not in request.file_contents:
                        diff_tags.setdefault('network.static.uri',
                                             []).append(uri)

                if request.deep_scan or (len(clean) > 1000
                                         and heur_id >= 4) or diff_tags:
                    extract_file = True

                # Display obfuscation steps
                mres = ResultSection(
                    "De-obfuscation steps taken by DeobsfuScripter",
                    parent=request.result)
                if heur_id:
                    mres.set_heuristic(heur_id)

                lcount = Counter(name for name, _ in layers_list)
                for name, count in lcount.items():
                    mres.add_line(f"{name}, {count} time(s).")

                # Display final layer
                byte_count = 5000
                if extract_file:
                    # Save extracted file
                    byte_count = 500
                    file_name = f"{os.path.basename(request.file_name)}_decoded_final"
                    file_path = os.path.join(self.working_directory, file_name)
                    # Ensure directory exists before write
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    with open(file_path, 'wb+') as f:
                        f.write(clean)
                        self.log.debug(
                            f"Submitted dropped file for analysis: {file_path}"
                        )
                    request.add_extracted(file_path, file_name,
                                          "Final deobfuscation layer")

                ResultSection(f"First {byte_count} bytes of the final layer:",
                              body=safe_str(clean[:byte_count]),
                              body_format=BODY_FORMAT.MEMORY_DUMP,
                              parent=request.result)

                # Display new IOCs from final layer
                if len(diff_tags) > 0:
                    ioc_new = ResultSection(
                        "New IOCs found after de-obfustcation",
                        parent=request.result,
                        body_format=BODY_FORMAT.MEMORY_DUMP)
                    has_network_heur = False
                    for ty, val in diff_tags.items():
                        for v in val:
                            if "network" in ty:
                                has_network_heur = True
                            ioc_new.add_line(
                                f"Found {ty.upper().replace('.', ' ')}: {safe_str(v)}"
                            )
                            ioc_new.add_tag(ty, v)

                    if has_network_heur:
                        ioc_new.set_heuristic(7)
                    else:
                        ioc_new.set_heuristic(6)

                if len(self.files_extracted) > 0:
                    ext_file_res = ResultSection(
                        "The following files were extracted during the deobfuscation",
                        heuristic=Heuristic(8),
                        parent=request.result)
                    for extracted in self.files_extracted:
                        file_name = os.path.basename(extracted)
                        ext_file_res.add_line(file_name)
                        request.add_extracted(
                            extracted, file_name,
                            "File of interest deobfuscated from sample")