Example #1
0
    def LSB_visual(self):
        """Convert pixel data so that each value in a pixel is either 0 (if LSB == 0) or 255 (if LSB == 1).

        Saves the rendered image to the working directory and, when a request
        object is available, attaches it as a supplementary file and records a
        result subsection; otherwise displays the image interactively.
        """
        img = Image.new(self.imode, self.isize)
        if self.working_directory is None:
            self.working_directory = path.dirname(__file__)
        try:
            if self.channels_to_process == 1:
                img.putdata(self.iter_grayscale_pixels())
            else:
                img.putdata(self.iter_rgba_pixels())
        except Exception:
            # Pixel iteration can fail on malformed images; original code used a
            # bare `except:` here which also swallowed KeyboardInterrupt/SystemExit.
            return

        lsb_visual_path = path.join(
            self.working_directory,
            "LSB_visual_attack.{}".format(self.iformat.lower()))
        img.save(lsb_visual_path)
        # Save to AL supplementary file. Request should therefore be set and working_directory given.
        if self.request is not None:
            self.request.add_supplementary(
                lsb_visual_path, "LSB_visual_attack",
                "Pixaxe LSB visual attack image")
            if self.result is not None:
                visres = ResultSection('Visual LSB Analysis.\t')
                visres.add_line(
                    'Visual LSB analysis successful, see extracted files.')
                self.working_result.add_subsection(visres)
        else:
            img.show()
        return
Example #2
0
 def additional_parsing(self, file_path: str) -> Optional[ResultSection]:
     """Extract URLs from PDF link annotations and tag any IOCs found.

     :param file_path: Path to the PDF sample on disk
     :return: A ResultSection listing the URLs, or None if no URL was found
         or pikepdf could not parse the sample
     """
     urls = set()
     try:
         with pikepdf.open(file_path) as pdf:
             num_pages = len(pdf.pages)
             for page in pdf.pages:
                 if '/Annots' not in page:
                     continue
                 for annot in page['/Annots'].as_list():
                     # Bug fixed: only /Link annotations carrying an action
                     # dictionary (/A) can hold a URI. Previously, non-link
                     # annotations fell through to annot['/A'] and the
                     # resulting KeyError aborted the whole parse via the
                     # broad except below.
                     if annot.get('/Subtype') != '/Link' or '/A' not in annot:
                         continue
                     _url = annot['/A'].get('/URI')
                     if not hasattr(_url, '__str__'):
                         continue
                     url = str(_url)
                     if re.match(FULL_URI, url):
                         urls.add(url)
         if not urls:
             return None
         patterns = PatternMatch()
         body = '\n'.join(urls)
         tags: dict[str, set[bytes]] = patterns.ioc_match(body.encode())
         result = ResultSection(
             'URL in Annotations',
             heuristic=Heuristic(
                 27, signature='one_page' if num_pages == 1 else None),
             body=body)
         for ty, vals in tags.items():
             for val in vals:
                 result.add_tag(ty, val)
         return result
     except Exception as e:
         self.log.warning(f'pikepdf failed to parse sample: {e}')
         return None
Example #3
0
    def _get_category_section(self, category: str,
                              tags: Iterator[AVClassTag]) -> ResultSection:
        """
        Build a `ResultSection` for all tags belonging to one AVclass category.

        The section body is a table of tag details sorted by rank, highest
        first; a heuristic and per-tag tagging are applied when the category
        defines them.

        :param category: Category of tags
        :param tags: Tags belonging to category
        :return: `ResultSection`
        """
        ranked_tags = sorted(tags, key=lambda avtag: avtag.rank, reverse=True)
        category_name, heur_id, tag_type = AVCLASS_CATEGORY[category]

        rows = []
        for avtag in ranked_tags:
            rows.append({
                'name': avtag.name,
                'category': category_name,
                'path': avtag.path,
                'rank': avtag.rank
            })

        heuristic = Heuristic(heur_id) if heur_id is not None else None
        section = ResultSection(
            f'AVclass extracted {len(ranked_tags)} {category_name} tags',
            body=json.dumps(rows),
            body_format=BODY_FORMAT.TABLE,
            heuristic=heuristic)

        if tag_type is not None:
            for avtag in ranked_tags:
                section.add_tag(tag_type, avtag.name)

        return section
Example #4
0
 def dump_invalid_properties(self, parent_res):
     """Report OLE properties with unexpected IDs as a heuristic-scored subsection of parent_res."""
     count = self.invalid_properties_count
     if not count:
         return
     section = ResultSection(
         f"We've found {count} properties with IDs different than "
         f"1 (storage), 2 (stream) and 5 (root)",
         parent=parent_res)
     section.set_heuristic(50)
Example #5
0
    def parse_link(self, parent_res, path):
        """Decode a Windows shortcut (.lnk) file and attach its metadata to parent_res.

        Returns True when metadata was decoded, False otherwise.
        """
        with open(path, "rb") as lnk_file:
            metadata = decode_lnk(lnk_file.read())

        if metadata is None:
            return False

        # Flatten the nested metadata and keep only truthy values
        body_output = {}
        for key, value in flatten(metadata).items():
            if value:
                body_output[build_key(key)] = value

        res = ResultSection("Metadata extracted by parse_lnk",
                            body_format=BODY_FORMAT.KEY_VALUE,
                            body=json.dumps(body_output),
                            parent=parent_res)

        base_path = metadata.get("BasePath", "").strip()
        rel_path = metadata.get("RELATIVE_PATH", "").strip()
        net_name = metadata.get("NetName", "").strip()
        cmd_args = metadata.get("COMMAND_LINE_ARGUMENTS", "").strip()

        # Flag shortcuts whose command line matches known-bad patterns
        if BAD_LINK_RE.search(cmd_args.lower()):
            res.set_heuristic(1)

        res.add_tag(tag_type="file.name.extracted",
                    value=(base_path or rel_path or net_name).rsplit("\\")[-1])
        res.add_tag(tag_type="dynamic.process.command_line",
                    value=f"{(rel_path or base_path or net_name)} {cmd_args}".strip())

        # Tag every metadata entry that maps to a known tag type
        for key, value in body_output.items():
            tag_type = (TAG_MAP.get("LNK", {}).get(key, None)
                        or TAG_MAP.get(None, {}).get(key, None))
            if tag_type:
                res.add_tag(tag_type, value)

        return True
    def _create_random_section(self):
        """Build a ResultSection filled with random content (for testing/demo output)."""
        # Random body format, then a randomly-phrased title
        body_format = random.choice(FORMAT_LIST)
        section = ResultSection(get_random_phrase(3, 7), body_format=body_format)

        # Add body lines with randomly generated phrases
        for _ in range(1, 5):
            section.add_line(get_random_phrase(5, 10))

        # Attach a random assortment of tags
        for tag_key, tag_values in flatten(get_random_tags()).items():
            for tag_value in tag_values:
                section.add_tag(tag_key, tag_value)

        # Roughly one time in three, score the section with a heuristic
        if random.choice([False, False, True]):
            section.set_heuristic(random.randint(1, 4))

        # Roughly one time in three, nest another randomly-built section
        if random.choice([False, False, True]):
            section.add_subsection(self._create_random_section())

        return section
    def execute(self, request):
        """Main Module. See README for details.

        Runs DeXRAY on the submitted quarantine file, reports any files it
        un-quarantined and any quarantine metadata it recovered.
        """
        result = Result()
        self.sha = request.sha256
        local = request.file_path

        # Bug fixed: the loop variable below used to shadow this tuple element.
        _extracted, metadata = self.dexray(request, local)

        if request.extracted:
            text_section = ResultSection("DeXRAY found files:")
            for extracted_file in request.extracted:
                file_name = extracted_file.get('name')
                text_section.add_line(
                    f"Resubmitted un-quarantined file as : {file_name}")
            result.add_section(text_section)

        if metadata:
            # Can contain live URLs to the original content source
            kv_section = ResultSection("DeXRAY Quarantine Metadata",
                                       body_format=BODY_FORMAT.JSON,
                                       body=json.dumps(metadata))
            # Bug fixed: kv_section was previously added to the result twice
            # (once here and once more in a trailing loop).
            result.add_section(kv_section)

        # Consistent with the service's other execute() implementations
        request.result = result
    def test_handle_artefact(artefact, expected_result_section_title):
        from assemblyline_v4_service.common.dynamic_service_helper import SandboxOntology, Artefact
        from assemblyline_v4_service.common.result import ResultSection

        # A None artefact must raise
        if artefact is None:
            with pytest.raises(Exception):
                SandboxOntology._handle_artefact(artefact, None)
            return

        # Build the section we expect to be produced, if any
        expected = None
        if expected_result_section_title is not None:
            expected = ResultSection(expected_result_section_title)
            expected.add_tag("dynamic.process.file_name", artefact["path"])

        parent = ResultSection("blah")
        art = Artefact(name=artefact["name"],
                       path=artefact["path"],
                       description=artefact["description"],
                       to_be_extracted=artefact["to_be_extracted"])
        SandboxOntology._handle_artefact(art, parent)

        actual = parent.subsections[0] if parent.subsections else None

        if expected is None and actual is None:
            assert True
        else:
            assert check_section_equality(actual, expected)
    def test_set_heuristic_by_verdict(intezer_static_class_instance):
        from assemblyline_v4_service.common.result import ResultSection

        # These verdicts must leave the (shared) section without a heuristic
        result_section = ResultSection("blah")
        for verdict in (None, "blah", "trusted"):
            intezer_static_class_instance._set_heuristic_by_verdict(
                result_section, verdict)
            assert result_section.heuristic is None

        # "malicious" sets heuristic 1 on that same section
        intezer_static_class_instance._set_heuristic_by_verdict(
            result_section, "malicious")
        assert result_section.heuristic.heur_id == 1

        # Fresh sections: verify the heuristic chosen per verdict
        for verdict, expected_heur_id in (("known_malicious", 1),
                                          ("suspicious", 2)):
            result_section = ResultSection("blah")
            intezer_static_class_instance._set_heuristic_by_verdict(
                result_section, verdict)
            assert result_section.heuristic.heur_id == expected_heur_id
Example #10
0
def stack_result(section: List[bytes]) -> Optional[ResultSection]:
    """Generate a ResultSection from the FLOSS stacked-strings output section.

    :param section: Raw output lines; the first line is the header, the rest
        are the extracted stacked strings
    :return: A ResultSection with one subsection per string group, or None
        when the section contains no strings
    """
    strings = section[1:]
    # Check for emptiness before building the section (was built first before)
    if not strings:
        return None

    # Typo fixed: was 'FLARE FLOSS Sacked Strings'
    result = ResultSection('FLARE FLOSS Stacked Strings',
                           body_format=BODY_FORMAT.MEMORY_DUMP,
                           heuristic=Heuristic(3))
    assert result.heuristic

    groups = group_strings(s.decode() for s in strings)
    for group in groups:
        res = ResultSection(
            f"Group: '{min(group, key=len)}' Strings: {len(group)}",
            body='\n'.join(group),
            body_format=BODY_FORMAT.MEMORY_DUMP)
        for string in group:
            # Only tag network IOCs for very large groups to limit noise
            ioc_tag(string.encode(), res, just_network=len(group) > 1000)
        result.add_subsection(res)

    if any(res.tags for res in result.subsections):
        result.heuristic.add_signature_id('stacked_ioc')

    return result
def _validate_tag(
    result_section: ResultSection,
    tag: str,
    value: Any,
    safelist: Dict[str, Dict[str, List[str]]] = None
) -> bool:
    """
    This method validates the value relative to the tag type before adding the value as a tag to the ResultSection.
    :param result_section: The ResultSection that the tag will be added to
    :param tag: The tag type that the value will be tagged under
    :param value: The item that will be tagged under the tag type
    :param safelist: The safelist containing matches and regexs. The product of a service using self.get_api_interface().get_safelist().
    :return: Tag was successfully added
    """
    if safelist is None:
        safelist = {}

    # Reject values that fail the tag type's format regex, when one exists
    regex = _get_regex_for_tag(tag)
    if regex and not match(regex, value):
        return False

    if "ip" in tag and not is_valid_ip(value):
        return False

    if "domain" in tag:
        if not is_valid_domain(value):
            return False
        elif value in FALSE_POSITIVE_DOMAINS_FOUND_IN_PATHS:
            return False
        # A "domain" whose last label is a common file extension is most
        # likely a file name mistaken for a domain
        elif isinstance(value, str) and value.split(".")[-1] in COMMON_FILE_EXTENSIONS:
            return False

    # Never tag safelisted values
    if is_tag_safelisted(value, [tag], safelist):
        return False

    # if "uri" is in the tag, let's try to extract its domain/ip and tag it.
    if "uri_path" not in tag and "uri" in tag:
        # First try to get the domain (recursive call validates + tags it)
        valid_domain = False
        domain = search(DOMAIN_REGEX, value)
        if domain:
            domain = domain.group()
            valid_domain = _validate_tag(result_section, "network.dynamic.domain", domain, safelist)
        # Then try to get the IP (recursive call validates + tags it)
        valid_ip = False
        ip = search(IP_REGEX, value)
        if ip:
            ip = ip.group()
            valid_ip = _validate_tag(result_section, "network.dynamic.ip", ip, safelist)

        # Only tag the URI itself when it is more than a bare domain/IP and at
        # least one embedded host component was itself tagged successfully
        if value not in [domain, ip] and (valid_domain or valid_ip):
            result_section.add_tag(tag, safe_str(value))
        else:
            return False
    else:
        result_section.add_tag(tag, safe_str(value))

    return True
    def execute(self, request):
        """Submit the URL to urlscan.io, then report the verdict, page metadata
        and a screenshot of the scanned site."""
        result = Result()
        url = request.task.metadata.get('submitted_url')
        api_key = request.get_param("api_key")
        public = request.get_param("public")

        u = UrlScan(apikey=api_key, url=url, public=public)
        u.submit()

        # We need to wait for the API to process our request
        response = self.wait_processing(u)
        # Parse the JSON payload once instead of re-parsing it per access
        response_data = response.json()

        # We get the response parts that we want and merge them all together
        report = {
            **response_data["verdicts"]["overall"],
            **response_data["lists"],
            **response_data["page"]
        }

        # We convert the "certificates" section from a list of dictionaries to
        # a dictionary of lists. Guard against an empty certificate list,
        # which previously raised IndexError on certificates[0].
        certificates = report.pop("certificates")
        if certificates:
            report.update({
                k: [cert[k] for cert in certificates]
                for k in certificates[0]
            })

        # We create the KEY_VALUE section to add the report to the result page
        kv_section = ResultSection("Urlscan.io report",
                                   body_format=BODY_FORMAT.KEY_VALUE,
                                   body=json.dumps(report))

        for domain in report["domains"]:
            kv_section.add_tag("network.static.domain", domain.strip())

        result.add_section(kv_section)

        # We get the preview of the website
        preview_path = self.working_directory + "/preview.png"
        screenshot = u.getScreenshot()
        with open(preview_path, "wb") as ofile:
            ofile.write(screenshot)

        # Adding the preview on the result page
        url_section = ResultSection(
            'Urlscan.io website screenshot',
            body_format=BODY_FORMAT.URL,
            body=json.dumps({
                "name": "The preview is also available here !",
                "url": response_data["task"]["screenshotURL"]
            }))
        result.add_section(url_section)
        request.add_extracted(preview_path,
                              "preview.png", "Here\'s the preview of the site")

        request.result = result
Example #13
0
    def parse_plist(self, pdict):
        """Attempts to extract and identify all known and unknown keys of a plist file.

        Args:
            pdict: Plist dictionary item.

        Returns:
            Tuple of two ResultSections (identified keys, unidentified keys);
            either element is None when the corresponding section is empty.
        """

        idenkey_sec = ResultSection("Identified Keys")
        unkkey_sec = ResultSection("Unidentified Keys:")

        # Sometimes plist is a list of dictionaries, or it is just a list. Will merge dict /convert to dict for now
        if isinstance(pdict, list):
            pdict = self.transform_dicts(pdict)

        for k, i in list(pdict.items()):
            # Prepare Keys
            k = str(safe_str(k))
            # Treat iPad-specific variants ("key~ipad") as the base key
            k_noipad = k.replace("~ipad", "")

            # Prepare values: normalize everything into a list for uniform handling
            if i is None:
                i = [""]
            elif not isinstance(i, list):
                i = [i]

            # Many plist files are duplicates of info.plist, do not report on keys already identified
            if k_noipad in self.reported_keys:
                if i in self.reported_keys[k_noipad]:
                    continue
                self.reported_keys[k_noipad].append(i)
            else:
                self.reported_keys[k_noipad] = [i]

            # Process known keys
            if k_noipad in self.known_keys:
                desc, create_tag = self.known_keys[k_noipad]

                idenkey_sec.add_line(f"{k} ({desc}): {', '.join([safe_str(x, force_str=True) for x in i])}")
                # Only tag keys whose known-key entry requests tagging
                if create_tag:
                    for val in i:
                        idenkey_sec.add_tag(TAG_MAP[k_noipad.upper()], safe_str(val, force_str=True))

            else:
                unkkey_sec.add_line(f"{k}: {', '.join([safe_str(x, force_str=True) for x in i])}")

        # Drop empty sections so callers only receive sections with content
        if idenkey_sec.body is None:
            idenkey_sec = None

        if unkkey_sec.body is None:
            unkkey_sec = None

        return idenkey_sec, unkkey_sec
def get_result_subsection(result, title, heuristic):
    """Return the subsection of `result` with the given title, creating it
    (and applying `heuristic`) when no such subsection exists yet."""
    existing = None
    # Reuse the matching subsection when one is already present
    for candidate in result.subsections:
        if candidate.title_text == title:
            existing = candidate
    if existing:
        return existing
    # Otherwise create, attach and score a new one
    new_subsection = ResultSection(title)
    result.add_subsection(new_subsection)
    new_subsection.set_heuristic(heuristic)
    return new_subsection
 def resubmit_dex2jar_output(self, apk_file: str, target: str, result: Result, request):
     """Extract classes.dex from the APK, convert it to a JAR with dex2jar and
     resubmit the JAR for analysis."""
     dex_path = os.path.join(self.working_directory, "classes.dex")
     self.get_dex(apk_file, dex_path)
     if not os.path.exists(dex_path):
         return
     converter = Popen([self.dex2jar, "--output", target, dex_path],
                       stdout=PIPE, stderr=PIPE)
     converter.communicate()
     if not os.path.exists(target):
         return
     res_sec = ResultSection("Classes.dex file was recompiled as a JAR and re-submitted for analysis")
     res_sec.add_line(f"JAR file resubmitted as: {os.path.basename(target)}")
     request.add_extracted(target, os.path.basename(target), "Dex2Jar output JAR file")
     result.add_section(res_sec)
def _section_traverser(section: ResultSection = None) -> ResultSection:
    """
    This function goes through each section and sends the tags to a function
    that will reduce specific tags

    :param section: An Assemblyline result section
    :return: Reduced Assemblyline result section (None when no section given)
    """
    # Bug fixed: the default of None previously crashed immediately on
    # `section.subsections` below; treat "no section" as a no-op instead.
    if section is None:
        return section
    for subsection in section.subsections:
        _section_traverser(subsection)
    if section.tags:
        section.set_tags(_reduce_specific_tags(section.tags))
    return section
def add_results(result, data, data_deobfuscated):
    """Tag IOCs and suspicious formulas from the document data and attach the
    corresponding sections to `result` when they have content."""
    ioc_section = ResultSection('Found the following IoCs')
    formula_section = ResultSection('Suspicious formulas found in document')

    # Populate both sections with tags and subsections
    tag_data(data, data_deobfuscated, ioc_section, formula_section)

    # Attach each section only when something was actually found
    for section in (ioc_section, formula_section):
        if section.subsections:
            result.add_section(section)
 def manage_threat_level(self, data, result):
     """Add a ResultSection scored according to the report's threat level.

     Bug fixed: an unrecognized threat level previously left `threat_section`
     unbound, so `result.add_section` raised UnboundLocalError. Unknown
     levels are now skipped.
     """
     heuristic_by_level = {
         'Low Risk': 1,
         'Moderate Risk': 2,
         'High Risk': 3,
     }
     threat_level = data['threat_level']
     heur_id = heuristic_by_level.get(threat_level)
     if heur_id is None:
         # Unknown threat level: nothing to report
         return
     threat_section = ResultSection("threat level : {0}".format(threat_level),
                                    heuristic=Heuristic(heur_id))
     result.add_section(threat_section)
Example #19
0
    def recurse_add_res(self, file_res, res_list, new_files, parent=None):
        """Build result sections from a list of result dictionaries, collect
        files to resubmit, and attach root sections to file_res.

        NOTE(review): despite the name, this method does not call itself for
        nested sections; children appear to be attached via the `parent=`
        argument of the ResultSection constructor — confirm against callers.
        """
        for res_dic in res_list:
            # Check if condition is OK
            if self.pass_condition(res_dic.get("condition", None)):
                # parent= attaches the new section to `parent` at construction
                res = ResultSection(res_dic['title_text'],
                                    classification=res_dic.get('classification', Classification.UNRESTRICTED),
                                    parent=parent, body_format=res_dic.get('body_format', BODY_FORMAT.TEXT))
                # The heuristic id may be altered by a score condition
                heur_id = self.heuristic_alteration(res_dic.get('score_condition', None), res_dic['heur_id'])
                res.set_heuristic(heur_id)

                # Add Tags
                tags = res_dic.get('tags', [])
                for res_tag in tags:
                    res.add_tag(res_tag[0], res_tag[1])

                # Add body
                body = res_dic.get('body', None)
                if body:
                    res.set_body(body)

                # File for resubmit; entries may be (name, path) tuples
                files = res_dic.get('files', [])
                for res_file in files:
                    if isinstance(res_file, tuple):
                        res_file = res_file[1]
                    new_files.append(res_file)

                # Add to file res if root result
                if parent is None:
                    file_res.add_section(res)
Example #20
0
def static_result(section: List[bytes], max_length: int,
                  st_max_size: int) -> Optional[ResultSection]:
    """ Generates a ResultSection from floss static strings output section """
    header = section[0]
    lines = section[1:]
    # When the section is very large, restrict tagging to network IOCs
    just_network = len(lines) > st_max_size

    result = ResultSection(header.decode(errors='ignore'),
                           body_format=BODY_FORMAT.MEMORY_DUMP)
    for raw_line in lines:
        # Skip over-long strings entirely
        if len(raw_line) > max_length:
            continue
        # Keep only lines that produced at least one IOC tag
        if ioc_tag(raw_line, result, just_network=just_network):
            result.add_line(raw_line.decode(errors='ignore'))

    return result if result.body else None
    def check_for_b64(self, data, section):
        """Search and decode base64 strings in sample data.

        Args:
            data: Data to be parsed (str)
            section: Sub-section to be modified if base64 found

        Returns:
            decoded: Boolean which is true if base64 found
        """

        b64_matches = []
        # b64_matches_raw will be used for replacing in case b64_matches are modified
        b64_matches_raw = []
        decoded_param = data
        decoded = False

        for b64_match in re.findall(
                '([\x20]{0,2}(?:[A-Za-z0-9+/]{10,}={0,2}[\r]?[\n]?){2,})',
                re.sub('\x3C\x00\x20{2}\x00', '', data)):
            b64 = b64_match.replace('\n', '').replace('\r', '').replace(
                ' ', '').replace('<', '')
            uniq_char = ''.join(set(b64))
            # Require enough distinct characters and a plausible base64 length
            if len(uniq_char) > 6 and len(b64) >= 16 and len(b64) % 4 == 0:
                b64_matches.append(b64)
                b64_matches_raw.append(b64_match)

        for b64_string, b64_string_raw in zip(b64_matches, b64_matches_raw):
            try:
                base64data = binascii.a2b_base64(b64_string)
                # Decode base64 bytes, add a space to beginning as it may be
                # stripped off while using regex.
                # Bug fixed: the ascii-filtered bytes must be decoded back to
                # str before concatenation — `' ' + bytes` raised TypeError,
                # which the except below swallowed, silently disabling this
                # whole feature.
                base64data_decoded = ' ' + base64data.decode('utf-16').encode(
                    'ascii', 'ignore').decode()
                # Replace base64 from param with decoded string.
                # Bug fixed: use plain string replacement — the raw match can
                # contain regex metacharacters ('+'), which made re.sub
                # misbehave or raise.
                decoded_param = decoded_param.replace(b64_string_raw,
                                                      base64data_decoded)
                decoded = True
            except Exception:
                # Invalid base64 or non-UTF-16 payload: skip this candidate
                pass

        if decoded:
            decoded_section = ResultSection('Possible Base64 found',
                                            parent=section,
                                            heuristic=Heuristic(5))
            decoded_section.add_line(
                f'Possible Base64 Decoded Parameters: {decoded_param}')
            self.find_ip(decoded_param)

        return decoded
    def extract_powershell(self, parameter, section):
        """Searches parameter for PowerShell, adds as extracted if found

        Args:
            parameter: String (or bytes) to be searched
            section: Section to be modified if PowerShell found
        """
        # re.search is enough for a yes/no check (re.findall built a full list)
        if not re.search(r'(?:powershell)|(?:pwsh)', parameter, re.IGNORECASE):
            return

        self.found_powershell = True
        if isinstance(parameter, str):
            # Unicode-objects must be encoded before hashing
            sha256hash = hashlib.sha256(parameter.encode()).hexdigest()
        else:
            sha256hash = hashlib.sha256(parameter).hexdigest()
        # Attached to `section` via the parent argument
        ResultSection('Discovered PowerShell code in parameter.',
                      parent=section)

        # Add PowerShell code as extracted, account for duplicates
        if sha256hash not in self.file_hashes:
            self.file_hashes.append(sha256hash)
            powershell_filename = f'{sha256hash[0:25]}_extracted_powershell'
            powershell_file_path = os.path.join(self.working_directory,
                                                powershell_filename)
            with open(powershell_file_path, 'w') as f:
                f.write(parameter)
                self.request.add_extracted(
                    powershell_file_path, powershell_filename,
                    'Discovered PowerShell code in parameter')
Example #23
0
    def handle_artefacts(artefact_list: list,
                         request: ServiceRequest) -> ResultSection:
        """
        Goes through each artefact in artefact_list, uploading them and adding result sections accordingly

        Positional arguments:
        artefact_list -- list of dictionaries that each represent an artefact
        """
        validated = SandboxOntology._validate_artefacts(artefact_list)
        artefacts_section = ResultSection("Sandbox Artefacts")

        for artefact in validated:
            SandboxOntology._handle_artefact(artefact, artefacts_section)

            if not artefact.to_be_extracted:
                request.add_supplementary(artefact.path, artefact.name,
                                          artefact.description)
                continue
            try:
                request.add_extracted(artefact.path, artefact.name,
                                      artefact.description)
            except MaxExtractedExceeded:
                # To avoid errors from being raised when too many files have been extracted
                pass

        # Only return the section when it actually gained subsections
        return artefacts_section if artefacts_section.subsections else None
Example #24
0
    def execute(self, request):
        """Run PDF analysis on the submitted file, skipping oversized documents."""
        temp_filename = request.file_path

        # Filter out large documents
        if os.path.getsize(temp_filename) > self.max_pdf_size:
            file_res = Result()
            file_res.add_section(ResultSection(
                f"PDF Analysis of the file was skipped because the "
                f"file is too big (limit is {(self.max_pdf_size / 1000 / 1000)} MB)."
            ))
            request.result = file_res
            return

        filename = os.path.basename(temp_filename)
        with open(temp_filename, 'rb') as f:
            file_content = f.read()

        # XDP wrapper marker indicates an embedded XML Data Package
        if b'<xdp:xdp' in file_content:
            self.find_xdp_embedded(filename, file_content, request)

        self.peepdf_analysis(temp_filename, file_content, request)
Example #25
0
    def execute(self, request):
        """Run StegExpose LSB steganalysis over the submitted image and report
        the parsed CSV results as a key/value section."""
        result = Result()

        file_path = request.file_path

        shutil.copyfile(file_path, self.working_directory + "/analyzed")

        # Security fix: invoke the JVM with an argument list (shell=False)
        # instead of interpolating paths into a shell command string, which
        # broke on paths with spaces and allowed shell injection.
        report_path = self.working_directory + "/report.csv"
        subprocess.run(
            ["java", "-jar", "/var/lib/assemblyline/StegExpose/StegExpose.jar",
             self.working_directory, "standard", "default", report_path],
            check=False)

        lsb_steg_results = self.beautify_dict(self.read_csv(report_path))

        kv_section = ResultSection("Result of the LSB steganalysis",
                                   body_format=BODY_FORMAT.KEY_VALUE,
                                   body=json.dumps(lsb_steg_results))
        result.add_section(kv_section)

        request.result = result
def test_validate_tag(tag, value, expected_tags, added_tag):
    from assemblyline_v4_service.common.result import ResultSection
    from assemblyline_v4_service.common.tag_helper import add_tag
    # add_tag must report whether the tag was added and leave the expected tags
    section = ResultSection("blah")
    safelist = {"match": {"domain": ["blah.ca"]}}
    was_added = add_tag(section, tag, value, safelist)
    assert was_added == added_tag
    assert section.tags == expected_tags
Example #27
0
    def dump_dir(self, dir_index, path, parent_res, is_orphan):
        """Dump one OLE2 storage directory entry, its siblings and children.

        Uses self.property_dict[dir_index] as (field, dumped_flag) to avoid
        re-dumping entries reachable by multiple paths.
        """
        # 1. make sure the directory wasn't dumped already
        if dir_index in self.property_dict and self.property_dict[dir_index][
                1] is False:
            # Mark as dumped before recursing so cycles cannot loop forever
            self.property_dict[dir_index][1] = True

            field = self.property_dict[dir_index][0]
            # Strip the surrounding quotes from the displayed name
            field_name = field['name'].display[1:-1]
            field_full_name = path + field_name

            # 2. create a res with it's name
            res = ResultSection(f"OLE2 STORAGE: {field_full_name}")

            # 3. Dump the dir property
            self.dump_property(self.property_dict[dir_index][0], path,
                               dir_index, res, parent_res, is_orphan)

            # 3. navigate the red-black tree
            self.dump_siblings(field['child'].display, field_full_name, res,
                               parent_res, is_orphan)

            # Only attach the section when the dump produced content
            if len(res.subsections) > 0:
                parent_res.add_subsection(res)

            # call recursively our children when there is a children
            if dir_index in self.children:
                for sub_dir in self.children[dir_index][1]:
                    self.dump_dir(sub_dir, field_full_name + '\\', parent_res,
                                  is_orphan)
Example #28
0
    def run(self):
        """Parse the file with hachoir and, for OLE2 containers, run the deep
        OLE2 analysis, attaching results to self.parent_res."""
        hachoir_config.quiet = True
        # Reset per-run parsing state
        self.additional_parsing_fields = {}
        self.ole2parser = None
        self.office_root_entry_parser = None
        self.children = {}
        self.parent = {}
        self.property_dict = {}
        self.invalid_streams = []
        self.invalid_properties_count = 0

        parser = createParser(self.file_path)
        if parser is None:
            # hachoir could not identify the file: nothing to do
            return

        with parser:
            tags = parser.getParserTags()
            parser_id = tags.get('id', 'unknown')

            # Do OLE2 deep analysis if requested
            if parser_id == 'ole2':
                # (was a pointless f-string with no placeholders)
                ole2_res = ResultSection("Hachoir OLE2 Deep Analysis",
                                         parent=self.parent_res)
                # Register a dummy handler for property types hachoir leaves as
                # None, so that we don't bail on the NULL property type and we
                # keep on going.
                for key, value in PropertyContent.TYPE_INFO.items():
                    if value[1] is None:
                        PropertyContent.TYPE_INFO[key] = (value[0], DummyObject)
                self.parse_ole2(parser, ole2_res)
def test_add_tag(value, expected_tags, tags_were_added):
    from assemblyline_v4_service.common.result import ResultSection
    from assemblyline_v4_service.common.tag_helper import add_tag
    # add_tag must report whether tags were added and leave the expected tags
    section = ResultSection("blah")
    safelist = {"match": {"domain": ["blah.ca"]}}
    result = add_tag(section, "blah", value, safelist)
    assert result == tags_were_added
    assert section.tags == expected_tags
 def test_section_traverser(tags, correct_tags):
     from assemblyline_v4_service.common.section_reducer import _section_traverser
     from assemblyline_v4_service.common.result import ResultSection
     # Build a parent with one tagged subsection, then verify the traverser
     # reduces the subsection's tags as expected
     parent = ResultSection("blah")
     child = ResultSection("subblah")
     for tag_type, tag_values in tags.items():
         for tag_value in tag_values:
             child.add_tag(tag_type, tag_value)
     parent.add_subsection(child)
     reduced = _section_traverser(parent)
     assert reduced.subsections[0].tags == correct_tags