def run_strings_analysis(self, apk_file, result: Result):
        """Dump the APK's string pool and locale configurations and report statistics.

        Flags APKs with no strings at all (Heuristic 6), a low string count
        (Heuristic 7) and single-language builds (Heuristic 8).

        :param apk_file: Path to the APK file to analyse
        :param result: Top-level Result the sections are attached to
        """
        # 'd strings' / 'd configurations' are aapt-style dump arguments;
        # run_appt presumably wraps the aapt binary -- TODO confirm.
        string_args = ['d', 'strings', apk_file]
        strings, _ = self.run_appt(string_args)
        # NOTE(review): "unitialized" is presumably matched verbatim from the
        # tool's (misspelled) output -- do not correct the spelling here.
        if not strings or strings == "String pool is unitialized.\n":
            ResultSection("No strings found in APK", body="This is highly unlikely and most-likely malicious.",
                          parent=result, heuristic=Heuristic(6))
        else:
            res_strings = ResultSection("Strings Analysis", parent=result)

            # Collect the locales the APK is built for and tag each known one
            config_args = ['d', 'configurations', apk_file]
            configs, _ = self.run_appt(config_args)
            languages = []
            for line in configs.splitlines():
                config = line.upper()
                if config in ISO_LOCALES:
                    languages.append(config)
                    res_strings.add_tag('file.apk.locale', config)

            # First output line looks like "... N entries ... M styles ..." --
            # split on the keyword, then take the word right before it.
            data_line = strings.split("\n", 1)[0]
            count = int(data_line.split(" entries")[0].rsplit(" ", 1)[1])
            styles = int(data_line.split(" styles")[0].rsplit(" ", 1)[1])
            if count < 50:
                ResultSection("Low volume of strings, this is suspicious.", parent=res_strings,
                              body_format=BODY_FORMAT.MEMORY_DUMP, body=safe_str(strings), heuristic=Heuristic(7))

            if len(languages) < 2:
                ResultSection("This app is not built for multiple languages. This is unlikely.",
                              parent=res_strings, heuristic=Heuristic(8))

            res_strings.add_line(f"Total string count: {count}")
            res_strings.add_line(f"Total styles: {styles}")
            if languages:
                res_strings.add_line(f"Languages: {', '.join(languages)}")
 def manage_threat_level(self, data, result):
     """Add a result section scored according to the reported threat level.

     Maps 'Low Risk' -> Heuristic(1), 'Moderate Risk' -> Heuristic(2) and
     'High Risk' -> Heuristic(3); unknown levels add no section.

     :param data: Report dict containing a 'threat_level' key
     :param result: Result the section is added to
     """
     # BUG FIX: an unrecognized threat level previously left `threat_section`
     # unbound, raising UnboundLocalError on result.add_section(). A dispatch
     # table also removes the three near-identical branches.
     heuristic_ids = {'Low Risk': 1, 'Moderate Risk': 2, 'High Risk': 3}
     threat_level = data['threat_level']
     heur_id = heuristic_ids.get(threat_level)
     if heur_id is None:
         return
     threat_section = ResultSection("threat level : {0}".format(threat_level),
                                    heuristic=Heuristic(heur_id))
     result.add_section(threat_section)
Example #3
0
    def _get_category_section(self, category: str,
                              tags: Iterator[AVClassTag]) -> ResultSection:
        """
        Build a `ResultSection` for all AVclass tags of one category.

        The section body is a table of tag name/category/path/rank rows,
        sorted by rank in descending order.

        :param category: Category of tags
        :param tags: Tags belonging to category
        :return: `ResultSection`
        """
        ordered = sorted(tags, key=lambda t: t.rank, reverse=True)
        category_name, heur_id, tag_type = AVCLASS_CATEGORY[category]

        # One table row per tag, highest rank first
        rows = []
        for tag in ordered:
            rows.append({'name': tag.name,
                         'category': category_name,
                         'path': tag.path,
                         'rank': tag.rank})

        heuristic = None if heur_id is None else Heuristic(heur_id)
        section = ResultSection(
            f'AVclass extracted {len(ordered)} {category_name} tags',
            body=json.dumps(rows),
            body_format=BODY_FORMAT.TABLE,
            heuristic=heuristic)

        # Only some categories map to an AL tag type
        if tag_type is not None:
            for tag in ordered:
                section.add_tag(tag_type, tag.name)

        return section
Example #4
0
 def additional_parsing(self, file_path: str) -> Optional[ResultSection]:
     """Extract URLs from PDF link annotations and report them as IOCs.

     Opens the file with pikepdf, walks every page's /Annots array and
     collects /URI values from /Link annotations that match FULL_URI.

     :param file_path: Path to the PDF to parse
     :return: 'URL in Annotations' section (Heuristic 27, with a 'one_page'
         signature for single-page documents), or None when no URLs are
         found or pikepdf fails to parse the file.
     """
     urls = set()
     try:
         with pikepdf.open(file_path) as pdf:
             num_pages = len(pdf.pages)
             for page in pdf.pages:
                 if '/Annots' not in page:
                     continue
                 for annot in page['/Annots'].as_list():
                     # BUG FIX: the URI extraction below previously sat outside
                     # this /Link guard, so non-link annotations without an /A
                     # entry raised KeyError -- swallowed by the broad except,
                     # aborting the whole parse. Only link annotations carry
                     # the /A action we want.
                     if annot.get('/Subtype') != '/Link':
                         continue
                     if '/A' not in annot:
                         continue
                     _url = annot['/A'].get('/URI')
                     if not hasattr(_url, '__str__'):
                         continue
                     url = str(_url)
                     if re.match(FULL_URI, url):
                         urls.add(url)
         if not urls:
             return None
         patterns = PatternMatch()
         body = '\n'.join(urls)
         tags: dict[str, set[bytes]] = patterns.ioc_match(body.encode())
         result = ResultSection(
             'URL in Annotations',
             heuristic=Heuristic(
                 27, signature='one_page' if num_pages == 1 else None),
             body=body)
         for ty, vals in tags.items():
             for val in vals:
                 result.add_tag(ty, val)
         return result
     except Exception as e:
         # Malformed PDFs are common; log and fall through rather than fail the service
         self.log.warning(f'pikepdf failed to parse sample: {e}')
         return None
Example #5
0
def stack_result(section: List[bytes]) -> Optional[ResultSection]:
    """ Generates a ResultSection from floss stacked strings output section

    :param section: Raw floss output lines; the first line is a header,
        the remainder are the stacked strings themselves.
    :return: Populated section, or None when no stacked strings are present.
    """
    strings = section[1:]
    # Bail out before building the section: the header alone is not a result
    if not strings:
        return None

    # BUG FIX: section title previously read 'Sacked Strings' (typo);
    # the docstring and floss both call these 'stacked strings'.
    result = ResultSection('FLARE FLOSS Stacked Strings',
                           body_format=BODY_FORMAT.MEMORY_DUMP,
                           heuristic=Heuristic(3))
    assert result.heuristic

    groups = group_strings(s.decode() for s in strings)
    for group in groups:
        res = ResultSection(
            f"Group: '{min(group, key=len)}' Strings: {len(group)}",
            body='\n'.join(group),
            body_format=BODY_FORMAT.MEMORY_DUMP)
        for string in group:
            # Large groups are restricted to network IOCs to limit tag spam
            ioc_tag(string.encode(), res, just_network=len(group) > 1000)
        result.add_subsection(res)

    if any(res.tags for res in result.subsections):
        result.heuristic.add_signature_id('stacked_ioc')

    return result
Example #6
0
def decoded_result(text: bytes) -> Optional[ResultSection]:
    """ Generates a ResultSection from floss decoded strings output section

    :param text: Raw floss output; the first line is replaced by a fixed
        header and the last line is dropped from the body.
    :return: Populated section (Heuristic 1), or None when nothing was decoded.
    """
    lines = text.splitlines()
    # BUG FIX: guard against empty input -- lines[0] raised IndexError on b''
    if not lines:
        return None
    lines[0] = b'Most likely decoding functions:'
    body = b'\n'.join(lines[:-1])

    # Decoded strings appear as "[TAG]  0xADDR  decoded-string" lines
    strings = re.findall(rb'^\[[A-Z]+\]\s+0x[0-9A-F]+\s+(.+)',
                         body,
                         flags=re.M)
    if not strings:
        return None

    result = ResultSection('FLARE FLOSS Decoded Strings',
                           body_format=BODY_FORMAT.MEMORY_DUMP,
                           heuristic=Heuristic(1))
    assert result.heuristic
    ioc = False
    for string in strings:
        # Restrict to network IOCs when there are many strings, to limit tag spam
        ioc = ioc_tag(string, result, just_network=len(strings) > 1000) or ioc
        # Tag values are truncated to 75 characters
        result.add_tag('file.string.decoded', string[:75])
    if ioc:
        result.heuristic.add_signature_id('decoded_ioc')

    result.add_line(body.decode())
    return result
    def add_ip_tags(self):
        """
        Report the collected URLs and IP addresses as a single IOC section.

        Does nothing when both lists are empty.
        """
        if not self.url_list and not self.ip_list:
            return

        sec_iocs = ResultSection(
            "ViperMonkey has found the following IOCs:",
            parent=self.result,
            heuristic=Heuristic(4))

        # URLs: tag the full URI, plus the hostname when it is not a bare IP
        for url in set(self.url_list):
            sec_iocs.add_line(url)
            sec_iocs.add_tag('network.static.uri', url)
            try:
                hostname = urlparse(url).hostname
                if not re.match(IP_ONLY_REGEX, hostname):
                    sec_iocs.add_tag('network.static.domain', hostname)
            except Exception:
                # hostname may be None or unparseable; skip the domain tag
                pass

        # IPs: split off the port when one is present
        for ip in set(self.ip_list):
            sec_iocs.add_line(ip)
            if ':' in ip:
                net_ip, net_port = ip.split(':')
                sec_iocs.add_tag('network.static.ip', net_ip)
                sec_iocs.add_tag('network.port', net_port)
            else:
                sec_iocs.add_tag('network.static.ip', ip)
    def find_scripts_and_exes(apktool_out_dir: str, result: Result):
        """Walk the apktool output directory and report shell scripts
        (Heuristic 1), Linux executables (Heuristic 2) and embedded APKs
        (Heuristic 19) found inside the APK.

        :param apktool_out_dir: Root directory of the apktool output
        :param result: Result the sections are attached to
        """
        scripts = []
        executables = []
        apks = []

        # We are gonna do the full apktool output dir here but in case we want to do less,
        # you can edit the test_path list
        test_paths = [apktool_out_dir]
        for path in test_paths:
            for root, _, files in os.walk(path):
                for f in files:
                    # .smali files are apktool disassembly output, not embedded content
                    if f.endswith(".smali"):
                        continue
                    cur_file = os.path.join(root, f)
                    file_type = fileinfo(cur_file)['type']

                    if "code/sh" in file_type:
                        scripts.append(cur_file.replace(apktool_out_dir, ''))
                    elif "executable/linux" in file_type:
                        executables.append(cur_file.replace(apktool_out_dir, ''))
                    elif "android/apk" in file_type:
                        # BUG FIX: embedded APKs were previously appended to
                        # `executables`, leaving `apks` always empty and the
                        # Heuristic(19) section below unreachable.
                        apks.append(cur_file.replace(apktool_out_dir, ''))

        def _report(items, title, heur_id):
            # One section per non-empty category, listing at most 20 paths
            if not items:
                return
            section = ResultSection(title, parent=result, heuristic=Heuristic(heur_id))
            for item in sorted(items)[:20]:
                section.add_line(item)
            if len(items) > 20:
                section.add_line(f"and {len(items) - 20} more...")

        _report(scripts, "Shell script(s) found inside APK", 1)
        _report(executables, "Executable(s) found inside APK", 2)
        _report(apks, "Other APKs where found inside the APK", 19)
    def check_file_name_anomalies(self, filename):
        """Filename anomalies detection.

        Runs the fna_* checks on `filename` and, when any trigger, builds a
        'File Name Anomalies' section with one heuristic sub-section per hit.

        :param filename: Submitted file name to inspect
        :return: Result containing the anomaly sections (empty when clean)
        """
        # The double-extension check also yields the extension the others need
        has_double_ext, f_ext = self.fna_check_double_extension(filename)

        # (triggered, section title, heuristic id, anomaly tag, behavior tag)
        checks = [
            (self.fna_check_filename_ws(filename, f_ext),
             "Too many whitespaces", 1,
             "TOO_MANY_WHITESPACES", "File name has too many whitespaces"),
            (has_double_ext,
             "Double file extension", 2,
             "DOUBLE_FILE_EXTENSION", "Double file extension"),
            (self.fna_check_unicode_bidir_ctrls(filename, f_ext),
             "Hidden launchable file extension", 3,
             "UNICODE_EXTENSION_HIDING", "Real file extension hidden using unicode trickery"),
            (self.fna_check_empty_filename(filename, f_ext),
             "Empty Filename", 4,
             "FILENAME_EMPTY_OR_ALL_SPACES", "File name is empty or all whitespaces"),
        ]

        file_res = Result()

        if any(triggered for triggered, *_ in checks):
            res = ResultSection(title_text="File Name Anomalies", parent=file_res)

            # Tag filename as it might be of interest
            res.add_tag("file.name.extracted", filename)

            # Strip bidirectional/format Unicode controls for readable reporting
            bidir_controls = {"\u202E", "\u202B", "\u202D", "\u202A", "\u200E", "\u200F"}
            fn_no_controls = "".join(c for c in filename if c not in bidir_controls)

            # Also add a line with "actual" file name
            res.add_line(f"Actual file name: {wrap_bidir_unicode_string(fn_no_controls)}")

            for triggered, title, heur_id, anomaly, behavior in checks:
                if triggered:
                    sec = ResultSection(title, parent=res, heuristic=Heuristic(heur_id))
                    sec.add_tag("file.name.anomaly", anomaly)
                    sec.add_tag("file.behavior", behavior)

        return file_res
    def bbcrack_results(self,
                        request: ServiceRequest) -> Optional[ResultSection]:
        """
        Balbuzard's bbcrack XOR'd strings to find embedded patterns/PE files of interest

        Args:
            request: AL request object with result section

        Returns:
            The result section (with request.result as its parent) if one is created
        """
        x_res = (ResultSection("BBCrack XOR'd Strings:",
                               body_format=BODY_FORMAT.MEMORY_DUMP,
                               heuristic=Heuristic(2)))
        # Deep scans run bbcrack's more expensive level-2 search
        if request.deep_scan:
            xresult = bbcrack(request.file_contents, level=2)
        else:
            xresult = bbcrack(request.file_contents, level=1)
        # Fixed-width columns: Transform | Offset | Score | Decoded String
        xformat_string = '%-20s %-7s %-7s %-50s'
        xor_al_results = []
        xindex = 0
        for transform, regex, offset, score, smatch in xresult:
            if regex == 'EXE_HEAD':
                # Candidate XOR'd PE: dump the bytes to disk and let pe_dump
                # decide whether a real PE can be carved out of them
                xindex += 1
                xtemp_file = os.path.join(
                    self.working_directory,
                    f"EXE_HEAD_{xindex}_{offset}_{score}.unXORD")
                with open(xtemp_file, 'wb') as xdata:
                    xdata.write(smatch)
                # NOTE(review): 'FrakenStrings' typo below is in a runtime
                # message string; left untouched here.
                pe_extracted = self.pe_dump(
                    request,
                    xtemp_file,
                    offset,
                    file_string="xorpe_decoded",
                    msg="Extracted xor file during FrakenStrings analysis.")
                if pe_extracted:
                    xor_al_results.append(
                        xformat_string %
                        (str(transform), offset, score, "[PE Header Detected. "
                         "See Extracted files]"))
            else:
                # Non-EXE patterns get tagged via the BBCRACK_TO_TAG mapping
                # (falling back to the raw pattern name)
                if not regex.startswith("EXE_"):
                    x_res.add_tag(self.BBCRACK_TO_TAG.get(regex, regex),
                                  smatch)
                xor_al_results.append(
                    xformat_string %
                    (str(transform), offset, score, safe_str(smatch)))
        # Result Graph:
        if len(xor_al_results) > 0:
            # Header row, separator row, then the accumulated result rows
            xcolumn_names = ('Transform', 'Offset', 'Score', 'Decoded String')
            x_res.add_line(xformat_string % xcolumn_names)
            x_res.add_line(xformat_string % tuple('-' * len(s)
                                                  for s in xcolumn_names))
            x_res.add_lines(xor_al_results)
            request.result.add_section(x_res)
            return x_res
        return None
Example #11
0
    def add_image(self, path: str, name: str, description: str,
                  classification: Optional[Classification] = None,
                  ocr_heuristic_id: Optional[int] = None) -> dict:
        """
        Add a image file to be viewed in the result section.

        The image is re-encoded (WEBP, or PNG when the source is already WEBP)
        so that format-specific exploits cannot be carried over, and a 128x128
        thumbnail is uploaded alongside it.

        :param path: Complete path to the image file
        :param name: Display name of the image file
        :param description: Descriptive text about the image file
        :param classification: Classification of the image file (default: service classification)
        :param ocr_heuristic_id: Optional heuristic ID used to flag suspicious strings found via OCR
        :return: Dict with 'img' and 'thumb' upload records, plus 'ocr_section' when OCR hits
        """

        with tempfile.NamedTemporaryFile(dir=self._working_directory, delete=False) as outtmp:
            with tempfile.NamedTemporaryFile(dir=self._working_directory, delete=False) as thumbtmp:
                # Load Image
                img = Image.open(path)

                # Force image format switch to prevent exploit to cross-over
                img_format = 'WEBP'
                if img.format == img_format:
                    img_format = 'PNG'

                if img_format == "WEBP" and (img.height > WEBP_MAX_SIZE or img.width > WEBP_MAX_SIZE):
                    # Maintain aspect ratio.
                    # BUG FIX: Image.ANTIALIAS was removed in Pillow 10;
                    # Image.LANCZOS is the same filter under its current name.
                    img.thumbnail((WEBP_MAX_SIZE, WEBP_MAX_SIZE), Image.LANCZOS)

                # Save and upload new image
                img.save(outtmp.name, format=img_format)
                img_res = self.task.add_supplementary(outtmp.name, name, description, classification,
                                                      is_section_image=True)

                # Save and upload thumbnail
                img.thumbnail((128, 128))
                img.save(thumbtmp.name, format=img_format, optimize=True)
                thumb_res = self.task.add_supplementary(thumbtmp.name, f"{name}.thumb",
                                                        f"{description} (thumbnail)", classification,
                                                        is_section_image=True)

        data = {'img': {k: v for k, v in img_res.items() if k in ['name', 'description', 'sha256']},
                'thumb': {k: v for k, v in thumb_res.items() if k in ['name', 'description', 'sha256']}}

        if ocr_heuristic_id:
            try:
                detections = ocr_detections(path)
                if detections:
                    # One signature per detection category, weighted by hit count
                    heuristic = Heuristic(ocr_heuristic_id, signatures={k: len(v) for k, v in detections.items()})
                    ocr_section = ResultKeyValueSection(f'Suspicious strings found during OCR analysis on file {name}')
                    ocr_section.set_heuristic(heuristic)
                    for k, v in detections.items():
                        ocr_section.set_item(k, v)
                    data['ocr_section'] = ocr_section
            except ImportError as e:
                # OCR dependencies are optional; log and return without the OCR section
                self.log.warning(str(e))
        return data
    def generate_results(presults, result, analysis_results, request):
        """Build AL result sections from UnpacMe output.

        Adds an unpack-success section (Heuristic 1), one malware-ID section
        per match (Heuristic 2), registers each unpacked sample as an
        extracted file, and attaches the raw detailed results as JSON.

        :return: The (result, request) pair, updated in place
        """
        if presults['unpacked']:
            unpacked_section = ResultSection("Successully unpacked binary.",
                                             heuristic=Heuristic(1))
            result.add_section(unpacked_section)

        for sample in presults['unpacked_samples']:
            # One section per malware identification on this sample
            for match in sample['malware_id']:
                section = ResultSection(f"{sample['sha256']} - {match['name']}",
                                        heuristic=Heuristic(2))
                section.add_line(f"Details: {match['reference']}")
                result.add_section(section)
            request.add_extracted(sample['data_path'], sample['sha256'],
                                  f'Unpacked from {request.sha256}')

        detailed = ResultSection("UNPACME Detailed Results",
                                 body_format=BODY_FORMAT.JSON,
                                 body=json.dumps(analysis_results['results']))
        result.add_section(detailed)

        return result, request
    def check_for_b64(self, data, section):
        """Search and decode base64 strings in sample data.

        Candidate base64 runs are validated (length, charset entropy),
        decoded as UTF-16, and substituted back into a copy of `data` that is
        reported in a 'Possible Base64 found' sub-section (Heuristic 5) and
        scanned for IP addresses.

        Args:
            data: Data to be parsed
            section: Sub-section to be modified if base64 found

        Returns:
            decoded: Boolean which is true if base64 found
        """

        b64_matches = []
        # b64_matches_raw will be used for replacing in case b64_matches are modified
        b64_matches_raw = []
        decoded_param = data
        decoded = False

        for b64_match in re.findall(
                '([\x20]{0,2}(?:[A-Za-z0-9+/]{10,}={0,2}[\r]?[\n]?){2,})',
                re.sub('\x3C\x00\x20{2}\x00', '', data)):
            b64 = b64_match.replace('\n', '').replace('\r', '').replace(
                ' ', '').replace('<', '')
            # Require some charset variety and a valid base64 length to cut
            # down on false positives
            uniq_char = ''.join(set(b64))
            if len(uniq_char) > 6:
                if len(b64) >= 16 and len(b64) % 4 == 0:
                    b64_matches.append(b64)
                    b64_matches_raw.append(b64_match)
        for b64_string, b64_string_raw in zip(b64_matches, b64_matches_raw):
            try:
                base64data = binascii.a2b_base64(b64_string)
                # Decode base64 bytes, add a space to beginning as it may be
                # stripped off while using regex.
                # BUG FIX: the value must be decoded back to str before
                # concatenation -- ' ' + bytes raised TypeError, so decoded
                # base64 was never actually reported.
                base64data_decoded = ' ' + base64data.decode('utf-16').encode(
                    'ascii', 'ignore').decode('ascii')
                # Replace base64 from param with decoded string.
                # BUG FIX: use str.replace() -- the raw match can contain
                # regex metacharacters ('+'), which made re.sub() unreliable.
                decoded_param = decoded_param.replace(b64_string_raw,
                                                      base64data_decoded)
                decoded = True
            except Exception:
                # Invalid base64 or undecodable UTF-16: skip this candidate
                pass

        if decoded:
            decoded_section = ResultSection('Possible Base64 found',
                                            parent=section,
                                            heuristic=Heuristic(5))
            decoded_section.add_line(
                f'Possible Base64 Decoded Parameters: {decoded_param}')
            self.find_ip(decoded_param)

        return decoded
    def embedded_pe_results(
            self, request: ServiceRequest) -> Optional[ResultSection]:
        """
        Finds, extracts and reports embedded executables

        Args:
            request: AL request object with result section

        Returns:
            The result section (with request.result as its parent) if one is created
        """
        # PE Strings
        pat_exedos = rb'(?s)This program cannot be run in DOS mode'
        pat_exeheader = rb'(?s)MZ.{32,1024}PE\000\000.+'

        embedded_pe = False
        # Scan from offset 1 -- presumably so the sample's own PE header at
        # offset 0 is not re-extracted as "embedded" (TODO confirm)
        for pos_exe in re.findall(pat_exeheader, request.file_contents[1:]):
            # Require the DOS stub string to filter out coincidental MZ..PE matches
            if re.search(pat_exedos, pos_exe):
                pe_sha256 = hashlib.sha256(pos_exe).hexdigest()
                temp_file = os.path.join(self.working_directory,
                                         "EXE_TEMP_{}".format(pe_sha256))

                with open(temp_file, 'wb') as pedata:
                    pedata.write(pos_exe)

                # pe_dump presumably carves/validates the candidate and
                # registers it as an extracted file; stay True once any
                # candidate succeeds
                embedded_pe = embedded_pe or self.pe_dump(
                    request,
                    temp_file,
                    offset=0,
                    file_string="embed_pe",
                    msg="PE header strings discovered in sample",
                    fail_on_except=True)
        # Report embedded PEs if any are found
        if embedded_pe:
            return ResultSection(
                "Embedded PE header discovered in sample. See extracted files.",
                heuristic=Heuristic(3),
                parent=request.result)
        return None
Example #15
0
    def execute(self, request):
        """Service entry point: decompile/extract a JAR submission and report findings.

        Walks the extracted archive (in parallel per directory), flags embedded
        PEs and launchable files, analyses .class files and META-INF, then
        summarizes suspicious class usage counters as heuristic sections.

        :param request: AL service request; its result is populated in place
        """
        request.result = Result()
        request.set_service_context(self.get_tool_version())
        temp_filename = request.file_path
        # NOTE(review): `filename` is unused below -- the f-strings that
        # presumably interpolated it now contain a literal '(unknown)';
        # confirm the intended directory/description names.
        filename = os.path.basename(temp_filename)
        extract_dir = os.path.join(self.working_directory, f"(unknown)_extracted")
        decompiled_dir = os.path.join(self.working_directory, f"(unknown)_decompiled")
        file_res = request.result
        new_files = []
        supplementary_files = []
        imp_res_list = []
        res_list = []

        if request.file_type == "java/jar":
            self.decompile_jar(temp_filename, decompiled_dir)
            if self.jar_extract(temp_filename, extract_dir):
                # Analysis properties
                # Counters for suspicious class references, filled in by
                # analyse_class_file and reported as sections below
                self.classloader_found = 0
                self.security_found = 0
                self.url_found = 0
                self.runtime_found = 0
                self.applet_found = 0

                self.manifest_tags = []
                self.signature_block_certs = []

                def analyze_file(root, cf, file_res, imp_res_list, supplementary_files, decompiled_dir, extract_dir):
                    # Per-file worker run on the thread pool below; appends to
                    # the shared lists (list.append is atomic in CPython --
                    # NOTE(review): confirm no stronger synchronization is needed)
                    cur_file_path = os.path.join(root.decode('utf-8'), cf.decode('utf-8'))
                    with open(cur_file_path, "rb") as cur_file:
                        start_bytes = cur_file.read(24)

                        ##############################
                        # Executables in JAR
                        ##############################
                        cur_ext = os.path.splitext(cf)[1][1:].upper()
                        if start_bytes[:2] == b"MZ":
                            mz_res = dict(
                                title_text=f"Embedded executable file found: {cf} "
                                "There may be a malicious intent.",
                                heur_id=1,
                                tags=[('file.behavior', "Embedded PE")],
                                score_condition=APPLET_MZ,
                            )
                            imp_res_list.append(mz_res)

                        ##############################
                        # Launchable in JAR
                        ##############################
                        elif cur_ext in G_LAUNCHABLE_EXTENSIONS:
                            l_res = dict(
                                title_text=f"Launch-able file type found: {cf}"
                                "There may be a malicious intent.",
                                heur_id=2,
                                tags=[('file.behavior', "Launch-able file in JAR")],
                                score_condition=APPLET_MZ,
                            )
                            imp_res_list.append(l_res)

                        if cur_file_path.upper().endswith('.CLASS'):
                            self.analyse_class_file(file_res, cf, cur_file, cur_file_path,
                                                    start_bytes, imp_res_list, supplementary_files,
                                                    decompiled_dir, extract_dir)

                # Paths are walked as bytes (encoded extract_dir) to tolerate
                # non-UTF-8 archive member names
                for root, _, files in os.walk(extract_dir.encode('utf-8')):
                    logging.info(f"Extracted: {root} - {files}")

                    # if the META-INF folder is encountered
                    if root.upper().endswith(b'META-INF'):  # only top level meta
                        self.analyse_meta_information(file_res, root, supplementary_files, extract_dir)
                        continue

                    # Analyse all files of this directory in parallel; the
                    # context manager joins the workers before moving on
                    with ThreadPoolExecutor() as executor:
                        for cf in files:
                            executor.submit(analyze_file, root, cf, file_res, imp_res_list,
                                            supplementary_files, decompiled_dir, extract_dir)

                res = ResultSection("Analysis of the JAR file")

                res_meta = ResultSection("[Meta Information]", parent=res)
                if len(self.manifest_tags) > 0:
                    res_manifest = ResultSection("Manifest File Information Extract",
                                                 parent=res_meta)
                    for tag, val in self.manifest_tags:
                        res_manifest.add_tag(tag, val)

                for res_cert in self.signature_block_certs:
                    res_meta.add_subsection(res_cert)

                if self.runtime_found > 0 \
                        or self.applet_found > 0 \
                        or self.classloader_found > 0 \
                        or self.security_found > 0 \
                        or self.url_found > 0:
                    res.add_line("All suspicious class files were saved as supplementary files.")

                res_class = ResultSection("[Suspicious classes]", parent=res)

                # One heuristic sub-section per suspicious class family that
                # was referenced at least once
                if self.runtime_found > 0:
                    ResultSection("Runtime Found",
                                  body=f"java/lang/Runtime: {self.runtime_found}",
                                  heuristic=Heuristic(10),
                                  parent=res_class)

                if self.applet_found > 0:
                    ResultSection("Applet Found",
                                  body=f"java/applet/Applet: {self.applet_found}",
                                  heuristic=Heuristic(6),
                                  parent=res_class)

                if self.classloader_found > 0:
                    ResultSection("Classloader Found",
                                  body=f"java/lang/ClassLoader: {self.classloader_found}",
                                  heuristic=Heuristic(7),
                                  parent=res_class)

                if self.security_found > 0:
                    ResultSection("Security Found",
                                  body=f"java/security/*: {self.security_found}",
                                  heuristic=Heuristic(8),
                                  parent=res_class)

                if self.url_found > 0:
                    ResultSection("URL Found",
                                  body=f"java/net/URL: {self.url_found}",
                                  heuristic=Heuristic(9),
                                  parent=res_class)

                res_list.append(res)

        # Add results if any
        self.recurse_add_res(file_res, imp_res_list, new_files)
        for res in res_list:
            file_res.add_section(res)

        # Submit embedded files
        if len(new_files) > 0:
            new_files = sorted(list(set(new_files)))
            txt = f"Extracted from 'JAR' file (unknown)"
            for embed in new_files:
                # Display name is the path relative to the extract/decompile dirs
                request.add_extracted(embed, embed.replace(extract_dir + "/", "").replace(decompiled_dir + "/", ""),
                                      txt, safelist_interface=self.api_interface)

        if len(supplementary_files) > 0:
            supplementary_files = sorted(list(set(supplementary_files)))
            for path, name, desc in supplementary_files:
                request.add_supplementary(path, name, desc)
    def validate_certs(apktool_out_dir: str, result: Result):
        """Inspect the APK's signing certificates with keytool and flag anomalies.

        Parses `keytool -printcert` output for each file under
        original/META-INF and raises heuristics for self-signed certs, missing
        owner country, implausible validity windows and certs expiring before
        the Play Store minimum date.

        :param apktool_out_dir: Root directory of the apktool output
        :param result: Result the certificate sections are attached to
        """
        # Tracks whether any certificate was parsed; not used in this visible
        # span -- presumably consumed after this excerpt (TODO confirm)
        has_cert = False
        for root, _, files in os.walk(os.path.join(apktool_out_dir, "original", "META-INF")):
            for f in files:
                cur_file = os.path.join(root, f)
                stdout, stderr = Popen(["keytool", "-printcert", "-file", cur_file],
                                       stderr=PIPE, stdout=PIPE).communicate()
                stdout = safe_str(stdout)
                if stdout:
                    if "keytool error" not in stdout:
                        has_cert = True
                        issuer = ""
                        owner = ""
                        country = ""
                        valid_from = ""
                        valid_to = ""
                        valid_year_end = 0
                        valid_year_start = 0
                        valid_until_date = time.time()
                        # Google Play requires certificates valid until at
                        # least this date
                        play_store_min = 'Sat Oct 22 00:00:00 2033'
                        play_store_min_valid_date = time.mktime(time.strptime(play_store_min, "%a %b %d %H:%M:%S %Y"))

                        for line in stdout.splitlines():
                            if "Owner:" in line:
                                owner = line.split(": ", 1)[1]
                                # Pull the C= (country) component out of the DN, if present
                                country = owner.split("C=")
                                if len(country) != 1:
                                    country = country[1]
                                else:
                                    country = ""

                            if "Issuer:" in line:
                                issuer = line.split(": ", 1)[1]

                            if "Valid from:" in line:
                                # Line format: "Valid from: <date> until: <date>"
                                valid_from = line.split(": ", 1)[1].split(" until:")[0]
                                valid_to = line.rsplit(": ", 1)[1]

                                valid_from_splitted = valid_from.split(" ")
                                valid_to_splitted = valid_to.split(" ")

                                valid_year_start = int(valid_from_splitted[-1])
                                valid_year_end = int(valid_to_splitted[-1])

                                # Drop the timezone token so the string matches
                                # the "%a %b %d %H:%M:%S %Y" strptime format
                                valid_until = " ".join(valid_to_splitted[:-2] + valid_to_splitted[-1:])
                                valid_until_date = time.mktime(time.strptime(valid_until, "%a %b %d %H:%M:%S %Y"))

                        res_cert = ResultSection("Certificate Analysis", body=safe_str(stdout),
                                                 parent=result, body_format=BODY_FORMAT.MEMORY_DUMP)

                        res_cert.add_tag('cert.valid.start', valid_from)
                        res_cert.add_tag('cert.valid.end', valid_to)
                        # NOTE(review): 'cert.issues' looks like a typo for
                        # 'cert.issuer' -- confirm against the tag safelist
                        # before changing
                        res_cert.add_tag('cert.issues', issuer)
                        res_cert.add_tag('cert.owner', owner)

                        if owner == issuer:
                            ResultSection("Certificate is self-signed", parent=res_cert,
                                          heuristic=Heuristic(10))

                        if not country:
                            ResultSection("Certificate owner has no country", parent=res_cert,
                                          heuristic=Heuristic(11))

                        if valid_year_start < 2008:
                            ResultSection("Certificate valid before first android release", parent=res_cert,
                                          heuristic=Heuristic(12))

                        if valid_year_start > valid_year_end:
                            ResultSection("Certificate expires before validity date starts", parent=res_cert,
                                          heuristic=Heuristic(16))

                        if (valid_year_end - valid_year_start) > 30:
                            ResultSection("Certificate valid more then 30 years", parent=res_cert,
                                          heuristic=Heuristic(13))

                        if valid_until_date < play_store_min_valid_date:
                            ResultSection("Certificate not valid until minimum valid playstore date", parent=res_cert,
                                          heuristic=Heuristic(20))

                        if country:
                            try:
                                int(country)
                                is_int_country = True
                            except Exception:
                                is_int_country = False

                            if len(country) != 2 or is_int_country:
                                ResultSection("Invalid country code in certificate owner", parent=res_cert,
                                              heuristic=Heuristic(14))

                        if f != "CERT.RSA":
                            ResultSection(f"Certificate name not using conventional name: {f}", parent=res_cert,
                                          heuristic=Heuristic(15))

        if not has_cert:
            ResultSection("This APK is not signed", parent=result, heuristic=Heuristic(9))
    def execute(self, request):
        """Analyse an Office document with ViperMonkey and report macro behaviour.

        Runs the Python 2 ViperMonkey wrapper as a subprocess, parses the single
        JSON line it emits, then builds result sections for recorded macro
        actions, runtime temporary IOCs, discovered PowerShell code and the VBA
        built-in functions that were called.
        """
        self.result = Result()
        request.result = self.result
        self.request = request

        # Per-request state consumed by the helper methods
        # (extract_powershell / check_for_b64 / find_ip / add_ip_tags).
        self.ip_list = []
        self.url_list = []
        self.found_powershell = False
        self.file_hashes = []

        vmonkey_err = False
        actions = []
        external_functions = []
        tmp_iocs = []
        output_results = {}

        # Running ViperMonkey. Arguments are passed as a list (shell=False) so
        # a crafted file path cannot be interpreted by the shell; the previous
        # " ".join(...) + shell=True form was injectable.
        # NOTE(review): assumes PYTHON2_INTERPRETER is a bare executable path,
        # not a command line with embedded options -- confirm.
        cmd = [
            PYTHON2_INTERPRETER,
            os.path.join(os.path.dirname(__file__), 'vipermonkey_compat.py2'),
            request.file_path,
        ]
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        stdout, _ = p.communicate()

        # Read output: the useful result is a single JSON object on its own line
        if stdout:
            for out_line in stdout.splitlines():
                if out_line.startswith(b"{") and out_line.endswith(b"}"):
                    try:
                        output_results = json.loads(out_line)
                    except UnicodeDecodeError:
                        output_results = json.loads(
                            out_line.decode("utf-8", "replace"))
                    break

            # Checking for a dict in case vmonkey return is None
            # If no macros found, return is [][], if error, return is None
            if isinstance(output_results.get('vmonkey_values'), dict):
                '''
                Structure of variable "actions" is as follows:
                [action, parameters, description]
                action: 'Found Entry Point', 'Execute Command', etc...
                parameters: Parameters for function
                description: 'Shell Function', etc...

                external_functions is a list of built-in VBA functions
                that were called
                '''
                actions = output_results['vmonkey_values']['actions']
                external_functions = output_results['vmonkey_values'][
                    'external_funcs']
                tmp_iocs = output_results['vmonkey_values']['tmp_iocs']
            else:
                vmonkey_err = True
        else:
            vmonkey_err = True

        # Add vmonkey log as a supplemental file
        if 'stdout' in output_results:
            temp_log_copy = os.path.join(
                tempfile.gettempdir(), f'{request.sid}_vipermonkey_output.log')
            with open(temp_log_copy, "w") as temp_log_file:
                temp_log_file.write(output_results['stdout'])

            self.request.add_supplementary(temp_log_copy,
                                           'vipermonkey_output.log',
                                           'ViperMonkey log output')
            if vmonkey_err is True:
                ResultSection(
                    'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
                    parent=self.result,
                    heuristic=Heuristic(1))

        if len(actions) > 0:
            # Creating action section
            action_section = ResultSection('Recorded Actions:',
                                           parent=self.result)
            action_section.add_tag('technique.macro', 'Contains VBA Macro(s)')
            for action in actions:  # Creating action sub-sections for each action
                cur_action = action[0]
                cur_description = action[2] if action[2] else cur_action

                # Entry point actions have an empty description field, re-organize result section for this case
                if cur_action == 'Found Entry Point':
                    sub_action_section = ResultSection('Found Entry Point',
                                                       parent=action_section)
                    sub_action_section.add_line(action[1])
                else:
                    # Action's description will be the sub-section name
                    sub_action_section = ResultSection(cur_description,
                                                       parent=action_section)
                    if cur_description == 'Shell function':
                        sub_action_section.set_heuristic(2)

                    # Parameters are sometimes stored as a list, account for this
                    if isinstance(action[1], list):
                        for item in action[1]:
                            # Parameters include more than strings (booleans for example)
                            if isinstance(item, str):
                                # Check for PowerShell
                                self.extract_powershell(
                                    item, sub_action_section)
                        # Join list items into single string
                        param = ', '.join(str(a) for a in action[1])

                    else:
                        param = action[1]
                        # Parameters include more than strings (booleans for example)
                        if isinstance(param, str):
                            self.extract_powershell(param, sub_action_section)

                    sub_action_section.add_line(f'Action: {cur_action}')
                    sub_action_section.add_line(f'Parameters: {param}')

                    # If decoded is true, possible base64 string has been found
                    self.check_for_b64(param, sub_action_section)

                    # Add urls/ips found in parameter to respective lists
                    self.find_ip(param)

        # Check tmp_iocs
        res_temp_iocs = ResultSection('Runtime temporary IOCs')
        for ioc in tmp_iocs:
            self.extract_powershell(ioc, res_temp_iocs)
            self.check_for_b64(ioc, res_temp_iocs)
            self.find_ip(ioc)

        # Only report the temporary-IOC section if it actually has content
        if len(res_temp_iocs.subsections) != 0 or res_temp_iocs.body:
            self.result.add_section(res_temp_iocs)

        # Add PowerShell score/tag if found
        if self.found_powershell:
            ResultSection('Discovered PowerShell code in file',
                          parent=self.result,
                          heuristic=Heuristic(3))

        # Add url/ip tags
        self.add_ip_tags()

        # Create section for built-in VBA functions called
        if len(external_functions) > 0:
            vba_builtin_dict = {}
            dict_path = os.path.join(os.path.dirname(__file__),
                                     'VBA_built_ins.txt')
            # File format: "name;description" per line, '#' lines are comments
            with open(dict_path, 'r') as f:
                for line in f:
                    line = line.strip()
                    if line.startswith('#'):
                        continue
                    if line:
                        parts = line.split(';')
                        vba_builtin_dict[parts[0].strip()] = parts[1].strip()

            external_func_section = ResultSection(
                'VBA functions called',
                body_format=BODY_FORMAT.MEMORY_DUMP,
                parent=self.result)
            for func in external_functions:
                if func in vba_builtin_dict:
                    external_func_section.add_line(func + ': ' +
                                                   vba_builtin_dict[func])
                else:
                    external_func_section.add_line(func)
    def find_network_indicators(apktool_out_dir: str, result: Result):
        """Scan decompiled smali code for network indicators and report them.

        Greps the apktool output for dotted names, filters out the APK's own
        package paths, bundled asset names and known-benign SDK strings, then
        reports (and tags) the surviving URLs, IPs, domains and e-mail
        addresses under a single heuristic section.

        NOTE(review): declared without `self` but no @staticmethod decorator is
        visible in this chunk -- confirm one exists on the preceding line,
        otherwise instance calls will misbind the first argument.
        """
        # Package prefixes that are never indicators (extended dynamically below)
        skip_list = [
            "android.intent",
            "com.google",
            "com.android",
        ]

        # Known-benign strings commonly embedded by ad/analytics SDKs.
        # (The original list contained several duplicated literals; duplicates
        # were removed -- membership tests are unaffected.)
        indicator_whitelist = [
            'google.to',
            'google.ttl',
            'google.delay',
            'google_tagmanager.db',
            'gtm_urls.db',
            'gtm.url',
            'google_analytics_v4.db',
            'Theme.Dialog.Alert',
            'popupLocationInfo.gravity',
            'popupLocationInfo.displayId',
            'popupLocationInfo.left',
            'popupLocationInfo.top',
            'popupLocationInfo.right',
            'popupLocationInfo.bottom',
            'googleads.g.doubleclick.net',
            'ad.doubleclick.net',
            '.doubleclick.net',
            '.googleadservices.com',
            '.googlesyndication.com',
            'android.hardware.type.watch',
            'mraid.js',
            'google_inapp_purchase.db',
            'mobileads.google.com',
            'share_history.xml',
            'activity_choser_model_history.xml',
            'FragmentPager.SavedState{',
            'android.remoteinput.results',
            'android.people',
            'android.picture',
            'android.icon',
            'android.text',
            'android.title',
            'android.title.big',
            'FragmentTabHost.SavedState{',
            'libcore.icu.ICU',
        ]

        file_list = []

        # Indicators
        url_list = []
        domain_list = []
        ip_list = []
        email_list = []

        # Build dynamic whitelist: anything starting with one of the APK's own
        # smali package paths is app code, not a network indicator.
        smali_dir = os.path.join(apktool_out_dir, "smali")
        for root, dirs, files in os.walk(smali_dir):
            if not files:
                continue
            skip_list.append(root.replace(smali_dir + "/", "").replace("/", "."))
            for cdir in dirs:
                skip_list.append(os.path.join(root, cdir).replace(smali_dir + "/", "").replace("/", "."))

        # Bundled asset file names are not indicators either
        asset_dir = os.path.join(apktool_out_dir, "assets")
        if os.path.exists(asset_dir):
            for _root, _dirs, files in os.walk(asset_dir):
                if not files:
                    continue
                for asset_file in files:
                    file_list.append(asset_file)
        skip_list = list(set(skip_list))

        # Find candidate dotted names with a recursive extended-regex grep
        proc = Popen(['grep', '-ER', r'(([[:alpha:]](-?[[:alnum:]])*)\.)*[[:alpha:]](-?[[:alnum:]])+\.[[:alpha:]]{2,}',
                      smali_dir], stdout=PIPE, stderr=PIPE)
        grep, _ = proc.communicate()
        for line in safe_str(grep).splitlines():
            # grep -R output is "path:match"; the path itself is not needed
            _file_path, line = line.split(":", 1)

            # Only consider string constants in the smali code
            if "const-string" in line or "Ljava/lang/String;" in line:
                data = line.split("\"", 1)[1].split("\"")[0]
                data_low = data.lower()
                data_split = data.split(".")
                if data in file_list:
                    continue
                elif data in indicator_whitelist:
                    continue
                elif data.startswith("/"):
                    continue
                elif data_low.startswith("http://") or data_low.startswith('ftp://') or data_low.startswith('https://'):
                    url_list.append(data)
                elif len(data_split[0]) < len(data_split[-1]) and len(data_split[-1]) > 3:
                    continue
                elif data.startswith('android.') and data_low != data:
                    continue
                elif "/" in data and "." in data and data.index("/") < data.index("."):
                    continue
                elif " " in data:
                    continue
                elif data_split[0] in ['com', 'org', 'net', 'java']:
                    continue
                elif data_split[-1].lower() in ['so', 'properties', 'zip', 'read', 'id', 'store',
                                                'name', 'author', 'sh', 'soccer', 'fitness', 'news', 'video']:
                    continue
                elif data.endswith("."):
                    continue
                else:
                    # Skip anything rooted in the APK's own package namespace
                    do_skip = False
                    for skip in skip_list:
                        if data.startswith(skip):
                            do_skip = True
                            break

                    if do_skip:
                        continue

                    data = data.strip(".")

                    if is_valid_domain(data):
                        domain_list.append(data)
                    elif is_valid_ip(data):
                        ip_list.append(data)
                    elif is_valid_email(data):
                        email_list.append(data)

        url_list = list(set(url_list))
        # Fold the host portion of each URL back into the IP/domain lists
        for url in url_list:
            dom_ip = url.split("//")[1].split("/")[0]
            if ":" in dom_ip:
                dom_ip = dom_ip.split(":")[0]

            if is_valid_ip(dom_ip):
                ip_list.append(dom_ip)
            elif is_valid_domain(dom_ip):
                domain_list.append(dom_ip)

        ip_list = list(set(ip_list))
        domain_list = list(set(domain_list))
        email_list = list(set(email_list))

        if url_list or ip_list or domain_list or email_list:
            res_net = ResultSection("Network indicator(s) found", parent=result, heuristic=Heuristic(3))

            def _report(title, tag_type, values):
                # Display at most 20 values but tag every one of them.
                # (Replaces four copy-pasted blocks in the original.)
                section = ResultSection(title, parent=res_net)
                count = 0
                for value in values:
                    count += 1
                    if count <= 20:
                        section.add_line(value)
                    section.add_tag(tag_type, value)
                if count > 20:
                    section.add_line(f"and {count - 20} more...")

            if url_list:
                _report("Found urls in the decompiled code", 'network.static.uri', url_list)

            if ip_list:
                _report("Found IPs in the decompiled code", 'network.static.ip', ip_list)

            if domain_list:
                _report("Found domains in the decompiled code", 'network.static.domain', domain_list)

            if email_list:
                _report("Found email addresses in the decompiled code", 'network.email.address', email_list)
    def run_badging_analysis(self, apk_file: str, result: Result):
        """Analyse `aapt dump badging` output for the APK.

        Reports package name/version, SDK versions, label, launchable activity,
        libraries, permissions (flagging dangerous, unknown and duplicated
        ones), features and provided components into `result`.
        """
        badging_args = ['d', 'badging', apk_file]
        badging, _ = self.run_appt(badging_args)
        if not badging:
            return
        res_badging = ResultSection("Android application details")
        libs = []
        permissions = []
        # Count every permission declaration, duplicates included. The original
        # de-duplicated on insert and then compared len(set(permissions)) with
        # len(permissions), which could never differ, so heuristic 18 was dead.
        permission_count = 0
        components = []
        features = []
        pkg_version = None
        for line in badging.splitlines():
            if line.startswith("package:"):
                pkg_name = line.split("name='")[1].split("'")[0]
                pkg_version = line.split("versionCode='")[1].split("'")[0]
                res_badging.add_line(f"Package: {pkg_name} v.{pkg_version}")
                res_badging.add_tag('file.apk.pkg_name', pkg_name)
                res_badging.add_tag('file.apk.app.version', pkg_version)

            if line.startswith("sdkVersion:"):
                min_sdk = line.split(":'")[1][:-1]
                res_badging.add_line(f"Min SDK: {min_sdk}")
                res_badging.add_tag('file.apk.sdk.min', min_sdk)

            if line.startswith("targetSdkVersion:"):
                target_sdk = line.split(":'")[1][:-1]
                res_badging.add_line(f"Target SDK: {target_sdk}")
                res_badging.add_tag('file.apk.sdk.target', target_sdk)

            if line.startswith("application-label:"):
                label = line.split(":'")[1][:-1]
                res_badging.add_line(f"Default Label: {label}")
                res_badging.add_tag('file.apk.app.label', label)

            if line.startswith("launchable-activity:"):
                launch = line.split("name='")[1].split("'")[0]
                res_badging.add_line(f"Launchable activity: {launch}")
                res_badging.add_tag('file.apk.activity', launch)

            if line.startswith("uses-library-not-required:"):
                lib = line.split(":'")[1][:-1]
                if lib not in libs:
                    libs.append(lib)

            if line.startswith("uses-permission:") or line.startswith("uses-implied-permission:"):
                perm = line.split("name='")[1].split("'")[0]
                permission_count += 1
                if perm not in permissions:
                    permissions.append(perm)

            if line.startswith("provides-component:"):
                component = line.split(":'")[1][:-1]
                if component not in components:
                    components.append(component)

            if "uses-feature:" in line or "uses-implied-feature:" in line:
                feature = line.split("name='")[1].split("'")[0]
                if feature not in features:
                    features.append(feature)

        if pkg_version is not None:
            # versionCode should always be numeric; guard against malformed
            # aapt output instead of crashing the whole analysis.
            try:
                pkg_version = int(pkg_version)
            except ValueError:
                pkg_version = None
        if pkg_version is not None:
            if pkg_version < 15:
                ResultSection("Package version is suspiciously low", parent=res_badging,
                              heuristic=Heuristic(17))
            elif pkg_version > 999999999:
                ResultSection("Package version is suspiciously high", parent=res_badging,
                              heuristic=Heuristic(17))

        if libs:
            res_lib = ResultSection("Libraries used", parent=res_badging)
            for lib in libs:
                res_lib.add_line(lib)
                res_lib.add_tag('file.apk.used_library', lib)

        if permissions:
            res_permissions = ResultSection("Permissions used", parent=res_badging)
            dangerous_permissions = []
            unknown_permissions = []
            for perm in permissions:
                if perm in ALL_ANDROID_PERMISSIONS:
                    if 'dangerous' in ALL_ANDROID_PERMISSIONS[perm]:
                        dangerous_permissions.append(perm)
                    else:
                        res_permissions.add_line(perm)
                        res_permissions.add_tag('file.apk.permission', perm)
                else:
                    unknown_permissions.append(perm)

            # Fires when any permission appeared more than once in the badging
            # output (the `permissions` list itself is already unique).
            if permission_count > len(permissions):
                ResultSection("Some permissions are defined more then once", parent=res_badging,
                              heuristic=Heuristic(18))

            if dangerous_permissions:
                res_dangerous_perm = ResultSection("Dangerous permissions used", parent=res_badging,
                                                   heuristic=Heuristic(4))
                for perm in dangerous_permissions:
                    res_dangerous_perm.add_line(perm)
                    res_dangerous_perm.add_tag('file.apk.permission', perm)

            if unknown_permissions:
                res_unknown_perm = ResultSection("Unknown permissions used", parent=res_badging,
                                                 heuristic=Heuristic(5))
                for perm in unknown_permissions:
                    res_unknown_perm.add_line(perm)
                    res_unknown_perm.add_tag('file.apk.permission', perm)

        if features:
            res_features = ResultSection("Features used", parent=res_badging)
            for feature in features:
                res_features.add_line(feature)
                res_features.add_tag('file.apk.feature', feature)

        if components:
            res_components = ResultSection("Components provided", parent=res_badging)
            for component in components:
                res_components.add_line(component)
                res_components.add_tag('file.apk.provides_component', component)

        result.add_section(res_badging)
    def execute(self, request):
        # ==================================================================
        # Execute a request:
        #   Every time your service receives a new file to scan, the execute function is called
        #   This is where you should execute your processing code.
        #   For the purpose of this example, we will only generate results ...

        # You should run your code here...

        # ==================================================================
        # Check if we're scanning an embedded file
        #   This service always drop 3 embedded file which two generates random results and the other empty results
        #   We're making a check to see if we're scanning the embedded file.
        #   In a normal service this is not something you would do at all but since we are using this
        #   service in our unit test to test all features of our report generator, we have to do this
        if request.sha256 not in [
                'd729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                '5ce5ae8ef56a54af2c44415800a81ecffd49a33ae8895dfe38fc1075d3f619ec',
                'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06'
        ]:
            # Main file results...

            # ==================================================================
            # Write the results:
            #   First, create a result object where all the result sections will be saved to
            result = Result()

            # ==================================================================
            # Standard text section: BODY_FORMAT.TEXT - DEFAULT
            #   Text sections basically just dumps the text to the screen...
            #     All sections scores will be SUMed in the service result
            #     The Result classification will be the highest classification found in the sections
            text_section = ResultTextSection('Example of a default section')
            # You can add lines to your section one at a time
            #   Here we will generate a random line
            text_section.add_line(get_random_phrase())
            # Or your can add them from a list
            #   Here we will generate random amount of random lines
            text_section.add_lines(
                [get_random_phrase() for _ in range(random.randint(1, 5))])
            # You can tag data to a section, tagging is used to to quickly find defining information about a file
            text_section.add_tag("attribution.implant", "ResultSample")
            # If the section needs to affect the score of the file you need to set a heuristics
            #   Here we will pick one at random
            #     In addition to add a heuristic, we will associated a signature with the heuristic,
            #     we're doing this by adding the signature name to the heuristic. (Here we generating a random name)
            text_section.set_heuristic(3, signature="sig_one")
            # You can attach attack ids to heuristics after they where defined
            text_section.heuristic.add_attack_id(
                random.choice(list(software_map.keys())))
            text_section.heuristic.add_attack_id(
                random.choice(list(attack_map.keys())))
            text_section.heuristic.add_attack_id(
                random.choice(list(group_map.keys())))
            text_section.heuristic.add_attack_id(
                random.choice(list(revoke_map.keys())))
            # Same thing for the signatures, they can be added to heuristic after the fact and you can even say how
            #   many time the signature fired by setting its frequency. If you call add_signature_id twice with the
            #   same signature, this will effectively increase the frequency of the signature.
            text_section.heuristic.add_signature_id("sig_two",
                                                    score=20,
                                                    frequency=2)
            text_section.heuristic.add_signature_id("sig_two",
                                                    score=20,
                                                    frequency=3)
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_four", score=0)
            # The heuristic for text_section should have the following properties
            #   1. 1 attack ID: T1066
            #   2. 4 signatures: sig_one, sig_two, sig_three and sig_four
            #   3. Signature frequencies are cumulative therefor they will be as follow:
            #      - sig_one = 1
            #      - sig_two = 5
            #      - sig_three = 2
            #      - sig_four = 1
            #   4. The score used by each heuristic is driven by the following rules: signature_score_map is higher
            #      priority, then score value for the add_signature_id is in second place and finally the default
            #      heuristic score is use. Therefor the score used to calculate the total score for the text_section is
            #      as follow:
            #      - sig_one: 10    -> heuristic default score
            #      - sig_two: 20    -> score provided by the function add_signature_id
            #      - sig_three: 30  -> score provided by the heuristic map
            #      - sig_four: 40   -> score provided by the heuristic map because it's higher priority than the
            #                          function score
            #    5. Total section score is then: 1x10 + 5x20 + 2x30 + 1x40 = 210
            # Make sure you add your section to the result
            result.add_section(text_section)

            # Even if the section was added to the results you can still modify it by adding a subsection for example
            ResultSection(
                "Example of sub-section without a body added later in processing",
                parent=text_section)

            # ==================================================================
            # Color map Section: BODY_FORMAT.GRAPH_DATA
            #     Creates a color map bar using a minimum and maximum domain
            #     e.g. We are using this section to display the entropy distribution in some services
            cmap_min = 0
            cmap_max = 20
            cmap_values = [random.random() * cmap_max for _ in range(50)]
            # The classification of a section can be set to any valid classification for your system
            section_color_map = ResultGraphSection(
                "Example of colormap result section",
                classification=cl_engine.RESTRICTED)
            section_color_map.set_colormap(cmap_min, cmap_max, cmap_values)
            result.add_section(section_color_map)

            # ==================================================================
            # URL section: BODY_FORMAT.URL
            #   Generate a list of clickable urls using a json encoded format
            #     As you can see here, the body of the section can be set directly instead of line by line
            random_host = get_random_host()
            url_section = ResultURLSection('Example of a simple url section')
            url_section.add_url(f"https://{random_host}/", name="Random url!")

            # Since urls are very important features we can tag those features in the system so they are easy to find
            #   Tags are defined by a type and a value
            url_section.add_tag("network.static.domain", random_host)

            # You may also want to provide a list of url!
            #   Also, No need to provide a name, the url link will be displayed
            hosts = [get_random_host() for _ in range(2)]

            # A heuristic can fire more then once without being associated to a signature
            url_heuristic = Heuristic(4, frequency=len(hosts))

            url_sub_section = ResultURLSection(
                'Example of a url sub-section with multiple links',
                heuristic=url_heuristic,
                classification=cl_engine.RESTRICTED)
            for host in hosts:
                url_sub_section.add_url(f"https://{host}/")
                url_sub_section.add_tag("network.static.domain", host)

            # You can keep nesting sections if you really need to
            ips = [get_random_ip() for _ in range(3)]
            url_sub_sub_section = ResultURLSection(
                'Exemple of a two level deep sub-section')
            for ip in ips:
                url_sub_sub_section.add_url(f"https://{ip}/")
                url_sub_sub_section.add_tag("network.static.ip", ip)

            # Since url_sub_sub_section is a sub-section of url_sub_section
            # we will add it as a sub-section of url_sub_section not to the main result itself
            url_sub_section.add_subsection(url_sub_sub_section)

            # Invalid sections will be ignored, and an error will apear in the logs
            # Sub-sections of invalid sections will be ignored too
            invalid_section = ResultSection("")
            ResultSection(
                "I won't make it to the report because my parent is invalid :(",
                parent=invalid_section)
            url_sub_section.add_subsection(invalid_section)

            # Since url_sub_section is a sub-section of url_section
            # we will add it as a sub-section of url_section not to the main result itself
            url_section.add_subsection(url_sub_section)

            result.add_section(url_section)

            # ==================================================================
            # Memory dump section: BODY_FORMAT.MEMORY_DUMP
            #     Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
            data = hexdump(
                b"This is some random text that we will format as an hexdump and you'll see "
                b"that the hexdump formatting will be preserved by the memory dump section!"
            )
            memdump_section = ResultMemoryDumpSection(
                'Example of a memory dump section', body=data)
            memdump_section.set_heuristic(random.randint(1, 4))
            result.add_section(memdump_section)

            # ==================================================================
            # KEY_VALUE section:
            #     This section allows the service writer to list a bunch of key/value pairs to be displayed in the UI
            #     while also providing easy to parse data for auto mated tools.
            #     NB: You should definitely use this over a JSON body type since this one will be displayed correctly
            #         in the UI for the user
            #     The body argument must be a dictionary (only str, int, and booleans are allowed)
            kv_section = ResultKeyValueSection(
                'Example of a KEY_VALUE section')
            # You can add items individually
            kv_section.set_item('key', "value")
            # Or simply add them in bulk
            kv_section.update_items({
                "a_str": "Some string",
                "a_bool": False,
                "an_int": 102,
            })
            result.add_section(kv_section)

            # ==================================================================
            # ORDERED_KEY_VALUE section:
            #     This section provides the same functionality as the KEY_VALUE section except the order of the fields
            #     are garanteed to be preserved in the order in which the fields are added to the section. Also with
            #     this section, you can repeat the same key name multiple times
            oredered_kv_section = ResultOrderedKeyValueSection(
                'Example of an ORDERED_KEY_VALUE section')
            # You can add items individually
            for x in range(random.randint(3, 6)):
                oredered_kv_section.add_item(f'key{x}', f"value{x}")

            result.add_section(oredered_kv_section)

            # ==================================================================
            # JSON section:
            #     Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
            #     to display a tree view of JSON results.
            #     NB: Use this sparingly! As a service developer you should do your best to include important
            #     results as their own result sections.
            #     The body argument must be a python dictionary
            json_body = {
                "a_str": "Some string",
                "a_list": ["a", "b", "c"],
                "a_bool": False,
                "an_int": 102,
                "a_dict": {
                    "list_of_dict": [{
                        "d1_key": "val",
                        "d1_key2": "val2"
                    }, {
                        "d2_key": "val",
                        "d2_key2": "val2"
                    }],
                    "bool":
                    True
                }
            }
            json_section = ResultJSONSection('Example of a JSON section')
            # You can set the json result to a specific value
            json_section.set_json(json_body)
            # You can also update specific parts after the fact
            json_section.update_json({
                'an_int': 1000,
                'updated_key': 'updated_value'
            })

            result.add_section(json_section)

            # ==================================================================
            # PROCESS_TREE section:
            #     This section allows the service writer to list a bunch of dictionary objects that have nested lists
            #     of dictionaries to be displayed in the UI. Each dictionary object represents a process, and therefore
            #     each dictionary must have be of the following format:
            #     {
            #       "process_pid": int,
            #       "process_name": str,
            #       "command_line": str,
            #       "signatures": {}  This dict has the signature name as a key and the score as it's value
            #       "children": []    NB: This list either is empty or contains more dictionaries that have the same
            #                             structure
            #     }
            process_tree_section = ResultProcessTreeSection(
                'Example of a PROCESS_TREE section')
            # You can use the ProcessItem class to create the processes to add to the result section
            evil_process = ProcessItem(123, "evil.exe", "c:\\evil.exe")
            evil_process_child_1 = ProcessItem(
                321, "takeovercomputer.exe",
                "C:\\Temp\\takeovercomputer.exe -f do_bad_stuff")
            # You can add child processes to the ProcessItem objects
            evil_process_child_1.add_child_process(
                ProcessItem(
                    456,
                    "evenworsethanbefore.exe",
                    "C:\\Temp\\evenworsethanbefore.exe -f change_reg_key_cuz_im_bad",
                    signatures={
                        "one": 10,
                        "two": 10,
                        "three": 10
                    }))
            evil_process_child_1.add_child_process(
                ProcessItem(234,
                            "badfile.exe",
                            "C:\\badfile.exe -k nothing_to_see_here",
                            signatures={
                                "one": 1000,
                                "two": 10,
                                "three": 10,
                                "four": 10,
                                "five": 10
                            }))

            # You can add signatures that hit on a ProcessItem Object
            evil_process_child_1.add_signature('one', 250)

            # Or even directly create the ProcessItem object with the signature in it
            evil_process_child_2 = ProcessItem(
                345,
                "benignexe.exe",
                "C:\\benignexe.exe -f \"just kidding, i'm evil\"",
                signatures={"one": 2000})

            # You can also add counts for network, file and registry events to a ProcessItem object
            evil_process_child_2.add_network_events(4)
            evil_process_child_2.add_file_events(7000)
            evil_process_child_2.add_registry_events(10)

            # You can also indicate if the process tree item has been safelisted
            benign_process = ProcessItem(678, "trustme.exe", "C:\\trustme.exe")
            benign_process.safelist()

            evil_process.add_child_process(evil_process_child_1)
            evil_process.add_child_process(evil_process_child_2)

            # Add your processes to the result section via the add_process function
            process_tree_section.add_process(evil_process)
            process_tree_section.add_process(
                ProcessItem(987, "runzeroday.exe",
                            "C:\\runzeroday.exe -f insert_bad_spelling"))
            process_tree_section.add_process(benign_process)

            result.add_section(process_tree_section)

            # ==================================================================
            # TABLE section:
            #     This section allows the service writer to have their content displayed in a table format in the UI
            #     The body argument must be a list [] of dict {} objects. A dict object can have a key value pair
            #     where the value is a flat nested dictionary, and this nested dictionary will be displayed as a nested
            #     table within a cell.
            table_section = ResultTableSection('Example of a TABLE section')
            # Use the TableRow class to help adding row to the Table section
            table_section.add_row(
                TableRow(a_str="Some string1",
                         extra_column_here="confirmed",
                         a_bool=False,
                         an_int=101))
            table_section.add_row(
                TableRow(
                    {
                        "a_str": "Some string2",
                        "a_bool": True,
                        "an_int": "to_be_overriden_by_kwargs"
                    },
                    an_int=102))
            table_section.add_row(
                TableRow(a_str="Some string3", a_bool=False, an_int=103))
            # Valid values for the items in the TableRow are: str, int, bool, None, or dict of those values
            table_section.add_row(
                TableRow(
                    {
                        "a_str": "Some string4",
                        "a_bool": None,
                        "an_int": -1000000000000000000
                    }, {
                        "extra_column_there": "confirmed",
                        "nested_key_value_pair": {
                            "a_str": "Some string3",
                            "a_bool": False,
                            "nested_kv_thats_too_deep": {
                                "a_str": "Some string3",
                                "a_bool": False,
                                "an_int": 103,
                            },
                        }
                    }))
            result.add_section(table_section)

            # ==================================================================
            # Re-Submitting files to the system
            #     Adding extracted files will have them resubmitted to the system for analysis

            # This file will generate random results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(data.encode())
            request.add_extracted(temp_path, "file.txt",
                                  "Extracted by some magic!")

            # Embedded files can also have their own classification!
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"CLASSIFIED!!!__" + data.encode())
            request.add_extracted(temp_path,
                                  "classified.doc",
                                  "Classified file ... don't look",
                                  classification=cl_engine.RESTRICTED)

            # This file will generate empty results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"EMPTY")
            request.add_extracted(temp_path, "empty.txt",
                                  "Extracted empty resulting file")

            # ==================================================================
            # Supplementary files
            #     Adding supplementary files will save them on the datastore for future
            #      reference but wont reprocess those files.
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(url_sub_section.body)
            request.add_supplementary(temp_path, "urls.json",
                                      "These are urls as a JSON file")
            # like embedded files, you can add more then one supplementary files
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(json_body))
            request.add_supplementary(temp_path, "json_body.json",
                                      "This is the json_body as a JSON file")

            # ==================================================================
            # Zeroize on safe tags
            #     When this feature is turned on, the section will get its score set to zero if all its tags
            #     were safelisted by the safelisting engine
            zero_section = ResultSection('Example of zeroize-able section',
                                         zeroize_on_tag_safe=True)
            zero_section.set_heuristic(2)
            zero_section.add_line(
                "This section will have a zero score if all tags are safelisted."
            )
            zero_section.add_tag('network.static.ip', '127.0.0.1')
            result.add_section(zero_section)

            # ==================================================================
            # Auto-collapse
            #     When this feature is turned on, the section will be collapsed when first displayed
            collapse_section = ResultSection(
                'Example of auto-collapse section', auto_collapse=True)
            collapse_section.set_heuristic(2)
            collapse_section.add_line(
                "This section was collapsed when first loaded in the UI")
            result.add_section(collapse_section)

            # ==================================================================
            # Image Section
            #     This type of section allows the service writer to display images to the user
            image_section = ResultImageSection(request,
                                               'Example of Image section')
            for x in range(6):
                image_section.add_image(f'data/000{x+1}.jpg',
                                        f'000{x+1}.jpg',
                                        f'ResultSample screenshot 000{x+1}',
                                        ocr_heuristic_id=6)
            result.add_section(image_section)

            # ==================================================================
            # Multi Section
            #     This type of section allows the service writer to display multiple section types
            #     in the same result section. Here's a concrete exemple of this:
            multi_section = ResultMultiSection(
                'Example of Multi-typed section')
            multi_section.add_section_part(
                TextSectionBody(
                    body="We have detected very high entropy multiple sections "
                    "of your file, this section is most-likely packed or "
                    "encrypted.\n\nHere are affected sections:"))
            section_count = random.randint(1, 4)
            for x in range(section_count):
                multi_section.add_section_part(
                    KVSectionBody(section_name=f".UPX{x}",
                                  offset=f'0x00{8+x}000',
                                  size='4196 bytes'))
                graph_part = GraphSectionBody()
                graph_part.set_colormap(
                    0, 8, [7 + random.random() for _ in range(20)])
                multi_section.add_section_part(graph_part)
                if x != section_count - 1:
                    multi_section.add_section_part(DividerSectionBody())
                multi_section.add_tag("file.pe.sections.name", f".UPX{x}")

            multi_section.set_heuristic(5)
            result.add_section(multi_section)

            # ==================================================================
            # Propagate temporary submission data to other services
            #   Sometimes two service can work in tandem were one extra some piece of information the other
            #   one uses to do it's work. This is how a service can set temporary data that other
            #   services that subscribe to can use.
            request.temp_submission_data['kv_section'] = kv_section.body
            request.temp_submission_data[
                'process_tree_section'] = process_tree_section.body
            request.temp_submission_data['url_section'] = url_sub_section.body

            # ==================================================================
            # Wrap-up:
            #     Save your result object back into the request
            request.result = result

        # ==================================================================
        # Empty results file
        elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
            # Creating and empty result object
            request.result = Result()

        # ==================================================================
        # Randomized results file
        else:
            # For the randomized  results file, we will completely randomize the results
            #   The content of those results do not matter since we've already showed you
            #   all the different result sections, tagging, heuristics and file upload functions
            embedded_result = Result()

            # random number of sections
            for _ in range(1, 3):
                embedded_result.add_section(self._create_random_section())

            request.result = embedded_result
Example #21
0
    def hex_results(self, request, patterns):
        """
        Finds and reports long ascii hex strings

        Args:
            request: AL request object with result section
            patterns: PatternMatch object
        """
        asciihex_file_found = False
        # {tag_key: [results]} for plain ascii-hex decoded strings
        asciihex_dict = {}
        # {tag_key: [results]} for XOR-masked ('BB_'-prefixed) decoded strings
        asciihex_bb_dict = {}

        # Runs of 16 or more hex byte-pairs, optionally split by CR/LF
        hex_pat = re.compile(b'((?:[0-9a-fA-F]{2}[\r]?[\n]?){16,})')
        for hex_match in re.findall(hex_pat, request.file_contents):
            hex_string = hex_match.replace(b'\r', b'').replace(b'\n', b'')
            afile_found, asciihex_results = self.unhexlify_ascii(
                request, hex_string, request.file_type, patterns)
            if afile_found:
                asciihex_file_found = True
            # unhexlify_ascii returns either an empty placeholder (b"") or a
            # dict of results. The previous `!= b""` test was always true for
            # dicts; a plain truthiness check covers both the b"" sentinel
            # and an empty dict.
            if asciihex_results:
                for ask, asi in asciihex_results.items():
                    if ask.startswith('BB_'):
                        # Add any xor'd content to its own result set,
                        # keyed by the tag name after the 'BB_' prefix
                        ask = ask.split('_', 1)[1]
                        asciihex_bb_dict.setdefault(ask, []).append(asi)
                    else:
                        asciihex_dict.setdefault(ask, []).append(asi)

        # Report Ascii Hex Encoded Data:
        if asciihex_file_found:
            asciihex_emb_res = (ResultSection(
                "Found Large Ascii Hex Strings in Non-Executable:",
                body_format=BODY_FORMAT.MEMORY_DUMP,
                heuristic=Heuristic(7),
                parent=request.result))
            asciihex_emb_res.add_line(
                "Extracted possible ascii-hex object(s). See extracted files.")

        if asciihex_dict:
            # Different scores are used depending on whether the file is a document
            heuristic = Heuristic(10) if request.file_type.startswith("document") else Heuristic(8)
            asciihex_res = (ResultSection("ASCII HEX DECODED IOC Strings:",
                                          body_format=BODY_FORMAT.MEMORY_DUMP,
                                          heuristic=heuristic,
                                          parent=request.result))
            for k, l in sorted(asciihex_dict.items()):
                for i in l:
                    for ii in i:
                        asciihex_res.add_line(
                            f"Found {k.replace('_', ' ')} decoded HEX string: {ii}"
                        )

        if asciihex_bb_dict:
            asciihex_bb_res = (ResultSection(
                "ASCII HEX AND XOR DECODED IOC Strings:",
                heuristic=Heuristic(9),
                parent=request.result))
            # 1-based counter so each result sub-section gets a unique title
            xindex = 0
            for k, l in sorted(asciihex_bb_dict.items()):
                for i in l:
                    for kk, ii in i.items():
                        xindex += 1
                        asx_res = (ResultSection(f"Result {xindex}",
                                                 parent=asciihex_bb_res))
                        asx_res.add_line(
                            f"Found {k.replace('_', ' ')} decoded HEX string, masked with "
                            f"transform {ii[1]}:")
                        asx_res.add_line("Decoded XOR string:")
                        asx_res.add_line(ii[0])
                        asx_res.add_line("Original ASCII HEX String:")
                        asx_res.add_line(kk)
                        asciihex_bb_res.add_tag(k, ii[0])
    def hex_results(self, request: ServiceRequest,
                    patterns: PatternMatch) -> None:
        """
        Finds and reports long ascii hex strings

        Args:
            request: AL request object with result section
            patterns: PatternMatch object
        """
        asciihex_file_found = False
        # {tag_key: {decoded strings}} for ascii-hex decoded IOCs
        asciihex_dict: Dict[str, Set[str]] = {}
        # {tag_key: {(data, match, transform)}} for XOR-masked decodes
        asciihex_bb_dict: Dict[str, Set[Tuple[bytes, bytes, str]]] = {}

        # Runs of 16 or more hex byte-pairs, optionally split by CR/LF
        hex_pat = re.compile(b'((?:[0-9a-fA-F]{2}[\r]?[\n]?){16,})')
        for hex_match in re.findall(hex_pat, request.file_contents):
            hex_string = hex_match.replace(b'\r', b'').replace(b'\n', b'')
            afile_found, asciihex_results, xorhex_results = self.unhexlify_ascii(
                request, hex_string, request.file_type, patterns)
            if afile_found:
                asciihex_file_found = True
            for ascii_key, ascii_values in asciihex_results.items():
                asciihex_dict.setdefault(ascii_key, set()).update(ascii_values)
            for xor_key, xor_results in xorhex_results.items():
                if xor_key.startswith('BB_'):
                    # XOR'd content goes to its own bucket, keyed by the
                    # tag name after the 'BB_' prefix
                    xor_key = xor_key.split('_', 1)[1]
                    asciihex_bb_dict.setdefault(xor_key, set()).add(xor_results)
                else:
                    asciihex_dict.setdefault(xor_key, set()).add(safe_str(xor_results[1]))

        # Report Ascii Hex Encoded Data:
        if asciihex_file_found:
            asciihex_emb_res = (ResultSection(
                "Found Large Ascii Hex Strings in Non-Executable:",
                body_format=BODY_FORMAT.MEMORY_DUMP,
                heuristic=Heuristic(7),
                parent=request.result))
            asciihex_emb_res.add_line(
                "Extracted possible ascii-hex object(s). See extracted files.")

        if asciihex_dict:
            # Different scores are used depending on whether the file is a document
            asciihex_res = (ResultSection(
                "ASCII HEX DECODED IOC Strings:",
                body_format=BODY_FORMAT.MEMORY_DUMP,
                heuristic=Heuristic(
                    10 if request.file_type.startswith("document") else 8),
                parent=request.result))
            for key, hex_list in sorted(asciihex_dict.items()):
                for h in hex_list:
                    asciihex_res.add_line(
                        f"Found {key.replace('_', ' ')} decoded HEX string: {safe_str(h)}"
                    )
                    asciihex_res.add_tag(key, h)

        if asciihex_bb_dict:
            asciihex_bb_res = (ResultSection(
                "ASCII HEX AND XOR DECODED IOC Strings:",
                heuristic=Heuristic(9),
                parent=request.result))
            # Number every result sequentially (1-based). The previous code
            # enumerated keys, so all results under one key shared the same
            # "Result N" title and numbering started at 0.
            xindex = 0
            for xkey, xset in sorted(asciihex_bb_dict.items()):
                for xresult in xset:
                    xindex += 1
                    data, match, transform = xresult
                    asx_res = (ResultSection(f"Result {xindex}",
                                             parent=asciihex_bb_res))
                    asx_res.add_line(
                        f"Found {xkey.replace('_', ' ')} decoded HEX string, masked with "
                        f"transform {safe_str(transform)}:")
                    asx_res.add_line("Decoded XOR string:")
                    asx_res.add_line(safe_str(match))
                    asx_res.add_line("Original ASCII HEX String:")
                    asx_res.add_line(safe_str(data))
                    asciihex_bb_res.add_tag(xkey, match)
    def execute(self, request):
        # ==================================================================
        # Execute a request:
        #   Every time your service receives a new file to scan, the execute function is called
        #   This is where you should execute your processing code.
        #   For the purpose of this example, we will only generate results ...

        # You should run your code here...

        # ==================================================================
        # Check if we're scanning an embedded file
        #   This service always drop 3 embedded file which two generates random results and the other empty results
        #   We're making a check to see if we're scanning the embedded file.
        #   In a normal service this is not something you would do at all but since we are using this
        #   service in our unit test to test all features of our report generator, we have to do this
        if request.sha256 not in ['d729ecfb2cf40bc4af8038dac609a57f57dbe6515d35357af973677d5e66417a',
                                  '5ce5ae8ef56a54af2c44415800a81ecffd49a33ae8895dfe38fc1075d3f619ec',
                                  'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06']:
            # Main file results...

            # ==================================================================
            # Write the results:
            #   First, create a result object where all the result sections will be saved to
            result = Result()

            # ==================================================================
            # Standard text section: BODY_FORMAT.TEXT - DEFAULT
            #   Text sections basically just dumps the text to the screen...
            #     All sections scores will be SUMed in the service result
            #     The Result classification will be the highest classification found in the sections
            text_section = ResultSection('Example of a default section')
            # You can add lines to your section one at a time
            #   Here we will generate a random line
            text_section.add_line(get_random_phrase())
            # Or your can add them from a list
            #   Here we will generate random amount of random lines
            text_section.add_lines([get_random_phrase() for _ in range(random.randint(1, 5))])
            # If the section needs to affect the score of the file you need to set a heuristics
            #   Here we will pick one at random
            #     In addition to add a heuristic, we will associated a signature with the heuristic,
            #     we're doing this by adding the signature name to the heuristic. (Here we generating a random name)
            text_section.set_heuristic(3, signature="sig_one")
            # You can attach attack ids to heuristics after they where defined
            text_section.heuristic.add_attack_id("T1066")
            # Same thing for the signatures, they can be added to heuristic after the fact and you can even say how
            #   many time the signature fired by setting its frequency. If you call add_signature_id twice with the
            #   same signature, this will effectively increase the frequency of the signature.
            text_section.heuristic.add_signature_id("sig_two", score=20, frequency=2)
            text_section.heuristic.add_signature_id("sig_two", score=20, frequency=3)
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_three")
            text_section.heuristic.add_signature_id("sig_four", score=0)
            # The heuristic for text_section should have the following properties
            #   1. 1 attack ID: T1066
            #   2. 4 signatures: sig_one, sig_two, sig_three and sig_four
            #   3. Signature frequencies are cumulative therefor they will be as follow:
            #      - sig_one = 1
            #      - sig_two = 5
            #      - sig_three = 2
            #      - sig_four = 1
            #   4. The score used by each heuristic is driven by the following rules: signature_score_map is higher
            #      priority, then score value for the add_signature_id is in second place and finally the default
            #      heuristic score is use. Therefor the score used to calculate the total score for the text_section is
            #      as follow:
            #      - sig_one: 10    -> heuristic default score
            #      - sig_two: 20    -> score provided by the function add_signature_id
            #      - sig_three: 30  -> score provided by the heuristic map
            #      - sig_four: 40   -> score provided by the heuristic map because it's higher priority than the
            #                          function score
            #    5. Total section score is then: 1x10 + 5x20 + 2x30 + 1x40 = 210
            # Make sure you add your section to the result
            result.add_section(text_section)

            # ==================================================================
            # Color map Section: BODY_FORMAT.GRAPH_DATA
            #     Creates a color map bar using a minimum and maximum domain
            #     e.g. We are using this section to display the entropy distribution in some services
            cmap_min = 0
            cmap_max = 20
            color_map_data = {
                'type': 'colormap',
                'data': {
                    'domain': [cmap_min, cmap_max],
                    'values': [random.random() * cmap_max for _ in range(50)]
                }
            }
            # The classification of a section can be set to any valid classification for your system
            section_color_map = ResultSection("Example of colormap result section", body_format=BODY_FORMAT.GRAPH_DATA,
                                              body=json.dumps(color_map_data), classification=cl_engine.RESTRICTED)
            result.add_section(section_color_map)

            # ==================================================================
            # URL section: BODY_FORMAT.URL
            #   Generate a list of clickable urls using a json encoded format
            #     As you can see here, the body of the section can be set directly instead of line by line
            random_host = get_random_host()
            url_section = ResultSection('Example of a simple url section', body_format=BODY_FORMAT.URL,
                                        body=json.dumps({"name": "Random url!", "url": f"https://{random_host}/"}))

            # Since urls are very important features we can tag those features in the system so they are easy to find
            #   Tags are defined by a type and a value
            url_section.add_tag("network.static.domain", random_host)

            # You may also want to provide a list of url!
            #   Also, No need to provide a name, the url link will be displayed
            host1 = get_random_host()
            host2 = get_random_host()
            ip1 = get_random_ip()
            ip2 = get_random_ip()
            ip3 = get_random_ip()
            urls = [
                {"url": f"https://{host1}/"},
                {"url": f"https://{host2}/"},
                {"url": f"https://{ip1}/"},
                {"url": f"https://{ip2}/"},
                {"url": f"https://{ip3}/"}]

            # A heuristic can fire more then once without being associated to a signature
            url_heuristic = Heuristic(4, frequency=len(urls))

            url_sub_section = ResultSection('Example of a url section with multiple links',
                                            body=json.dumps(urls), body_format=BODY_FORMAT.URL,
                                            heuristic=url_heuristic)
            url_sub_section.add_tag("network.static.ip", ip1)
            url_sub_section.add_tag("network.static.ip", ip2)
            url_sub_section.add_tag("network.static.ip", ip3)
            url_sub_section.add_tag("network.static.domain", host1)
            url_sub_section.add_tag("network.dynamic.domain", host2)
            # Since url_sub_section is a sub-section of url_section
            # we will add it as a sub-section of url_section not to the main result itself
            url_section.add_subsection(url_sub_section)
            result.add_section(url_section)

            # ==================================================================
            # Memory dump section: BODY_FORMAT.MEMORY_DUMP
            #     Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
            data = hexdump(b"This is some random text that we will format as an hexdump and you'll see "
                           b"that the hexdump formatting will be preserved by the memory dump section!")
            memdump_section = ResultSection('Example of a memory dump section', body_format=BODY_FORMAT.MEMORY_DUMP,
                                            body=data)
            memdump_section.set_heuristic(random.randint(1, 4))
            result.add_section(memdump_section)

            # ==================================================================
            # KEY_VALUE section:
            #     This section allows the service writer to list a bunch of key/value pairs to be displayed in the UI
            #     while also providing easy to parse data for auto mated tools.
            #     NB: You should definitely use this over a JSON body type since this one will be displayed correctly
            #         in the UI for the user
            #     The body argument must be a json dumps of a dictionary (only str, int, and booleans are allowed)
            kv_body = {
                "a_str": "Some string",
                "a_bool": False,
                "an_int": 102,
            }
            kv_section = ResultSection('Example of a KEY_VALUE section', body_format=BODY_FORMAT.KEY_VALUE,
                                       body=json.dumps(kv_body))
            result.add_section(kv_section)

            # ==================================================================
            # JSON section:
            #     Re-use the JSON editor we use for administration (https://github.com/josdejong/jsoneditor)
            #     to display a tree view of JSON results.
            #     NB: Use this sparingly! As a service developer you should do your best to include important
            #     results as their own result sections.
            #     The body argument must be a json dump of a python dictionary
            json_body = {
                "a_str": "Some string",
                "a_list": ["a", "b", "c"],
                "a_bool": False,
                "an_int": 102,
                "a_dict": {
                    "list_of_dict": [
                        {"d1_key": "val", "d1_key2": "val2"},
                        {"d2_key": "val", "d2_key2": "val2"}
                    ],
                    "bool": True
                }
            }
            json_section = ResultSection('Example of a JSON section', body_format=BODY_FORMAT.JSON,
                                         body=json.dumps(json_body))
            result.add_section(json_section)

            # ==================================================================
            # PROCESS_TREE section:
            #     This section allows the service writer to list a bunch of dictionary objects that have nested lists
            #     of dictionaries to be displayed in the UI. Each dictionary object represents a process, and therefore
            #     each dictionary must have be of the following format:
            #     {
            #       "process_pid": int,
            #       "process_name": str,
            #       "command_line": str,
            #       "children": [] NB: This list either is empty or contains more dictionaries that have the same
            #                          structure
            #     }
            nc_body = [
                {
                    "process_pid": 123,
                    "process_name": "evil.exe",
                    "command_line": "C:\\evil.exe",
                    "signatures": {},
                    "children": [
                        {
                            "process_pid": 321,
                            "process_name": "takeovercomputer.exe",
                            "command_line": "C:\\Temp\\takeovercomputer.exe -f do_bad_stuff",
                            "signatures": {"one":250},
                            "children": [
                                {
                                    "process_pid": 456,
                                    "process_name": "evenworsethanbefore.exe",
                                    "command_line": "C:\\Temp\\evenworsethanbefore.exe -f change_reg_key_cuz_im_bad",
                                    "signatures": {"one":10, "two":10, "three":10},
                                    "children": []
                                },
                                {
                                    "process_pid": 234,
                                    "process_name": "badfile.exe",
                                    "command_line": "C:\\badfile.exe -k nothing_to_see_here",
                                    "signatures": {"one":1000, "two":10, "three":10, "four":10, "five":10},
                                    "children": []
                                }
                            ]
                        },
                        {
                            "process_pid": 345,
                            "process_name": "benignexe.exe",
                            "command_line": "C:\\benignexe.exe -f \"just kidding, i'm evil\"",
                            "signatures": {"one": 2000},
                            "children": []
                        }
                    ]
                },
                {
                    "process_pid": 987,
                    "process_name": "runzeroday.exe",
                    "command_line": "C:\\runzeroday.exe -f insert_bad_spelling",
                    "signatures": {},
                    "children": []
                }
            ]
            nc_section = ResultSection('Example of a PROCESS_TREE section',
                                       body_format=BODY_FORMAT.PROCESS_TREE,
                                       body=json.dumps(nc_body))
            result.add_section(nc_section)
            
            # ==================================================================
            # TABLE section:
            #     This section allows the service writer to have their content displayed in a table format in the UI
            #     The body argument must be a list [] of dict {} objects. A dict object can have a key value pair
            #     where the value is a flat nested dictionary, and this nested dictionary will be displayed as a nested
            #     table within a cell.
            table_body = [
                {
                    "a_str": "Some string1",
                    "extra_column_here": "confirmed",
                    "a_bool": False,
                    "an_int": 101,
                },
                {
                    "a_str": "Some string2",
                    "a_bool": True,
                    "an_int": 102,
                },
                {
                    "a_str": "Some string3",
                    "a_bool": False,
                    "an_int": 103,
                },
                {
                    "a_str": "Some string4",
                    "a_bool": None,
                    "an_int": -1000000000000000000,
                    "extra_column_there": "confirmed",
                    "nested_table": {
                        "a_str": "Some string3",
                        "a_bool": False,
                        "nested_table_thats_too_deep": {
                            "a_str": "Some string3",
                            "a_bool": False,
                            "an_int": 103,
                        },
                    },
                },
            ]
            table_section = ResultSection('Example of a TABLE section',
                                          body_format=BODY_FORMAT.TABLE,
                                          body=json.dumps(table_body))
            result.add_section(table_section)

            # ==================================================================
            # Re-Submitting files to the system
            #     Adding extracted files will have them resubmitted to the system for analysis

            # This file will generate random results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(data.encode())
            request.add_extracted(temp_path, "file.txt", "Extracted by some magic!")

            # Embedded files can also have their own classification!
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"CLASSIFIED!!!__"+data.encode())
            request.add_extracted(temp_path, "classified.doc", "Classified file ... don't look",
                                  classification=cl_engine.RESTRICTED)

            # This file will generate empty results on the next run
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "wb") as myfile:
                myfile.write(b"EMPTY")
            request.add_extracted(temp_path, "empty.txt", "Extracted empty resulting file")

            # ==================================================================
            # Supplementary files
            #     Adding supplementary files will save them on the datastore for future
            #      reference but wont reprocess those files.
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(urls))
            request.add_supplementary(temp_path, "urls.json", "These are urls as a JSON file")
            # like embedded files, you can add more then one supplementary files
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(json_body))
            request.add_supplementary(temp_path, "json_body.json", "This is the json_body as a JSON file")

            # ==================================================================
            # Wrap-up:
            #     Save your result object back into the request
            request.result = result

        # ==================================================================
        # Empty results file
        elif request.sha256 == 'cc1d2f838445db7aec431df9ee8a871f40e7aa5e064fc056633ef8c60fab7b06':
            # Creating and empty result object
            request.result = Result()

        # ==================================================================
        # Randomized results file
        else:
            # For the randomized  results file, we will completely randomize the results
            #   The content of those results do not matter since we've already showed you
            #   all the different result sections, tagging, heuristics and file upload functions
            embedded_result = Result()

            # random number of sections
            for _ in range(1, 3):
                embedded_result.add_section(self._create_random_section())

            request.result = embedded_result
    def unicode_results(self, request: ServiceRequest,
                        patterns: PatternMatch) -> Optional[ResultSection]:
        """
        Find and report unicode-encoded strings in the submission.

        Args:
            request: AL request object with result section
            patterns: PatternMatch object

        Returns:
            The result section (with request.result as its parent) if one is created
        """
        # sha256 -> (decoded bytes, encoded sample); populated by decode_encoded_udata
        decoded_by_hash: Dict[str, Tuple[bytes, bytes]] = {}
        # (sha256, encoding marker) pairs for every dropped/extracted blob
        dropped: List[Tuple[str, str]] = []
        for enc_marker in self.HEXENC_STRINGS:
            # Only attempt a decode when the marker is followed by hex digits
            if not re.search(re.escape(enc_marker) + b'[A-Fa-f0-9]{2}',
                             request.file_contents):
                continue
            for file_hash in self.decode_encoded_udata(request, enc_marker,
                                                       request.file_contents,
                                                       decoded_by_hash):
                dropped.append((file_hash, safe_str(enc_marker)))

        # Report Unicode Encoded Data:
        heur = Heuristic(5, frequency=len(dropped)) if dropped else None
        parent_section = ResultSection(
            "Found Unicode-Like Strings in Non-Executable:",
            body_format=BODY_FORMAT.MEMORY_DUMP,
            heuristic=heur)
        for file_hash, encoding_name in dropped:
            parent_section.add_line(
                f"Extracted over 50 bytes of possible embedded unicode with "
                f"{encoding_name} encoding. SHA256: {file_hash}. See extracted files.")

        for index, (sha256, (decoded, encoded)) in enumerate(decoded_by_hash.items()):
            entry = ResultSection(f"Result {index}", parent=parent_section)
            entry.add_line(f'ENCODED TEXT SIZE: {len(decoded)}')
            entry.add_line(f'ENCODED SAMPLE TEXT: {safe_str(encoded)}[........]')
            entry.add_line(f'DECODED SHA256: {sha256}')
            dump = ResultSection("DECODED ASCII DUMP:",
                                 body_format=BODY_FORMAT.MEMORY_DUMP,
                                 parent=entry)
            dump.add_line(safe_str(decoded))
            # Look for IOCs of interest in the decoded payload
            if self.ioc_to_tag(decoded,
                               patterns,
                               entry,
                               st_max_length=1000,
                               taglist=True):
                entry.set_heuristic(6)
                dump.add_line("Suspicious string(s) found in decoded data.")
            else:
                entry.set_heuristic(4)

        if decoded_by_hash or dropped:
            request.result.add_section(parent_section)
            return parent_section
        return None
# Example #25
    def execute(self, request):
        """Run the SWF analysis service on a single submission.

        Parses the SWF header and tags, summarizes symbols, extracts
        embedded binary data and large string buffers, and raises
        heuristics for missing product info, anti-decompilation tricks and
        very recent compilation.

        Args:
            request: AL service request; results and extracted files are
                attached to it.

        Raises:
            Exception: re-raised if the SWF cannot be parsed.
        """
        request.result = Result()
        self.result = request.result
        file_path = request.file_path
        # BUGFIX: the previous version opened the file without a context
        # manager and later shadowed `fh` in nested `with open(...) as fh:`
        # blocks, so the final fh.close() closed an already-closed inner
        # handle and the SWF handle itself leaked. Keeping the whole
        # analysis inside a single `with` fixes both problems.
        with open(file_path, 'rb') as swf_fh:
            try:
                self.swf = SWF(swf_fh)
                if self.swf is None:
                    raise Exception("self.swf is None")
            except Exception as e:
                self.log.exception(
                    f"Unable to parse file {request.sha256}: {str(e)}")
                raise

            # Per-run state populated by the tag analyzers below.
            self.tag_summary = defaultdict(list)
            self.symbols = {}
            self.binary_data = {}
            self.exported_assets = []
            self.big_buffers = set()
            self.has_product_info = False
            self.anti_decompilation = False
            self.recent_compile = False
            self.disasm_path = None

            # SWF header: version, length, frame geometry/rate/count.
            header_subsection = ResultSection(title_text="SWF Header",
                                              parent=self.result)
            if self.swf.header.version:
                header_subsection.add_line("Version: %d" % self.swf.header.version)
                header_subsection.add_tag(tag_type="file.swf.header.version",
                                          value=str(self.swf.header.version))
            header_subsection.add_line("File length: %d" %
                                       self.swf.header.file_length)
            if self.swf.header.frame_size.__str__():
                header_subsection.add_line("Frame size: %s" %
                                           self.swf.header.frame_size.__str__())
                header_subsection.add_tag(
                    tag_type="file.swf.header.frame.size",
                    value=self.swf.header.frame_size.__str__())
            if self.swf.header.frame_rate:
                header_subsection.add_line("Frame rate: %d" %
                                           self.swf.header.frame_rate)
                header_subsection.add_tag(tag_type="file.swf.header.frame.rate",
                                          value=str(self.swf.header.frame_rate))
            if self.swf.header.frame_count:
                header_subsection.add_line("Frame count: %d" %
                                           self.swf.header.frame_count)
                header_subsection.add_tag(tag_type="file.swf.header.frame.count",
                                          value=str(self.swf.header.frame_count))

            # Parse tags: dispatch each tag to its analyzer and fuzzy-hash
            # the sequence of tag types for similarity matching.
            tag_subsection = ResultSection(title_text="SWF Tags",
                                           parent=self.result)
            tag_types = []
            for tag in self.swf.tags:
                self.tag_analyzers.get(SWF_TAGS.get(tag.type), self._dummy)(tag)
                tag_types.append(str(tag.type))
            tag_list = ','.join(tag_types)
            tags_ssdeep = ssdeep.hash(tag_list)
            tag_subsection.add_tag(tag_type="file.swf.tags_ssdeep",
                                   value=tags_ssdeep)
            # TODO: consider also tagging the two ssdeep chunk hashes separately.

            # Script overview: symbol 0 (if present) names the main class.
            if len(self.symbols.keys()) > 0:
                root_symbol = 'unspecified'
                if 0 in self.symbols:
                    root_symbol = self.symbols[0]
                    self.symbols.pop(0)
                symbol_subsection = ResultSection(title_text="Symbol Summary",
                                                  parent=self.result)
                symbol_subsection.add_line(f'Main: {root_symbol}')
                if len(self.symbols.keys()) > 0:
                    for tag_id, name in sorted(self.symbols.items()):
                        symbol_subsection.add_line(f'ID:{tag_id} - {name}')

            # Extract embedded binary data blobs for re-submission.
            if len(self.binary_data.keys()) > 0:
                binary_subsection = ResultSection(
                    title_text="Attached Binary Data",
                    heuristic=Heuristic(3),
                    parent=self.result)
                for tag_id, tag_data in self.binary_data.items():
                    tag_name = self.symbols.get(tag_id, 'unspecified')
                    binary_subsection.add_line(f'ID:{tag_id} - {tag_name}')
                    try:
                        binary_filename = hashlib.sha256(
                            tag_data).hexdigest() + '.attached_binary'
                        binary_path = os.path.join(self.working_directory,
                                                   binary_filename)
                        with open(binary_path, 'wb') as out_fh:
                            out_fh.write(tag_data)
                        request.add_extracted(
                            binary_path, f"{tag_name}_{tag_id}",
                            f"SWF Embedded Binary Data {str(tag_id)}")
                    except Exception:
                        self.log.exception(
                            "Error submitting embedded binary data for swf:")

            # Summaries collected by the tag analyzers, one subsection per tag.
            tags_subsection = ResultSection(title_text="Tags of Interest")
            for tag in sorted(self.tag_summary.keys()):
                body = ['\t'.join(summary) for summary in self.tag_summary[tag]]
                if body:
                    subtag_section = ResultSection(title_text=tag,
                                                   parent=tags_subsection)
                    subtag_section.add_lines(body)
            if len(tags_subsection.subsections) > 0:
                self.result.add_section(tags_subsection)

            # Large string buffers are suspicious (often packed payloads);
            # extract each one for further analysis.
            if len(self.big_buffers) > 0:
                bbs = ResultSection(title_text="Large String Buffers",
                                    heuristic=Heuristic(1),
                                    parent=self.result)
                for buf in self.big_buffers:
                    if isinstance(buf, str):
                        buf = buf.encode()
                    bbs.add_line("Found a %d byte string." % len(buf))
                    buf_filename = ""
                    try:
                        buf_filename = hashlib.sha256(
                            buf).hexdigest() + '.stringbuf'
                        buf_path = os.path.join(self.working_directory,
                                                buf_filename)
                        with open(buf_path, 'wb') as out_fh:
                            out_fh.write(buf)
                        # BUGFIX: add_extracted(path, name, description) —
                        # the previous version passed name and description
                        # swapped (cf. the embedded-binary call above).
                        request.add_extracted(buf_path, buf_filename,
                                              "AVM2 Large String Buffer.")
                    except Exception:
                        self.log.exception(
                            "Error submitting AVM2 String Buffer %s" %
                            buf_filename)

            if not self.has_product_info:
                self.log.debug("Missing product info.")
                no_info = ResultSection(title_text="Missing Product Information",
                                        heuristic=Heuristic(5),
                                        parent=self.result)
                no_info.add_line(
                    "This SWF doesn't specify information about the product that created it."
                )

            if self.anti_decompilation:
                self.log.debug("Anti-disassembly techniques may be present.")
                no_dis = ResultSection(title_text="Incomplete Disassembly",
                                       heuristic=Heuristic(4),
                                       parent=self.result)
                no_dis.add_line(
                    "This SWF may contain intentional corruption or obfuscation to prevent disassembly."
                )

            if self.recent_compile:
                recent_compile = ResultSection(title_text="Recent Compilation",
                                               heuristic=Heuristic(2),
                                               parent=self.result)
                recent_compile.add_line(
                    "This SWF was compiled within the last 24 hours.")
    def base64_results(self, request: ServiceRequest,
                       patterns: PatternMatch) -> Optional[ResultSection]:
        """
        Finds and reports Base64 encoded text

        Scans the raw file contents (and any embedded UTF-16 strings) for
        runs of base64-looking characters, decodes candidates via self.b64()
        and reports each result in its own subsection.

        Args:
            request: AL request object with result section
            patterns: PatternMatch object

        Returns:
            The result section (with request.result as its parent) if one is created
        """
        # Each entry is (b64result dict, tags dict) returned by self.b64()
        b64_al_results = []
        # Normalized candidate strings already processed (dedup)
        b64_matches = set()

        # Base64 characters with possible space, newline characters and HTML line feeds (&#(XA|10);)
        for b64_match in re.findall(
                b'([\x20]{0,2}(?:[A-Za-z0-9+/]{10,}={0,2}'
                b'(?:&#[x1][A0];)?[\r]?[\n]?){2,})', request.file_contents):
            # Strip line-wrapping/whitespace so equivalent wrapped and
            # unwrapped candidates compare equal for dedup.
            b64_string = b64_match.replace(b'\n', b'').replace(b'\r', b'').replace(b' ', b'')\
                .replace(b'&#xA;', b'').replace(b'&#10;', b'')
            if b64_string in b64_matches:
                continue
            b64_matches.add(b64_string)
            uniq_char = set(b64_string)
            # Require some character diversity to skip low-entropy runs
            # (e.g. "AAAA...") that match the charset but aren't real base64.
            if len(uniq_char) > 6:
                b64result, tags = self.b64(request, b64_string, patterns)
                if len(b64result) > 0:
                    b64_al_results.append((b64result, tags))

        # UTF-16 strings
        # NOTE(review): candidates found in this pass are not checked against
        # b64_matches, so data present both raw and UTF-16-encoded may be
        # reported twice — confirm whether that is intentional.
        for ust in strings.extract_unicode_strings(request.file_contents,
                                                   n=self.st_min_length):
            for b64_match in re.findall(
                    b'([\x20]{0,2}(?:[A-Za-z0-9+/]{10,}={0,2}[\r]?[\n]?){2,})',
                    ust.s):
                b64_string = b64_match.replace(b'\n', b'').replace(
                    b'\r', b'').replace(b' ', b'')
                uniq_char = set(b64_string)
                if len(uniq_char) > 6:
                    b64result, tags = self.b64(request, b64_string, patterns)
                    if len(b64result) > 0:
                        b64_al_results.append((b64result, tags))

        # Report B64 Results
        if len(b64_al_results) > 0:
            # Decoded content not extracted as its own file; dumped together below
            b64_ascii_content: List[bytes] = []
            b64_res = (ResultSection("Base64 Strings:",
                                     heuristic=Heuristic(1),
                                     parent=request.result))
            b64index = 0
            for b64dict, tags in b64_al_results:
                for ttype, values in tags.items():
                    for v in values:
                        b64_res.add_tag(ttype, v)
                # b64dict maps decoded-sha256 -> info tuple; from usage below:
                # [0]=encoded size, [1]=encoded sample, [2]=display dump,
                # [3]=decoded bytes — confirm against self.b64()
                for b64k, b64l in b64dict.items():
                    b64index += 1
                    sub_b64_res = (ResultSection(f"Result {b64index}",
                                                 parent=b64_res))
                    sub_b64_res.add_line(f'BASE64 TEXT SIZE: {b64l[0]}')
                    sub_b64_res.add_line(
                        f'BASE64 SAMPLE TEXT: {safe_str(b64l[1])}[........]')
                    sub_b64_res.add_line(f'DECODED SHA256: {b64k}')
                    subb_b64_res = (ResultSection(
                        "DECODED ASCII DUMP:",
                        body_format=BODY_FORMAT.MEMORY_DUMP,
                        parent=sub_b64_res))
                    subb_b64_res.add_line(safe_str(b64l[2]))

                    # Placeholder dumps mean the payload was already extracted
                    # elsewhere; only collect genuinely-inline content.
                    if b64l[2] not in [
                            b"[Possible file contents. See extracted files.]",
                            b"[IOCs discovered with other non-printable data. See extracted files.]"
                    ]:
                        b64_ascii_content.append(b64l[3])
            # Write all non-extracted decoded b64 content to file
            if len(b64_ascii_content) > 0:
                all_b64 = b"\n".join(b64_ascii_content)
                b64_all_sha256 = hashlib.sha256(all_b64).hexdigest()
                self.extract_file(request, all_b64,
                                  f"all_b64_{b64_all_sha256[:7]}.txt",
                                  "all misc decoded b64 from sample")
            return b64_res
        return None
    def execute(self, request: ServiceRequest) -> None:
        self.result = Result()
        request.result = self.result

        self.ip_list = []
        self.url_list = []
        self.found_powershell = False
        self.file_hashes = []

        vmonkey_err = False
        actions: List[str] = []
        external_functions: List[str] = []
        tmp_iocs: List[str] = []
        output_results: Dict[str, Any] = {}
        potential_base64: Set[str] = set()

        # Running ViperMonkey
        try:
            file_contents = request.file_contents
            input_file: str = request.file_path
            input_file_obj: Optional[IO] = None
            # Typical start to XML files
            if not file_contents.startswith(
                    b"<?") and request.file_type == "code/xml":
                # Default encoding/decoding if BOM not found
                encoding: Optional[str] = None
                decoding: Optional[str] = None
                # Remove potential BOMs from contents
                if file_contents.startswith(BOM_UTF8):
                    encoding = "utf-8"
                    decoding = "utf-8-sig"
                elif file_contents.startswith(BOM_UTF16):
                    encoding = "utf-16"
                    decoding = "utf-16"
                if encoding and decoding:
                    input_file_obj = tempfile.NamedTemporaryFile(
                        "w+", encoding=encoding)
                    input_file_obj.write(
                        file_contents.decode(decoding, errors="ignore"))
                    input_file = input_file_obj.name
                else:
                    # If the file_type was detected as XML, it's probably buried within but not actually an XML file
                    # Give no response as ViperMonkey can't process this kind of file
                    return
            cmd = " ".join([
                PYTHON2_INTERPRETER,
                os.path.join(os.path.dirname(__file__),
                             "vipermonkey_compat.py2"),
                input_file,
                self.working_directory,
            ])
            p = subprocess.run(cmd, capture_output=True, shell=True)
            stdout = p.stdout

            # Close file
            if input_file_obj and os.path.exists(input_file_obj.name):
                input_file_obj.close()

            # Add artifacts
            artifact_dir = os.path.join(
                self.working_directory,
                os.path.basename(input_file) + "_artifacts")
            if os.path.exists(artifact_dir):
                for file in os.listdir(artifact_dir):
                    try:
                        file_path = os.path.join(artifact_dir, file)
                        if os.path.isfile(file_path) and os.path.getsize(
                                file_path):
                            request.add_extracted(
                                file_path, file,
                                "File extracted by ViperMonkey during analysis"
                            )
                    except os.error as e:
                        self.log.warning(e)

            # Read output
            if stdout:
                for line in stdout.splitlines():
                    if line.startswith(b"{") and line.endswith(b"}"):
                        try:
                            output_results = json.loads(line)
                        except UnicodeDecodeError:
                            output_results = json.loads(
                                line.decode("utf-8", "replace"))
                        break

                # Checking for tuple in case vmonkey return is None
                # If no macros found, return is [][][], if error, return is None
                # vmonkey_err can still happen if return is [][][], log as warning instead of error
                if isinstance(output_results.get("vmonkey_values"), dict):
                    """
                    Structure of variable "actions" is as follows:
                    [action, parameters, description]
                    action: 'Found Entry Point', 'Execute Command', etc...
                    parameters: Parameters for function
                    description: 'Shell Function', etc...

                    external_functions is a list of built-in VBA functions
                    that were called
                    """
                    actions = output_results["vmonkey_values"]["actions"]
                    external_functions = output_results["vmonkey_values"][
                        "external_funcs"]
                    tmp_iocs = output_results["vmonkey_values"]["tmp_iocs"]
                    if output_results["vmonkey_err"]:
                        vmonkey_err = True
                        self.log.warning(output_results["vmonkey_err"])
                else:
                    vmonkey_err = True
            else:
                vmonkey_err = True

        except Exception:
            self.log.exception(
                f"Vipermonkey failed to analyze file {request.sha256}")

        if actions:
            # Creating action section
            action_section = ResultSection("Recorded Actions:",
                                           parent=self.result)
            action_section.add_tag("technique.macro", "Contains VBA Macro(s)")
            sub_action_sections: Dict[str, ResultSection] = {}
            for action, parameters, description in actions:  # Creating action sub-sections for each action
                if not description:  # For actions with no description, just use the type of action
                    description = action

                if description not in sub_action_sections:
                    # Action's description will be the sub-section name
                    sub_action_section = ResultSection(description,
                                                       parent=action_section)
                    sub_action_sections[description] = sub_action_section
                    if description == "Shell function":
                        sub_action_section.set_heuristic(2)
                else:
                    # Reuse existing section
                    sub_action_section = sub_action_sections[description]
                    if sub_action_section.heuristic:
                        sub_action_section.heuristic.increment_frequency()

                # Parameters are sometimes stored as a list, account for this
                if isinstance(parameters, list):
                    for item in parameters:
                        # Parameters includes more than strings (booleans for example)
                        if isinstance(item, str):
                            # Check for PowerShell
                            self.extract_powershell(item, sub_action_section,
                                                    request)
                    # Join list items into single string
                    param = ", ".join(str(p) for p in parameters)

                else:
                    param = parameters
                    # Parameters includes more than strings (booleans for example)
                    if isinstance(param, str):
                        self.extract_powershell(param, sub_action_section,
                                                request)

                # If the description field was empty, re-organize result section for this case
                if description == action:
                    sub_action_section.add_line(param)
                else:
                    sub_action_section.add_line(
                        f"Action: {action}, Parameters: {param}")

                # Check later for base64
                potential_base64.add(param)

                # Add urls/ips found in parameter to respective lists
                self.find_ip(param)
        # Check tmp_iocs
        res_temp_iocs = ResultSection("Runtime temporary IOCs")
        for ioc in tmp_iocs:
            self.extract_powershell(ioc, res_temp_iocs, request)
            potential_base64.add(ioc)
            self.find_ip(ioc)

        if len(res_temp_iocs.subsections) != 0 or res_temp_iocs.body:
            self.result.add_section(res_temp_iocs)

        # Add PowerShell score/tag if found
        if self.found_powershell:
            ResultSection("Discovered PowerShell code in file",
                          parent=self.result,
                          heuristic=Heuristic(3))

        # Check parameters and temp_iocs for base64
        base64_section = ResultSection("Possible Base64 found",
                                       heuristic=Heuristic(5, frequency=0))
        for param in potential_base64:
            self.check_for_b64(param, base64_section, request,
                               request.file_contents)
        if base64_section.body:
            self.result.add_section(base64_section)

        # Add url/ip tags
        self.add_ip_tags()

        # Create section for built-in VBA functions called
        if len(external_functions) > 0:
            external_func_section = ResultSection(
                "VBA functions called",
                body_format=BODY_FORMAT.MEMORY_DUMP,
                parent=self.result)
            for func in external_functions:
                if func in vba_builtins:
                    external_func_section.add_line(func + ": " +
                                                   vba_builtins[func])
                else:
                    external_func_section.add_line(func)

        # Add vmonkey log as a supplemental file if we have results
        if "stdout" in output_results and (vmonkey_err
                                           or request.result.sections):
            temp_log_copy = os.path.join(
                tempfile.gettempdir(), f"{request.sid}_vipermonkey_output.log")
            with open(temp_log_copy, "w") as temp_log_file:
                temp_log_file.write(output_results["stdout"])

            request.add_supplementary(temp_log_copy, "vipermonkey_output.log",
                                      "ViperMonkey log output")
            if vmonkey_err is True:
                ResultSection(
                    'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
                    parent=self.result,
                    heuristic=Heuristic(1),
                )
# Example #28
# 0
    def validate_certs(self, certs, cur_file, supplementary_files):
        """
        Analyze a certificate or certificate chain and tag notable properties.

        The start date, end date, issuer, and owner of each certificate are
        tagged, and heuristic sub-sections are raised for suspicious traits
        (self-signed, missing or invalid country code, inverted or overly
        long validity period). The certificate file itself is queued as a
        supplementary file whenever at least one heuristic fired.

        :param certs: the keytool -printcert string representation of a certificate/certificate chain
        :param cur_file: the file path (bytes) of the certificate (to be used in supplementary_files)
        :param supplementary_files: the service's supplementary files list; appended to in place
        :return: None
        """
        certs = certificate_chain_from_printcert(certs)

        for cert in certs:
            res_cert = ResultSection("Certificate Analysis", body=safe_str(cert.raw),
                                     body_format=BODY_FORMAT.MEMORY_DUMP)

            res_cert.add_tag('cert.valid.start', cert.valid_from)
            res_cert.add_tag('cert.valid.end', cert.valid_to)
            res_cert.add_tag('cert.issuer', cert.issuer)
            res_cert.add_tag('cert.owner', cert.owner)

            # The keytool validity strings end with a four-digit year,
            # e.g. "Mon Jan 01 00:00:00 UTC 2018" — take the last token.
            valid_from_year = int(cert.valid_from.split(" ")[-1])
            valid_to_year = int(cert.valid_to.split(" ")[-1])

            if cert.owner == cert.issuer:
                ResultSection("Certificate is self-signed", parent=res_cert,
                              heuristic=Heuristic(11))

            if not cert.country:
                ResultSection("Certificate owner has no country", parent=res_cert,
                              heuristic=Heuristic(12))

            if valid_from_year > valid_to_year:
                ResultSection("Certificate expires before validity date starts", parent=res_cert,
                              heuristic=Heuristic(15))

            if (valid_to_year - valid_from_year) > 30:
                ResultSection("Certificate valid more then 30 years", parent=res_cert,
                              heuristic=Heuristic(13))

            if cert.country:
                # A valid ISO 3166-1 alpha-2 country code is exactly two
                # characters and never purely numeric. Narrowed from a broad
                # `except Exception`: int() on a bad string raises ValueError
                # (TypeError kept in case the field is not a str).
                try:
                    int(cert.country)
                    is_int_country = True
                except (ValueError, TypeError):
                    is_int_country = False

                if len(cert.country) != 2 or is_int_country:
                    ResultSection("Invalid country code in certificate owner", parent=res_cert,
                                  heuristic=Heuristic(14))

            self.signature_block_certs.append(res_cert)

            # Only ship the raw signature block when something suspicious was flagged
            if len(res_cert.subsections) > 0:
                name = os.path.basename(cur_file)
                desc = f'JAR Signature Block: {name}'
                supplementary_files.append((cur_file.decode('utf-8'), name.decode('utf-8'), desc))
    def execute(self, request):
        """
        Service entry point: run layered de-obfuscation over the submitted file.

        Pipeline:
          1. Collect IOCs from the original file (optionally reported).
          2. Optionally extract embedded scripts (e.g. from HTML).
          3. Repeatedly apply de-obfuscation techniques until a pass produces
             no new layer or ``max_attempts`` passes are exhausted.
          4. Report the steps taken, a preview of the final layer, and any
             NEW IOCs that only appear after de-obfuscation.

        :param request: the service request holding file contents and params
        :return: None; results are attached to ``request.result``
        """
        # --- Setup ----------------------------------------------------------------------------------------------
        request.result = Result()
        patterns = PatternMatch()

        # Deep scans get many more de-obfuscation passes
        if request.deep_scan:
            max_attempts = 100
        else:
            max_attempts = 10

        self.files_extracted = set()
        self.hashes = set()
        # (type, value) IOC pairs seen in the ORIGINAL file; used later to
        # detect IOCs that only appear after de-obfuscation
        before = set()

        # --- Pre-Processing --------------------------------------------------------------------------------------
        # Get all IOCs prior to de-obfuscation
        pat_values = patterns.ioc_match(request.file_contents,
                                        bogon_ip=True,
                                        just_network=False)
        if pat_values:
            if request.get_param('extract_original_iocs'):
                ioc_res = ResultSection(
                    "The following IOCs were found in the original file",
                    parent=request.result,
                    body_format=BODY_FORMAT.MEMORY_DUMP)
            else:
                ioc_res = None
            for k, val in pat_values.items():
                # NOTE(review): `val == ""` compares the match collection to an
                # empty string — presumably a single-value case; verify against
                # PatternMatch.ioc_match's return shape
                if val == "":
                    asc_asc = unicodedata.normalize('NFKC', val).encode(
                        'ascii', 'ignore')
                    if ioc_res:
                        ioc_res.add_line(
                            f"Found {k.upper().replace('.', ' ')}: {safe_str(asc_asc)}"
                        )
                        ioc_res.add_tag(k, asc_asc)
                    before.add((k, asc_asc))
                else:
                    for v in val:
                        if ioc_res:
                            ioc_res.add_line(
                                f"Found {k.upper().replace('.', ' ')}: {safe_str(v)}"
                            )
                            ioc_res.add_tag(k, v)
                        before.add((k, v))

        # --- Prepare Techniques ----------------------------------------------------------------------------------
        # First-pass techniques: (display name, callable) pairs
        techniques = [
            ('MSOffice Embedded script', self.msoffice_embedded_script_string),
            ('CHR and CHRB decode', self.chr_decode),
            ('String replace', self.string_replace),
            ('Powershell carets', self.powershell_carets),
            ('Array of strings', self.array_of_strings),
            ('Fake array vars', self.vars_of_fake_arrays),
            ('Reverse strings', self.str_reverse),
            ('B64 Decode', self.b64decode_str),
            ('Simple XOR function', self.simple_xor_function),
        ]
        # Added to the rotation only once a full pass makes no progress
        second_pass = [('Concat strings', self.concat_strings),
                       ('MSWord macro vars', self.mswordmacro_vars),
                       ('Powershell vars', self.powershell_vars),
                       ('Charcode hex', self.charcode_hex)]
        # Run exactly once at the very end
        final_pass = [
            ('Charcode', self.charcode),
        ]

        # (file-type regex, section name, extractor) for stage 1
        code_extracts = [('.*html.*', "HTML scripts extraction",
                          self.extract_htmlscript)]

        layers_list = []
        layer = request.file_contents

        # --- Stage 1: Script Extraction --------------------------------------------------------------------------
        for pattern, name, func in code_extracts:
            if re.match(re.compile(pattern), request.task.file_type):
                extracted_parts = func(request.file_contents)
                layer = b"\n".join(extracted_parts).strip()
                layers_list.append((name, layer))
                break

        # --- Stage 2: Deobsfucation ------------------------------------------------------------------------------
        idx = 0
        first_pass_len = len(techniques)
        layers_count = len(layers_list)
        while True:
            if idx > max_attempts:
                # Out of passes: run the final-pass techniques once and stop
                final_pass.extend(techniques)
                for name, technique in final_pass:
                    res = technique(layer)
                    if res:
                        layers_list.append((name, res))
                break
            for name, technique in techniques:
                res = technique(layer)
                if res:
                    layers_list.append((name, res))
                    # Looks like it worked, restart with new layer
                    layer = res
            # If the layers haven't changed in a passing, break
            if layers_count == len(layers_list):
                if len(techniques) != first_pass_len:
                    # Second-pass techniques were already added and still no
                    # progress: finish with the final-pass techniques
                    final_pass.extend(techniques)
                    for name, technique in final_pass:
                        res = technique(layer)
                        if res:
                            layers_list.append((name, res))
                    break
                else:
                    # No progress with the first pass only: add the
                    # second-pass techniques and try again
                    for x in second_pass:
                        techniques.insert(0, x)
            layers_count = len(layers_list)
            idx += 1

        # --- Compiling results ----------------------------------------------------------------------------------
        if len(layers_list) > 0:
            extract_file = False
            num_layers = len(layers_list)
            heur_id = None

            # Compute heuristic
            # Severity scales with the number of layers peeled off
            if num_layers < 5:
                heur_id = 1
            elif num_layers < 10:
                heur_id = 2
            elif num_layers < 50:
                heur_id = 3
            elif num_layers < 100:
                heur_id = 4
            elif num_layers >= 100:
                heur_id = 5

            # Cleanup final layer
            clean = self.clean_up_final_layer(layers_list[-1][1])
            if clean != request.file_contents:
                # Check for new IOCs
                pat_values = patterns.ioc_match(clean,
                                                bogon_ip=True,
                                                just_network=False)
                # IOC type -> values that were NOT present in the original file
                diff_tags = {}

                for k, val in pat_values.items():
                    if val == "":
                        asc_asc = unicodedata.normalize('NFKC', val).encode(
                            'ascii', 'ignore')
                        if (k, asc_asc) not in before:
                            diff_tags.setdefault(k, [])
                            diff_tags[k].append(asc_asc)
                    else:
                        for v in val:
                            if (k, v) not in before:
                                diff_tags.setdefault(k, [])
                                diff_tags[k].append(v)

                # Attach the final layer when the scan is deep, when the file
                # is large and heavily obfuscated, or when new IOCs appeared
                if request.deep_scan or \
                        (len(clean) > 1000 and heur_id >= 4) or diff_tags:
                    extract_file = True

                # Display obfuscation steps
                mres = ResultSection(
                    "De-obfuscation steps taken by DeobsfuScripter",
                    parent=request.result)
                if heur_id:
                    mres.set_heuristic(heur_id)

                lcount = Counter([x[0] for x in layers_list])
                for l, c in lcount.items():
                    mres.add_line(f"{l}, {c} time(s).")

                # Display final layer
                byte_count = 5000
                if extract_file:
                    # Save extracted file
                    # Shorter preview since the full layer is attached
                    byte_count = 500
                    fn = f"{request.file_name}_decoded_final"
                    fp = os.path.join(self.working_directory, fn)
                    with open(fp, 'wb') as dcf:
                        dcf.write(clean)
                        self.log.debug(
                            f"Submitted dropped file for analysis: {fp}")
                    request.add_extracted(fp, fn, "Final deobfuscation layer")

                ResultSection(f"First {byte_count} bytes of the final layer:",
                              body=safe_str(clean[:byte_count]),
                              body_format=BODY_FORMAT.MEMORY_DUMP,
                              parent=request.result)

                # Display new IOCs from final layer
                if len(diff_tags) > 0:
                    ioc_new = ResultSection(
                        "New IOCs found after de-obfustcation",
                        parent=request.result,
                        body_format=BODY_FORMAT.MEMORY_DUMP)
                    has_network_heur = False
                    for ty, val in diff_tags.items():
                        for v in val:
                            if "network" in ty:
                                has_network_heur = True
                            ioc_new.add_line(
                                f"Found {ty.upper().replace('.', ' ')}: {safe_str(v)}"
                            )
                            ioc_new.add_tag(ty, v)

                    # Network IOCs score higher than other IOC types
                    if has_network_heur:
                        ioc_new.set_heuristic(7)
                    else:
                        ioc_new.set_heuristic(6)

                if len(self.files_extracted) > 0:
                    ext_file_res = ResultSection(
                        "The following files were extracted during the deobfuscation",
                        heuristic=Heuristic(8),
                        parent=request.result)
                    for f in self.files_extracted:
                        ext_file_res.add_line(os.path.basename(f))
                        request.add_extracted(
                            f, os.path.basename(f),
                            "File of interest deobfuscated from sample")
# Example #30
# 0
    def execute(self, request: ServiceRequest) -> None:
        """
        Service entry point: run layered de-obfuscation over the submitted file.

        Threaded variant of the de-obfuscation pipeline:
          1. Collect IOCs from the original file (optionally reported).
          2. Optionally extract embedded scripts (e.g. from HTML).
          3. Repeatedly apply de-obfuscation techniques in parallel until a
             pass produces no new layer or ``max_attempts`` passes are done.
          4. Report the steps taken, a preview of the final layer, and any
             NEW static URIs that only appear after de-obfuscation.

        :param request: the service request holding file contents and params
        :return: None; results are attached to ``request.result``
        """
        # --- Setup ----------------------------------------------------------------------------------------------
        request.result = Result()
        patterns = PatternMatch()

        # Deep scans get many more de-obfuscation passes
        if request.deep_scan:
            max_attempts = 100
        else:
            max_attempts = 10

        self.files_extracted = set()
        self.hashes = set()

        # --- Pre-Processing --------------------------------------------------------------------------------------
        # Get all IOCs prior to de-obfuscation
        pat_values = patterns.ioc_match(request.file_contents,
                                        bogon_ip=True,
                                        just_network=False)
        if pat_values and request.get_param('extract_original_iocs'):
            ioc_res = ResultSection(
                "The following IOCs were found in the original file",
                parent=request.result,
                body_format=BODY_FORMAT.MEMORY_DUMP)
            for k, val in pat_values.items():
                for v in val:
                    if ioc_res:
                        ioc_res.add_line(
                            f"Found {k.upper().replace('.', ' ')}: {safe_str(v)}"
                        )
                        ioc_res.add_tag(k, v)

        # --- Prepare Techniques ----------------------------------------------------------------------------------
        # First-pass techniques: (display name, callable) pairs
        techniques = [
            ('MSOffice Embedded script', self.msoffice_embedded_script_string),
            ('CHR and CHRB decode', self.chr_decode),
            ('String replace', self.string_replace),
            ('Powershell carets', self.powershell_carets),
            ('Array of strings', self.array_of_strings),
            ('Fake array vars', self.vars_of_fake_arrays),
            ('Reverse strings', self.str_reverse),
            ('B64 Decode', self.b64decode_str),
            ('Simple XOR function', self.simple_xor_function),
        ]
        # Added to the rotation only once a full pass makes no progress
        second_pass = [('Concat strings', self.concat_strings),
                       ('MSWord macro vars', self.mswordmacro_vars),
                       ('Powershell vars', self.powershell_vars),
                       ('Charcode hex', self.charcode_hex)]
        # Run exactly once at the very end
        final_pass = [
            ('Charcode', self.charcode),
        ]

        # (file-type regex, section name, extractor) for stage 1
        code_extracts = [('.*html.*', "HTML scripts extraction",
                          self.extract_htmlscript)]

        layers_list: List[Tuple[str, bytes]] = []
        layer = request.file_contents

        # --- Stage 1: Script Extraction --------------------------------------------------------------------------
        for pattern, name, func in code_extracts:
            if regex.match(regex.compile(pattern), request.task.file_type):
                extracted_parts = func(request.file_contents)
                layer = b"\n".join(extracted_parts).strip()
                layers_list.append((name, layer))
                break

        # --- Stage 2: Deobsfucation ------------------------------------------------------------------------------
        idx = 0
        first_pass_len = len(techniques)
        layers_count = len(layers_list)
        while True:
            if idx > max_attempts:
                # Out of passes: run the final-pass techniques once and stop
                final_pass.extend(techniques)
                for name, technique in final_pass:
                    res = technique(layer)
                    if res:
                        layers_list.append((name, res))
                break
            # All techniques run against the SAME input layer in parallel;
            # the last successful result becomes the next layer
            with ThreadPoolExecutor() as executor:
                threads = [
                    executor.submit(technique, layer)
                    for name, technique in techniques
                ]
                results = [thread.result() for thread in threads]
                for i in range(len(results)):
                    result = results[i]
                    if result:
                        layers_list.append((techniques[i][0], result))
                        # Looks like it worked, restart with new layer
                        layer = result
            # If the layers haven't changed in a passing, break
            if layers_count == len(layers_list):
                if len(techniques) != first_pass_len:
                    # Second-pass techniques were already added and still no
                    # progress: finish with the final-pass techniques
                    final_pass.extend(techniques)
                    with ThreadPoolExecutor() as executor:
                        threads = [
                            executor.submit(technique, layer)
                            for name, technique in final_pass
                        ]
                        results = [thread.result() for thread in threads]
                        for i in range(len(results)):
                            result = results[i]
                            if result:
                                # FIX: results are ordered by final_pass, not
                                # techniques — label each layer with the
                                # technique that actually produced it
                                layers_list.append((final_pass[i][0], result))
                    break
                # No progress with the first pass only: add the second-pass
                # techniques and try again
                for x in second_pass:
                    techniques.insert(0, x)
            layers_count = len(layers_list)
            idx += 1

        # --- Compiling results ----------------------------------------------------------------------------------
        if len(layers_list) > 0:
            extract_file = False
            num_layers = len(layers_list)

            # Compute heuristic
            # Severity scales with the number of layers peeled off
            if num_layers < 5:
                heur_id = 1
            elif num_layers < 10:
                heur_id = 2
            elif num_layers < 50:
                heur_id = 3
            elif num_layers < 100:
                heur_id = 4
            else:  # num_layers >= 100
                heur_id = 5

            # Cleanup final layer
            clean = self.clean_up_final_layer(layers_list[-1][1])
            if clean != request.file_contents:
                # Check for new IOCs
                pat_values = patterns.ioc_match(clean,
                                                bogon_ip=True,
                                                just_network=False)
                diff_tags: Dict[str, List[bytes]] = {}

                # Only static URIs are diffed against the original content
                for uri in pat_values.get('network.static.uri', []):
                    # Compare URIs without query string
                    uri = uri.split(b'?', 1)[0]
                    if uri not in request.file_contents:
                        diff_tags.setdefault('network.static.uri', [])
                        diff_tags['network.static.uri'].append(uri)

                # Attach the final layer when the scan is deep, when the file
                # is large and heavily obfuscated, or when new URIs appeared
                if request.deep_scan or (len(clean) > 1000
                                         and heur_id >= 4) or diff_tags:
                    extract_file = True

                # Display obfuscation steps
                mres = ResultSection(
                    "De-obfuscation steps taken by DeobsfuScripter",
                    parent=request.result)
                if heur_id:
                    mres.set_heuristic(heur_id)

                lcount = Counter([x[0] for x in layers_list])
                for l, c in lcount.items():
                    mres.add_line(f"{l}, {c} time(s).")

                # Display final layer
                byte_count = 5000
                if extract_file:
                    # Save extracted file
                    # Shorter preview since the full layer is attached
                    byte_count = 500
                    file_name = f"{os.path.basename(request.file_name)}_decoded_final"
                    file_path = os.path.join(self.working_directory, file_name)
                    # Ensure directory exists before write
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    with open(file_path, 'wb+') as f:
                        f.write(clean)
                        self.log.debug(
                            f"Submitted dropped file for analysis: {file_path}"
                        )
                    request.add_extracted(file_path, file_name,
                                          "Final deobfuscation layer")

                ResultSection(f"First {byte_count} bytes of the final layer:",
                              body=safe_str(clean[:byte_count]),
                              body_format=BODY_FORMAT.MEMORY_DUMP,
                              parent=request.result)

                # Display new IOCs from final layer
                if len(diff_tags) > 0:
                    ioc_new = ResultSection(
                        "New IOCs found after de-obfustcation",
                        parent=request.result,
                        body_format=BODY_FORMAT.MEMORY_DUMP)
                    has_network_heur = False
                    for ty, val in diff_tags.items():
                        for v in val:
                            if "network" in ty:
                                has_network_heur = True
                            ioc_new.add_line(
                                f"Found {ty.upper().replace('.', ' ')}: {safe_str(v)}"
                            )
                            ioc_new.add_tag(ty, v)

                    # Network IOCs score higher than other IOC types
                    if has_network_heur:
                        ioc_new.set_heuristic(7)
                    else:
                        ioc_new.set_heuristic(6)

                if len(self.files_extracted) > 0:
                    ext_file_res = ResultSection(
                        "The following files were extracted during the deobfuscation",
                        heuristic=Heuristic(8),
                        parent=request.result)
                    for extracted in self.files_extracted:
                        file_name = os.path.basename(extracted)
                        ext_file_res.add_line(file_name)
                        request.add_extracted(
                            extracted, file_name,
                            "File of interest deobfuscated from sample")