Example #1
0
    def extract_streams(self, file_name, file_contents):
        """Process OLE streams found in the sample and extract embedded RTF objects.

        OLE containers are gathered from the sample itself and, when the sample
        is a zip archive, from each archive member (dumped to the working
        directory first). Every container is handed to ``process_ole_stream``;
        objects yielded by ``rtf_iter_objects`` are written to the working
        directory and added as extracted files. The resulting section is
        attached to ``self.ole_result`` only when it has a body.

        Args:
            file_name: Path of the sample on disk.
            file_contents: Contents of the sample, passed to ``rtf_iter_objects``.

        Exceptions raised along the way are logged at debug level and
        swallowed; OLE handles opened here are always closed.
        """
        oles = {}
        try:
            streams_res = ResultSection(score=SCORE.INFO,
                                        title_text="Embedded document stream(s)")

            is_zip = False
            is_ole = False
            # Get the OLEs
            if zipfile.is_zipfile(file_name):
                is_zip = True
                # The context manager closes the archive even when a member fails.
                with zipfile.ZipFile(file_name) as z:
                    for f in z.namelist():
                        if f in oles:
                            continue
                        bin_data = z.read(f)
                        bin_fname = os.path.join(self.working_directory,
                                                 "{}.tmp".format(hashlib.sha256(bin_data).hexdigest()))
                        # Member data is binary: text mode would mangle it on
                        # platforms that translate newlines.
                        with open(bin_fname, 'wb') as bin_fh:
                            bin_fh.write(bin_data)
                        if olefile.isOleFile(bin_fname):
                            oles[f] = olefile.OleFileIO(bin_fname)
                        elif olefile2.isOleFile(bin_fname):
                            oles[f] = olefile2.OleFileIO(bin_fname)

            if olefile.isOleFile(file_name):
                is_ole = True
                oles[file_name] = olefile.OleFileIO(file_name)

            elif olefile2.isOleFile(file_name):
                is_ole = True
                oles[file_name] = olefile2.OleFileIO(file_name)

            if is_zip and is_ole:
                streams_res.report_heuristics(Oletools.AL_Oletools_002)

            decompressed_macros = False
            for ole_filename in oles:
                try:
                    decompressed_macros |= self.process_ole_stream(oles[ole_filename], streams_res)
                except Exception:
                    # Best effort: one unreadable container must not stop the others.
                    self.log.debug("Error processing OLE stream: {}".format(traceback.format_exc(limit=2)))
                    continue

            if decompressed_macros:
                streams_res.score = SCORE.HIGH

            for _, offset, rtfobject in rtf_iter_objects(file_contents):
                rtfobject_name = hex(offset) + '.rtfobj'
                extracted_obj = os.path.join(self.working_directory, rtfobject_name)
                with open(extracted_obj, 'wb') as fh:
                    fh.write(rtfobject)
                self.request.add_extracted(extracted_obj,
                                           'Embedded RTF Object at offset %s' % hex(offset),
                                           rtfobject_name)

            if len(streams_res.body) > 0:
                self.ole_result.add_section(streams_res)

        except Exception:
            self.log.debug("Error extracting streams: {}".format(traceback.format_exc(limit=2)))

        finally:
            # Closing is best effort; a handle that fails to close is ignored.
            for fd in oles.values():
                try:
                    fd.close()
                except Exception:
                    pass
Example #2
0
    def check_xml_strings(self, path):
        """Scan the members of a zip-based document for template targets, IOCs and base64.

        For every archive member (only the first 500000 bytes are examined) this:

        * collects external ``attachedTemplate`` targets, later filtered
          through ``parse_uri``;
        * when a ``PatternMatch`` instance could be built, reports IOC strings
          that are not on the benign lists defined below;
        * decodes base64-looking runs, extracting payloads whose MIME type
          matches an entry of ``self.filetypes`` and reporting ASCII-only
          payloads as text.

        Sections that end up with a score above zero are added to
        ``self.ole_result``. Nothing is scanned when ``path`` is not a zip file.

        Args:
            path: Path of the sample on disk.

        Exceptions raised during the scan are logged at debug level and swallowed.
        """
        xml_target_res = ResultSection(score=SCORE.NULL, title_text="Attached External Template Targets in XML")
        xml_ioc_res = ResultSection(score=SCORE.NULL, title_text="IOCs in XML:")
        xml_b64_res = ResultSection(score=SCORE.NULL, title_text="Base64 in XML:")
        try:
            template_re = re.compile(r'/attachedTemplate"\s+[Tt]arget="((?!file)[^"]+)"\s+[Tt]argetMode="External"')
            b64_re = re.compile('(([\x20]{0,2}[A-Za-z0-9+/]{3,}={0,2}[\r]?[\n]?){6,})')
            # IOC values that contain, end with, or equal one of these are not reported.
            pat_strs = ["http://purl.org", "schemas.microsoft.com", "schemas.openxmlformats.org",
                        "www.w3.org"]
            pat_ends = ("themeManager.xml", "MSO.DLL", "stdole2.tlb", "vbaProject.bin", "VBE6.DLL", "VBE7.DLL")
            pat_whitelist = ['Management', 'Manager', "microsoft.com"]
            uris = []
            zip_uris = []
            # sha256 of decoded data -> [b64 length, b64 sample, ascii dump or message,
            #                            decoded data ("" when extracted as a file), member name]
            b64results = {}
            b64_extracted = set()
            if zipfile.is_zipfile(path):
                try:
                    patterns = PatternMatch()
                except Exception:
                    patterns = None
                # The context manager closes the archive even when a member fails.
                with zipfile.ZipFile(path) as z:
                    for f in z.namelist():
                        data = z.read(f)
                        if len(data) > 500000:
                            data = data[:500000]
                            xml_ioc_res.report_heuristics(Oletools.AL_Oletools_003)
                            # NOTE(review): min() caps the accumulated score at 1 here;
                            # confirm max() was not intended.
                            xml_ioc_res.score = min(xml_ioc_res.score, 1)
                        zip_uris.extend(template_re.findall(data))
                        # Use FrankenStrings modules to find other strings of interest
                        # Plain IOCs
                        if patterns:
                            st_value = patterns.ioc_match(data, bogon_ip=True)
                            for ty, val in st_value.iteritems():
                                for v in self._xml_ioc_values(val):
                                    if any(x in v for x in pat_strs) \
                                            or v.endswith(pat_ends) \
                                            or v in pat_whitelist:
                                        continue
                                    xml_ioc_res.score += 1
                                    xml_ioc_res.add_line("Found %s string: %s in file %s"
                                                         % (TAG_TYPE[ty].replace("_", " "), v, f))
                                    xml_ioc_res.add_tag(TAG_TYPE[ty], v, TAG_WEIGHT.LOW)

                        # Base64
                        b64_matches = set()
                        for b64_tuple in b64_re.findall(data):
                            b64 = b64_tuple[0].replace('\n', '').replace('\r', '').replace(' ', '')
                            # Keep only runs with more than 6 distinct characters whose
                            # length is at least 16 and a multiple of 4.
                            if len(set(b64)) > 6 and len(b64) >= 16 and len(b64) % 4 == 0:
                                b64_matches.add(b64)

                        # Using some selected code from 'base64dump.py' by
                        # Didier Stevens@https://DidierStevens.com
                        # Runs once per member, after all candidates are collected, so
                        # each candidate is decoded a single time.
                        for b64_string in b64_matches:
                            try:
                                base64data = binascii.a2b_base64(b64_string)
                                sha256hash = hashlib.sha256(base64data).hexdigest()
                                if sha256hash in b64_extracted:
                                    continue
                                b64_extract = False
                                # Search for embedded files of interest
                                if 500 < len(base64data) < 8000000:
                                    ftype = magic.Magic(mime=True).from_buffer(base64data)
                                    if 'octet-stream' not in ftype \
                                            and any(ft in ftype for ft in self.filetypes):
                                        b64_file_path = os.path.join(self.working_directory,
                                                                     "{}_b64_decoded"
                                                                     .format(sha256hash[0:10]))
                                        # Write the payload first so the registered path
                                        # always refers to an existing file.
                                        with open(b64_file_path, 'wb') as b64_file:
                                            b64_file.write(base64data)
                                        self.request.add_extracted(b64_file_path,
                                                                   "Extracted b64 file during "
                                                                   "OLETools analysis.")
                                        self.log.debug("Submitted dropped file for analysis: {}"
                                                       .format(b64_file_path))

                                        b64results[sha256hash] = [len(b64_string), b64_string[0:50],
                                                                  "[Possible base64 file contents in {}. "
                                                                  "See extracted files.]".format(f), "",
                                                                  "{}".format(f)]

                                        b64_extract = True
                                        b64_extracted.add(sha256hash)
                                if not b64_extract and len(base64data) > 30:
                                    if all(ord(c) < 128 for c in base64data):
                                        check_utf16 = base64data.decode('utf-16').encode('ascii', 'ignore')
                                        if check_utf16 != "":
                                            asc_b64 = check_utf16
                                        else:
                                            asc_b64 = self.ascii_dump(base64data)
                                        # If data has less then 7 uniq chars then ignore
                                        if len(set(asc_b64)) > 6:
                                            if patterns:
                                                self._xml_add_ioc_tags(
                                                    xml_ioc_res, patterns.ioc_match(asc_b64, bogon_ip=True))
                                            b64results[sha256hash] = [len(b64_string), b64_string[0:50], asc_b64,
                                                                      base64data, "{}".format(f)]
                            except Exception as e:
                                # Best effort: a candidate that cannot be decoded is skipped.
                                self.log.debug("Failed to process base64 candidate: %r" % (e,))

                for b64index, (b64k, b64l) in enumerate(b64results.iteritems(), 1):
                    # NOTE(review): this assignment runs on every iteration, so the
                    # `+= 1` below only survives for the last result; confirm intended.
                    xml_b64_res.score = 100
                    # Title names the member the blob was found in (stored with the result).
                    sub_b64_res = (ResultSection(SCORE.NULL, title_text="Result {0} in file {1}"
                                                 .format(b64index, b64l[4]), parent=xml_b64_res))
                    sub_b64_res.add_line('BASE64 TEXT SIZE: {}'.format(b64l[0]))
                    sub_b64_res.add_line('BASE64 SAMPLE TEXT: {}[........]'.format(b64l[1]))
                    sub_b64_res.add_line('DECODED SHA256: {}'.format(b64k))
                    subb_b64_res = (ResultSection(SCORE.NULL, title_text="DECODED ASCII DUMP:",
                                                  body_format=TEXT_FORMAT.MEMORY_DUMP,
                                                  parent=sub_b64_res))
                    subb_b64_res.add_line('{}'.format(b64l[2]))
                    if b64l[3] != "" and patterns:
                        st_value = patterns.ioc_match(b64l[3], bogon_ip=True)
                        if len(st_value) > 0:
                            xml_b64_res.score += 1
                            self._xml_add_ioc_tags(xml_b64_res, st_value)

                for uri in zip_uris:
                    if self.parse_uri(uri):
                        uris.append(uri)

                uris = list(set(uris))
                # If there are domains or IPs, report them
                if uris:
                    xml_target_res.score = 500
                    xml_target_res.add_lines(uris)
                    xml_target_res.report_heuristics(Oletools.AL_Oletools_001)

        except Exception as e:
            self.log.debug("Failed to analyze XML: {}".format(e))

        if xml_target_res.score > 0:
            self.ole_result.add_section(xml_target_res)
        if xml_ioc_res.score > 0:
            self.ole_result.add_section(xml_ioc_res)
        if xml_b64_res.score > 0:
            self.ole_result.add_section(xml_b64_res)

    @staticmethod
    def _xml_ioc_values(val):
        """Return the distinct values of one ``ioc_match`` entry as a list."""
        if val == "":
            # NOTE(review): this branch only runs for an empty string, so it yields
            # a single empty value; kept as in the original handling.
            return [unicodedata.normalize('NFKC', val).encode('ascii', 'ignore')]
        return list(set(val))

    def _xml_add_ioc_tags(self, section, st_value):
        """Add a low-weight tag to ``section`` for every value in an ``ioc_match`` result."""
        for ty, val in st_value.iteritems():
            for v in self._xml_ioc_values(val):
                section.add_tag(TAG_TYPE[ty], v, TAG_WEIGHT.LOW)