Example #1
File: ole.py Project: khw5123/Project
 def unarc(self, arc_engine_id, arc_name, fname_in_arc):
     import oletools.thirdparty.olefile.olefile as olefile
     import oletools.olevba as vba
     data = None
     if arc_engine_id == 'arc_ole':  # if the archive-extraction engine ID is arc_ole
         o = olefile.OleFileIO(arc_name)
         fp = o.openstream(fname_in_arc)  # open the file stored inside the OLE file
         data = fp.read()  # read out its data
         o.close()
         return data
     elif arc_engine_id == 'arc_vba':  # if the archive-extraction engine ID is arc_vba
         v = vba.VBA_Parser(arc_name)
         if v.detect_vba_macros():  # if the file contains VBA macros
             macros = v.extract_all_macros()  # extract the macros
             for macro in macros:
                 if v.type == 'OLE':
                     name = macro[1]
                 elif v.type == 'OpenXML':
                     name = macro[0] + '/' + macro[1]
                 else:
                     name = 'UNKNOWN'
                 if name == fname_in_arc:
                     data = macro[3]  # VBA Code
                     break
         v.close()
         return data
     return None
Example #2
File: ole.py Project: khw5123/Project
 def arclist(self, filename, fileformat):
     import oletools.thirdparty.olefile.olefile as olefile
     import oletools.olevba as vba
     file_scan_list = []  # list of [archive engine ID, name string] pairs for items to scan
     if 'ff_ole' in fileformat:  # if the format analysis detected an OLE file format
         # extract the list of OLE streams
         o = olefile.OleFileIO(filename)
         for path in o.listdir():
             name = '/'.join(path)
             if o.get_type(name) == olefile.STGTY_STREAM:  # if the entry is a stream (file)
                 file_scan_list.append(['arc_ole', name])
         o.close()
         # extract the list of macros
         v = vba.VBA_Parser(filename)
         if v.detect_vba_macros():  # if the file contains VBA macros
             macros = v.extract_all_macros()  # extract the macros
             for macro in macros:
                 name = macro[1]
                 print(name + '\n' + macro[3] + '\n')
                 file_scan_list.append(
                     ['arc_vba', name.encode('ascii', 'ignore')])
         v.close()
     elif 'ff_zip' in fileformat:  # if it is an OOXML (Office Open XML) file format
         # extract the list of macros
         v = vba.VBA_Parser(filename)
         if v.detect_vba_macros():  # if the file contains VBA macros
             macros = v.extract_all_macros()  # extract the macros
             for macro in macros:
                 name = macro[0] + '/' + macro[1]
                 print(name + '\n' + macro[3] + '\n')
                 file_scan_list.append(
                     ['arc_vba', name.encode('ascii', 'ignore')])
         v.close()
     return file_scan_list
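
Usage note: a minimal driver sketch for the two methods above. It assumes they belong to a plugin class, here called OlePlugin purely for illustration (the real class name is not shown in these excerpts), and 'sample.doc' is a placeholder path.

# Illustrative only: OlePlugin is a hypothetical wrapper exposing the
# arclist()/unarc() methods shown in examples #1 and #2.
plugin = OlePlugin()

# List every scannable item (OLE streams and VBA macros) in a document.
for engine_id, name in plugin.arclist('sample.doc', ['ff_ole']):
    # Pull out the raw bytes (stream content or VBA source) for each item.
    data = plugin.unarc(engine_id, 'sample.doc', name)
    if data is not None:
        print('%s (%s): %d bytes' % (name, engine_id, len(data)))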
Example #3
    def close_destination(self, destination):
        if destination.cword == b'objdata':
            log.debug('*** Close object data at index %Xh' % self.index)
            rtfobj = RtfObject()
            self.objects.append(rtfobj)
            rtfobj.start = destination.start
            rtfobj.end = destination.end
            # Filter out all whitespaces first (just ignored):
            hexdata1 = destination.data.translate(None, b' \t\r\n\f\v')
            # Then filter out any other non-hex character:
            hexdata = re.sub(b'[^a-fA-F0-9]', b'', hexdata1)
            if len(hexdata) < len(hexdata1):
                # this is only for debugging:
                nonhex = re.sub(b'[a-fA-F0-9]', b'', hexdata1)
                log.debug('Found non-hex chars in hexdata: %r' % nonhex)
            # MS Word accepts an extra hex digit, so we need to trim it if present:
            if len(hexdata) & 1:
                log.debug('Odd length, trimmed last byte.')
                hexdata = hexdata[:-1]
            rtfobj.hexdata = hexdata
            object_data = binascii.unhexlify(hexdata)
            rtfobj.rawdata = object_data
            # TODO: check if all hex data is extracted properly

            obj = oleobj.OleObject()
            try:
                obj.parse(object_data)
                rtfobj.format_id = obj.format_id
                rtfobj.class_name = obj.class_name
                rtfobj.oledata_size = obj.data_size
                rtfobj.oledata = obj.data
                rtfobj.is_ole = True
                if obj.class_name.lower() == b'package':
                    opkg = oleobj.OleNativeStream(bindata=obj.data,
                                                  package=True)
                    rtfobj.filename = opkg.filename
                    rtfobj.src_path = opkg.src_path
                    rtfobj.temp_path = opkg.temp_path
                    rtfobj.olepkgdata = opkg.data
                    rtfobj.is_package = True
                else:
                    if olefile.isOleFile(obj.data):
                        ole = olefile.OleFileIO(obj.data)
                        rtfobj.clsid = ole.root.clsid
                        rtfobj.clsid_desc = clsid.KNOWN_CLSIDS.get(
                            rtfobj.clsid,
                            'unknown CLSID (please report at https://github.com/decalage2/oletools/issues)'
                        )
            except Exception:
                log.debug('*** Not an OLE 1.0 Object')
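
The hex clean-up performed above can also be exercised on its own; a minimal sketch (the sample \objdata payload is made up):

import re
import binascii

# Made-up \objdata payload with whitespace and a stray character mixed in.
destination_data = b'd0cf 11e0\r\na1b1-1ae1\t0'

# Drop whitespace first, then anything else that is not a hex digit.
hexdata = destination_data.translate(None, b' \t\r\n\f\v')
hexdata = re.sub(b'[^a-fA-F0-9]', b'', hexdata)

# MS Word tolerates an extra trailing hex digit, so trim it before decoding.
if len(hexdata) & 1:
    hexdata = hexdata[:-1]

object_data = binascii.unhexlify(hexdata)
print(repr(object_data))  # b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1'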
Example #4
def process_file(container, filename, data, output_dir=None):
    if output_dir:
        if not os.path.isdir(output_dir):
            log.info('creating output directory %s' % output_dir)
            os.mkdir(output_dir)

        fname_prefix = os.path.join(output_dir, sanitize_filename(filename))
    else:
        base_dir = os.path.dirname(filename)
        sane_fname = sanitize_filename(filename)
        fname_prefix = os.path.join(base_dir, sane_fname)

    # TODO: option to extract objects to files (false by default)
    if data is None:
        data = open(filename, 'rb').read()
    print('-' * 79)
    print('File: %r - %d bytes' % (filename, len(data)))
    ole = olefile.OleFileIO(data)
    index = 1
    for stream in ole.listdir():
        if stream[-1] == '\x01Ole10Native':
            objdata = ole.openstream(stream).read()
            stream_path = '/'.join(stream)
            log.debug('Checking stream %r' % stream_path)
            try:
                print('extract file embedded in OLE object from stream %r:' %
                      stream_path)
                print('Parsing OLE Package')
                opkg = OleNativeStream(bindata=objdata)
                print('Filename = %r' % opkg.filename)
                print('Source path = %r' % opkg.src_path)
                print('Temp path = %r' % opkg.temp_path)
                if opkg.filename:
                    fname = '%s_%s' % (fname_prefix,
                                       sanitize_filename(opkg.filename))
                else:
                    fname = '%s_object_%03d.noname' % (fname_prefix, index)
                print('saving to file %s' % fname)
                open(fname, 'wb').write(opkg.data)
                index += 1
            except Exception:
                log.debug('*** Not an OLE 1.0 Object')
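
Call sketch for process_file() above (file names and the output directory are placeholders):

# Scan a document read from disk; with data=None the function opens and
# reads the file itself.
process_file(None, 'sample.doc', None, output_dir='extracted')

# Scan bytes already held in memory, e.g. a member pulled out of an archive.
with open('sample.doc', 'rb') as f:
    process_file('archive.zip', 'sample.doc', f.read(), output_dir='extracted')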
Example #5
def main():
    usage = 'usage: olemap [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r",
                      action="store_true",
                      dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option(
        "-z",
        "--zip",
        dest='zip_password',
        type='str',
        default=None,
        help=
        'if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)'
    )
    parser.add_option(
        "-f",
        "--zipfname",
        dest='zip_fname',
        type='str',
        default='*',
        help=
        'if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)'
    )
    # parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
    #                         help="logging level debug/info/warning/error/critical (default=%default)")
    parser.add_option("--header",
                      action="store_true",
                      dest="header",
                      help='Display the OLE header (default: yes)')
    parser.add_option("--fat",
                      action="store_true",
                      dest="fat",
                      help='Display the FAT (default: no)')
    parser.add_option("--minifat",
                      action="store_true",
                      dest="minifat",
                      help='Display the MiniFAT (default: no)')
    parser.add_option('-x',
                      "--exdata",
                      action="store_true",
                      dest="extra_data",
                      help='Display a hex dump of extra data at end of file')

    # TODO: add logfile option

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0:
        print(BANNER)
        print(__doc__)
        parser.print_help()
        sys.exit()

    # if no display option is provided, set defaults:
    default_options = False
    if not (options.header or options.fat or options.minifat):
        options.header = True
        # options.fat = True
        # options.minifat = True
        default_options = True

    # print banner with version
    print(BANNER)

    for container, filename, data in xglob.iter_files(
            args,
            recursive=options.recursive,
            zip_password=options.zip_password,
            zip_fname=options.zip_fname):
        # TODO: handle xglob errors
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        full_name = '%s in %s' % (filename,
                                  container) if container else filename
        print("-" * 79)
        print('FILE: %s\n' % full_name)
        if data is not None:
            # data extracted from zip file
            ole = olefile.OleFileIO(data)
        else:
            # normal filename
            ole = olefile.OleFileIO(filename)

        if options.header:
            show_header(ole, extra_data=options.extra_data)
        if options.fat:
            show_fat(ole)
        if options.minifat:
            show_minifat(ole)

        ole.close()

    # if no display option is provided, print a tip:
    if default_options:
        print(
            'To display the FAT or MiniFAT structures, use options --fat or --minifat, and -h for help.'
        )
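
The xglob.iter_files() loop used above can also be driven on its own; a minimal sketch, assuming the import path used by recent oletools releases (the '*.doc' pattern is a placeholder):

import olefile
from oletools.thirdparty.xglob import xglob

# Walk a set of files (optionally inside zip archives) and print the root
# storage CLSID of every OLE file found.
for container, filename, data in xglob.iter_files(['*.doc'], recursive=True):
    if container and filename.endswith('/'):
        continue  # skip directory entries stored inside zip archives
    ole = olefile.OleFileIO(data if data is not None else filename)
    print('%s: %s' % (filename, ole.root.clsid))
    ole.close()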
Example #6
    def extract_streams(self, file_name, file_contents):
        oles = {}
        try:
            streams_res = ResultSection(score=SCORE.INFO,
                                        title_text="Embedded document stream(s)")

            is_zip = False
            is_ole = False
            # Get the OLEs
            if zipfile.is_zipfile(file_name):
                is_zip = True
                z = zipfile.ZipFile(file_name)
                for f in z.namelist():
                    if f in oles:
                        continue
                    bin_data = z.open(f).read()
                    bin_fname = os.path.join(self.working_directory,
                                             "{}.tmp".format(hashlib.sha256(bin_data).hexdigest()))
                    with open(bin_fname, 'wb') as bin_fh:
                        bin_fh.write(bin_data)
                    if olefile.isOleFile(bin_fname):
                        oles[f] = olefile.OleFileIO(bin_fname)
                    elif olefile2.isOleFile(bin_fname):
                        oles[f] = olefile2.OleFileIO(bin_fname)
                z.close()

            if olefile.isOleFile(file_name):
                is_ole = True
                oles[file_name] = olefile.OleFileIO(file_name)

            elif olefile2.isOleFile(file_name):
                is_ole = True
                oles[file_name] = olefile2.OleFileIO(file_name)

            if is_zip and is_ole:
                streams_res.report_heuristics(Oletools.AL_Oletools_002)

            decompressed_macros = False
            for ole_filename in oles.iterkeys():
                try:
                    decompressed_macros |= self.process_ole_stream(oles[ole_filename], streams_res)
                except Exception:
                    continue

            if decompressed_macros:
                streams_res.score = SCORE.HIGH

            for _, offset, rtfobject in rtf_iter_objects(file_contents):
                rtfobject_name = hex(offset) + '.rtfobj'
                extracted_obj = os.path.join(self.working_directory, rtfobject_name)
                with open(extracted_obj, 'wb') as fh:
                    fh.write(rtfobject)
                self.request.add_extracted(extracted_obj,
                                           'Embedded RTF Object at offset %s' % hex(offset),
                                           rtfobject_name)

            if len(streams_res.body) > 0:
                self.ole_result.add_section(streams_res)

        except Exception:
            self.log.debug("Error extracting streams: {}".format(traceback.format_exc(limit=2)))

        finally:
            for fd in oles.itervalues():
                try:
                    fd.close()
                except:
                    pass