def _process(self, file):
    """Scan the 'Pictures' OLE stream and record the (offset, length) of
    each embedded image into self._files.

    :param file: path or file-like object accepted by OleFile.OleFileIO.
    """
    olefile = OleFile.OleFileIO(file)
    # As with pptx, a missing Pictures stream is not an error.
    if not olefile.exists("Pictures"):
        return
        # raise IOError("Pictures stream not found")
    self.__stream = olefile.openstream("Pictures")
    stream = self.__stream
    offset = 0
    while True:
        header = stream.read(self.headerlen)
        offset += self.headerlen
        if not header:
            break
        # Record header: instance, type, payload length (little endian).
        rec_instance, rec_type, rec_len = struct.unpack_from(
            "<HHL", header)
        # Move to the next header.
        stream.seek(rec_len, 1)
        if DEBUG:
            print("%X %X %sb" % (rec_type, rec_instance, rec_len))
        # `formats` maps (rec_type, rec_instance) -> (extra bytes, ext).
        # NOTE(review): an unknown record type makes .get() return None and
        # the unpacking below raise TypeError — presumably every type that
        # occurs in practice is covered; confirm.
        extrabytes, ext = formats.get((rec_type, rec_instance))
        # Strip the format-specific extra header bytes from the payload.
        rec_len -= extrabytes
        offset += extrabytes
        self._files.append((offset, rec_len))
        offset += rec_len
async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
    """Extract every stream of an OLE document as a separate payload.

    Ole10Native streams are unwrapped so the embedded file's own bytes (and,
    when present, its original filename) are extracted instead of the raw
    wrapper stream.

    :param payload: the OLE document to dissect.
    :param request: scan request context (unused here, part of the API).
    :returns: WorkerResponse with one ExtractedPayload per stream and any
        per-stream errors collected (a bad stream does not abort the scan).
    """
    extracted: List[ExtractedPayload] = []
    errors: List[Error] = []
    ole_object = olefile.OleFileIO(payload.content)
    streams = ole_object.listdir(streams=True)
    # FIX: enumerate() yields a correct O(1) index; the previous
    # streams.index(stream) was O(n) per stream and returned the index of
    # the FIRST occurrence when two streams share the same path.
    for index, stream in enumerate(streams):
        try:
            stream_buffer = ole_object.openstream(stream).read()
            # Build a filesystem-safe name from the stream path.
            name = ''.join(
                filter(lambda x: x in string.printable, '_'.join(stream)))
            if stream_buffer.endswith(b'\x01Ole10Native'):
                ole_native = oleobj.OleNativeStream(stream_buffer)
                # Prefer the embedded original filename when available.
                if ole_native.filename:
                    name = f'{name}_{str(ole_native.filename)}'
                else:
                    name = f'{name}_olenative'
                content = ole_native.data
            else:
                content = stream_buffer
            # Single construction point — previously duplicated verbatim in
            # both branches.
            meta = PayloadMeta(
                should_archive=False,
                extra_data={
                    'index': index,
                    'name': name
                },
            )
            extracted.append(ExtractedPayload(content, meta))
        except Exception as err:
            errors.append(
                Error(
                    error=str(err),
                    plugin_name=self.plugin_name,
                    payload_id=payload.payload_id,
                ))
    return WorkerResponse(extracted=extracted, errors=errors)
def load(self):
    """Inspect self.source_file and set is_ole_file, is_encrypted and
    encryption_type; parses the EncryptionInfo stream when encrypted.

    Idempotent: subsequent calls return immediately.
    NOTE(review): non-OLE files get `return False` while other paths
    `return None` — callers should not rely on the return value; confirm.
    """
    # have we already loaded?
    if self.loaded:
        return
    self.loaded = True
    if not olefile.isOleFile(self.source_file):
        return False
    self.is_ole_file = True
    ole = olefile.OleFileIO(self.source_file)
    try:
        # is this document encrypted?
        if not ole.exists('encryptioninfo') or not ole.exists(
                'encryptedpackage'):
            self.is_encrypted = False
            return
        self.is_encrypted = True
        info_stream = ole.openstream('EncryptionInfo')
        # is this standard, extensible or agile encryption?
        # agile will have an xml tag after the first 8 bytes
        info_stream.seek(8)
        xml_header = info_stream.read(5)
        info_stream.seek(0)
        if xml_header == b'<?xml':
            self.encryption_type = ENCRYPTION_TYPE_AGILE
            self.parse_agile_encryption_info(info_stream)
            return
        # initially we assume it's standard
        # the code to parse standard will figure out if it's extensible
        self.encryption_type = ENCRYPTION_TYPE_STANDARD
        self.parse_standard_encryption_info(info_stream)
    finally:
        ole.close()
def process_file(filepath, field_filter_mode=None):
    """ decides which of the process_* functions to call """
    # Legacy OLE container: xls, ppt or doc.
    if olefile.isOleFile(filepath):
        logger.debug('Is OLE. Checking streams to see whether this is xls')
        if xls_parser.is_xls(filepath):
            logger.debug('Process file as excel 2003 (xls)')
            return process_xls(filepath)
        if is_ppt(filepath):
            logger.debug('is ppt - cannot have DDE')
            return u''
        logger.debug('Process file as word 2003 (doc)')
        with olefile.OleFileIO(filepath, path_encoding=None) as ole:
            return process_doc(ole)

    # RTF is detected by its magic bytes; the open handle is handed on.
    with open(filepath, 'rb') as file_handle:
        if file_handle.read(4) == RTF_START:
            logger.debug('Process file as rtf')
            return process_rtf(file_handle, field_filter_mode)

    try:
        doctype = ooxml.get_type(filepath)
        logger.debug('Detected file type: {0}'.format(doctype))
    except Exception as exc:
        logger.debug('Exception trying to xml-parse file: {0}'.format(exc))
        doctype = None

    # Dispatch table for the xml-based types.
    handlers = (
        ((ooxml.DOCTYPE_EXCEL,),
         'Process file as excel 2007+ (xlsx)', process_xlsx),
        ((ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003),
         'Process file as xml from excel 2003/2007+', process_excel_xml),
        ((ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003),
         'Process file as xml from word 2003/2007+', process_docx),
    )
    for doctypes, message, handler in handlers:
        if doctype in doctypes:
            logger.debug(message)
            return handler(filepath)

    if doctype is None:
        logger.debug('Process file as csv')
        return process_csv(filepath)

    # could be docx; if not: this is the old default code path
    logger.debug('Process file as word 2007+ (docx)')
    return process_docx(filepath, field_filter_mode)
def oleDump(inFile):
    """Collect OLE summary metadata from *inFile* as [key, value] pairs.

    :param inFile: path to the file to inspect.
    :returns: list of [name, value] string pairs; empty when *inFile* is a
        directory or not an OLE file.
    """
    def _text(value):
        # String fields come back as bytes; absent fields are None (the old
        # code crashed with AttributeError on None.decode()).
        if isinstance(value, bytes):
            return value.decode("utf-8", errors="replace")
        return "" if value is None else str(value)

    buff = []
    try:
        if olefile.isOleFile(inFile):
            with olefile.OleFileIO(inFile) as ole:
                # Parse the metadata once instead of re-parsing per field.
                meta = ole.get_metadata()
                buff.append(["office_author", _text(meta.author)])
                buff.append(["office_created", str(meta.create_time)])
                buff.append(["office_last-saved-author", _text(meta.last_saved_by)])
                buff.append(["office_last-saved-time", str(meta.last_saved_time)])
                try:
                    buff.append(["office_title", _text(meta.title)])
                    buff.append(["office_subject", _text(meta.subject)])
                except Exception:
                    pass
                # security == 1 flags a password-protected document.
                if meta.security == 1:
                    buff.append(["office_password", "True"])
                else:
                    buff.append(["office_password", "False"])
    except IsADirectoryError:
        pass
    return buff
def __init__(self, file):
    """Open a Word 97 (.doc) OLE container and locate its table stream.

    :param file: open binary file object; kept open for the lifetime of
        this object (closing self.ole would close it too).
    """
    self.file = file
    ole = olefile.OleFileIO(file)  # do not close this, would close file
    self.ole = ole
    self.format = "doc97"
    self.keyTypes = ["password"]
    self.key = None
    self.salt = None
    # FIB (File Information Block) layout:
    # https://msdn.microsoft.com/en-us/library/dd944620(v=office.12).aspx
    with ole.openstream("wordDocument") as stream:
        fib = _parseFib(stream)
    # fWhichTblStm selects which table stream holds the document tables:
    # https://msdn.microsoft.com/en-us/library/dd923367(v=office.12).aspx
    tablename = "1Table" if fib.base.fWhichTblStm == 1 else "0Table"
    Info = namedtuple("Info", ["fib", "tablename"])
    self.info = Info(
        fib=fib,
        tablename=tablename,
    )
def get_compound_file_binary(file):
    """Return the raw data of a Compound File Binary Format file.

    For .xls files the 'Workbook' stream content is returned; otherwise the
    whole file's bytes. Returns None (after a hint message) when the
    optional olefile dependency is missing.

    Compound File Binary Format Files
    https://stackoverflow.com/questions/12705527/reading-excel-files-with-xlrd
    """
    try:
        import olefile
        with open(file, 'rb') as f:
            if str(file).endswith('.xls'):
                ole = olefile.OleFileIO(f)
                # print(ole.listdir())
                if ole.exists('Workbook'):
                    d = ole.openstream('Workbook')
                    return d.read()
            # FIX: OleFileIO consumes the handle's position, so rewind
            # before falling back to returning the raw file contents.
            f.seek(0)
            return f.read()
    except ImportError:
        # Hint (Chinese): "olefile not found, please install it".
        print(Fore.RED + "注:找不到 olefile,请安装它: pip install olefile", Style.RESET_ALL)
        pass
def __init__(self, file):
    """Open a Word 97 (.doc) OLE container and locate its table stream.

    :param file: open binary file object; kept open for the lifetime of
        this object (closing self.ole would close it too).
    """
    self.file = file
    ole = olefile.OleFileIO(file)  # do not close this, would close file
    self.ole = ole
    self.format = "doc97"
    self.keyTypes = ['password']
    self.key = None
    self.salt = None
    # FIB (File Information Block) layout:
    # https://msdn.microsoft.com/en-us/library/dd944620(v=office.12).aspx
    with ole.openstream('wordDocument') as stream:
        fib = _parseFib(stream)
    # fWhichTblStm selects which table stream holds the document tables:
    # https://msdn.microsoft.com/en-us/library/dd923367(v=office.12).aspx
    tablename = '1Table' if fib.base.fWhichTblStm == 1 else '0Table'
    Info = namedtuple('Info', ['fib', 'tablename'])
    self.info = Info(
        fib=fib,
        tablename=tablename,
    )
def insert_olefile( self, filepath, icon, caption ):
    """Embed *filepath* in the document as an OLE package object (Python 2).

    Copies a template oleObject1.bin, overwrites its Ole10Native stream with
    the file's bytes, renders an EMF icon with *caption* into the ObjInfo
    stream, stores the result under word/embeddings/ and registers a
    relationship for it.
    """
    # Insert the file as OLE
    oletmpl = 'resource/oleObject1.bin'
    tmpolefile = tempfile.NamedTemporaryFile().name
    shutil.copy( oletmpl, tmpolefile )
    ole = olefile.OleFileIO(tmpolefile,write_mode=True)
    streams = ole.listdir()
    for s in streams:
        print s, ole.get_size(s)
    streamname = '\x01Ole10Native'
    with open(filepath,'rb') as f:
        size = ole.get_size(streamname)
        print 'Size: ' + str( size )
        # write_stream requires the exact existing stream size: pad with NULs.
        data = f.read().ljust(size,'\x00')
        print 'Data size: ' + str( len( data ) )
        ole.write_stream(streamname, data)
    # Insert file icon / name
    tmpemffile = tempfile.NamedTemporaryFile().name
    emf = pyemf.EMF(100,70,300)
    icotmpl = 'resource/' + icon + '.emf'
    emf.load(icotmpl)
    emf.TextOut( 10, 80, caption )
    emf.save(tmpemffile)
    streamname = '\x03ObjInfo'
    with open( tmpemffile, 'rb' ) as f:
        size = ole.get_size(streamname)
        print 'Size: ' + str( size )
        data = f.read().ljust(size,'\x00')
        print 'Data size: ' + str( len( data ) )
        ole.write_stream(streamname, data)
    ole.close()
    # Store the finished OLE object inside the docx package contents.
    intpath = 'word/embeddings/oleObject1.bin'
    with open( tmpolefile, 'rb' ) as f:
        self.contents[intpath] = f.read()
    # Get a rid
    rid = self.add_rel( 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject', intpath )
def _get_ole_metadata(fp):
    """Return a dict of decoded OLE summary metadata for *fp*.

    :param fp: path (or file object) accepted by olefile.OleFileIO.
    :returns: dict keyed by olefile SUMMARY_ATTRIBS names plus 'codepage',
        or None when the file does not exist.
    """
    try:
        # FIX: the OleFileIO handle was previously leaked.
        ole = olefile.OleFileIO(fp)
        try:
            raw = ole.get_metadata()
        finally:
            ole.close()
        tidied = {}
        # The value we get here is a signed 16-bit quantity, even though
        # the file format specifies values up to 65001 — map negatives
        # back into the unsigned range.
        tidied["codepage"] = raw.codepage
        if tidied["codepage"] < 0:
            tidied["codepage"] += 65536
        codec = _codepage_to_codec(tidied["codepage"])
        if codec:
            for name in olefile.OleMetadata.SUMMARY_ATTRIBS:
                if name in tidied:
                    continue
                value = getattr(raw, name)
                # String properties arrive as bytes in the file's codepage.
                if isinstance(value, bytes):
                    value, _ = codec.decode(value)
                tidied[name] = value
        return tidied
    except FileNotFoundError:
        return None
def ms_doc(ms_file_list):
    """Extract user names and creating software from legacy Office files.

    :param ms_file_list: iterable of paths to OLE documents.
    :returns: tuple (user_names, software_list) of cleaned strings.
    """
    software_list = []
    user_names = []

    def _clean(value):
        # Metadata fields may be None or bytes; the old code let the
        # resulting TypeError silently skip the entire file.
        if value is None:
            return ''
        if isinstance(value, bytes):
            value = value.decode('utf-8', errors='replace')
        return re.sub('[^0-9a-zA-Z]+', ' ', value)

    def _unscatter(text):
        # Collapse strings like " J o h n " produced by odd encodings.
        oddity = re.match('(\s\w\s+(\w\s+)+\w)', text)
        if oddity:
            return str(oddity.group(1)).replace(' ', '')
        return text

    info('Extracting MSDOCS MetaData')
    for filename in ms_file_list:
        try:
            data = olefile.OleFileIO(filename)
            try:
                meta = data.get_metadata()
            finally:
                data.close()  # FIX: handle was never closed
            author = _clean(meta.author)
            software = _clean(meta.creating_application)
            save_by = _clean(meta.last_saved_by)
            if author:
                user_names.append(str(_unscatter(author)).title())
            if software:
                software_list.append(_unscatter(software))
            if save_by:
                user_names.append(str(_unscatter(save_by)).title())
        except Exception:
            pass
    info('Finished Extracting MSDOC MetaData')
    return (user_names, software_list)
def decrypt_agile_aes_cbc(self, encryption_key):
    """Decrypt the EncryptedPackage stream (agile encryption, AES-CBC) into
    self.output_file.

    :param encryption_key: derived package encryption key (bytes).
    :returns: True on success.
    """
    SEGMENT_LENGTH = 4096  # the package is encrypted in 4096-byte segments
    ole = olefile.OleFileIO(self.source_file)
    ep = ole.openstream('EncryptedPackage')
    try:
        obuf = b''
        # First 8 bytes of the stream hold the plaintext size (low 4 read).
        totalSize = unpack('<I', ep.read(4))[0]
        #sys.stderr.write("totalSize: {}\n".format(totalSize))
        ep.seek(8)
        with open(self.output_file, 'wb') as fp:
            for i, ibuf in enumerate(iter(functools.partial(ep.read, SEGMENT_LENGTH), b'')):
                # Per-segment IV = hash(keyDataSalt || segment index),
                # truncated to the 16-byte AES block size.
                saltWithBlockKey = self.encryption_info.key_data_salt + pack('<I', i)
                iv = hashCalc(saltWithBlockKey, self.encryption_info.key_data_hash_algorithm).digest()
                iv = iv[:16]
                aes = AES.new(encryption_key, AES.MODE_CBC, iv)
                dec = aes.decrypt(ibuf)
                # NOTE(review): output is not truncated to totalSize, so the
                # final segment may include padding bytes — confirm upstream.
                fp.write(dec)
        return True
    finally:
        ole.close()
def get_notes(sticky_notes_file_path):
    """Read every note from a Windows Sticky Notes (.snt) OLE file.

    :param sticky_notes_file_path: path to the .snt file.
    :returns: list of {'text': <markdown>, 'color': None} dicts.
    """
    notes = []
    snt_file = olefile.OleFileIO(sticky_notes_file_path)
    try:
        for storage in snt_file.listdir(storages=True, streams=False):
            note_id = storage[0]  # UUID-like string representing the note ID
            note_text_rtf_file = '0'  # RTF content of the note
            with snt_file.openstream([note_id, note_text_rtf_file]) as note_content:
                rawdata = note_content.read()
            # The RTF body is decoded as ASCII (RTF control text is 7-bit);
            # a dead chardet.detect() call was removed here.
            note_text_rtf = rawdata.decode('ascii')
            notes.append({'text': getMarkdown(note_text_rtf), 'color': None})
    finally:
        # FIX: the file handle was previously leaked when decoding raised.
        snt_file.close()
    return notes
def _winoffice(self):
    """Processes a winoffice file using olefile/oletools, flagging macros,
    encryption, parsing issues and embedded flash as dangerous."""
    self.cur_file.add_log_details('processing_type', 'WinOffice')
    # Try as if it is a valid document
    oid = oletools.oleid.OleID(self.cur_file.src_path)
    if not olefile.isOleFile(self.cur_file.src_path):
        # Manual processing, may already count as suspicious
        try:
            ole = olefile.OleFileIO(self.cur_file.src_path,
                                    raise_defects=olefile.DEFECT_INCORRECT)
        except Exception:
            # FIX: bail out here — the old code fell through and raised
            # NameError on the unbound `ole`; the file is still copied.
            self.cur_file.add_log_details('not_parsable', True)
            self.cur_file.make_dangerous()
            self._safe_copy()
            return
        if ole.parsing_issues:
            self.cur_file.add_log_details('parsing_issues', True)
            self.cur_file.make_dangerous()
        else:
            if ole.exists('macros/vba') or ole.exists('Macros') \
                    or ole.exists('_VBA_PROJECT_CUR') or ole.exists('VBA'):
                self.cur_file.add_log_details('macro', True)
                self.cur_file.make_dangerous()
    else:
        indicators = oid.check()
        # Encrypted can be set by multiple checks on the script
        if oid.encrypted.value:
            self.cur_file.add_log_details('encrypted', True)
            self.cur_file.make_dangerous()
        if oid.macros.value or oid.ole.exists('macros/vba') or oid.ole.exists('Macros') \
                or oid.ole.exists('_VBA_PROJECT_CUR') or oid.ole.exists('VBA'):
            self.cur_file.add_log_details('macro', True)
            self.cur_file.make_dangerous()
        for i in indicators:
            if i.id == 'ObjectPool' and i.value:
                # FIXME: Is it suspicious?
                self.cur_file.add_log_details('objpool', True)
            elif i.id == 'flash' and i.value:
                self.cur_file.add_log_details('flash', True)
                self.cur_file.make_dangerous()
    self._safe_copy()
def __init__(self, path: str):
    """Open an MSI-style OLE file for editing and load its tables.

    :param path: path to the OLE file; opened in write mode.
    """
    self._olefile = olefile.OleFileIO(path, write_mode=True)
    # ExitStack owns the temp resources so they are released together.
    self._stack = contextlib.ExitStack()
    self._tmp_dir = self._stack.enter_context(utils.TempDirectory())

    def ReadStream(name):
        # Read a whole named stream into memory.
        with self._olefile.openstream(name) as stream:
            return stream.read(self._olefile.get_size(name))

    string_pool_raw = ReadStream(STRING_POOL_STREAM_NAME)
    string_data_raw = ReadStream(STRING_DATA_STREAM_NAME)
    self._string_pool = StringPool(string_pool_raw, string_data_raw)
    feature_raw = ReadStream(FEATURE_STREAM_NAME)
    self._feature_table = FeatureTable(feature_raw, self._string_pool)
    # Extract the embedded CAB archive into the temp dir for repacking.
    cab_path = os.path.join(self._tmp_dir, "input.cab")
    cab_tmp_path = os.path.join(self._tmp_dir, "cab_tmp_dir")
    with open(cab_path, "wb") as f:
        f.write(ReadStream(GRR_CAB_STREAM_NAME))
    self._cab = cab_utils.Cab(cab_path, cab_tmp_path)
    self._cab.ExtractFiles()
    self._cab.WriteFile("PaddingFile", b"")
def seed_analyzer(self):
    """Map every storage/stream of the HWP seed file to its size (Python 2).

    Fills self.hwp_field (listdir() paths) and self.hwp_field_size, a dict
    of 'storage/stream' (or bare stream name) -> size in bytes.
    """
    self.ole = olefile.OleFileIO(self.seed, write_mode=True)
    self.hwp_field = self.ole.listdir()
    print '\n'
    print '[*] HWP Seed info\n'
    # Store each hwp field and its size in the dictionary.
    for i in range(len(self.hwp_field)):
        field = self.hwp_field[i]
        field_size = self.ole.get_size(self.hwp_field[i])
        if len(field) > 1:
            # Nested entry: key is 'storage/stream'.
            storage = field[0]
            stream = field[1]
            field = str(storage + '/' + stream)
            self.hwp_field_size[field] = field_size
        else:
            # Top-level stream: key is its bare name.
            field = str(field[0])
            self.hwp_field_size[field] = field_size
def get_platform(self):
    """
    Gets the platform that the project is targeting (ME or SE)
    returns str
    """
    # The project file is an OLE container; the .med/.sed entry name tells
    # us which product created it (the last matching entry wins).
    with olefile.OleFileIO(self.file) as ole:
        entries = ole.listdir()
    candidate = ''
    for entry in entries:
        top = entry[0]
        if top.endswith(('.med', '.sed')):
            candidate = top
    suffix = candidate[-3:].lower()
    if suffix == "med":
        return "FactoryTalk View Studio ME"
    if suffix == "sed":
        return "FactoryTalk View Studio SE"
    return "Unknown platform"
def _read_doc_vars(fname):
    """
    Use a heuristic to try to read in document variable names and values from
    the 1Table OLE stream. Note that this heuristic is kind of hacky and is not
    close to being a general solution for reading in document variables, but it
    serves the need for ViperMonkey emulation.

    TODO: Replace this when actual support for reading doc vars is added to
    olefile.
    """
    try:
        # Pull every wide-character string out of the 1Table OLE data.
        ole = olefile.OleFileIO(fname, write_mode=False)
        table_data = ole.openstream("1Table").read()
        matches = re.findall("(([^\x00-\x1F\x7F-\xFF]\x00){4,})", table_data)
        names = [m[0].replace("\x00", "").strip() for m in matches]
        # Pair each string with its successor: treat it as a variable whose
        # value is the next string. This invents "variables" that do not
        # really exist, but valid VBA never reads them, so emulation is safe.
        # TODO: Figure out if this is 1 or 2 positions ahead.
        return list(zip(names, names[1:]))
    except Exception as e:
        log.error("Cannot read document variables. " + str(e))
        return []
def metadata(self, args, file, opts):
    """Return all OLE summary/document metadata of *file* as a dict of
    strings (bytes fields are decoded as UTF-8)."""
    try:
        meta = olefile.OleFileIO(file.file_path).get_metadata()
    except Exception:
        raise error.CommandWarning('file ' + str(file.file_path) + ' is not a valid ole file')
    attribs = ['codepage', 'title', 'subject', 'author', 'keywords',
               'comments', 'template', 'last_saved_by', 'revision_number',
               'total_edit_time', 'last_printed', 'create_time',
               'last_saved_time', 'num_pages', 'num_words', 'num_chars',
               'thumbnail', 'creating_application', 'security',
               'codepage_doc', 'category', 'presentation_target', 'bytes',
               'lines', 'paragraphs', 'slides', 'notes', 'hidden_slides',
               'mm_clips', 'scale_crop', 'heading_pairs', 'titles_of_parts',
               'manager', 'company', 'links_dirty', 'chars_with_spaces',
               'unused', 'shared_doc', 'link_base', 'hlinks',
               'hlinks_changed', 'version', 'dig_sig', 'content_type',
               'content_status', 'language', 'doc_version']
    output = {}
    for attrib in attribs:
        # Single attribute lookup per field; decode bytes, stringify rest.
        value = getattr(meta, attrib)
        if isinstance(value, bytes):
            value = value.decode("utf-8")
        output[attrib] = str(value)
    return output
def main(pathname):
    """Print OLE summary metadata for *pathname*, the newest of its
    create/last-saved timestamps, and whether it carries a digital
    signature."""
    # Check file exists:
    if not os.path.exists(pathname):
        raise FileNotFoundError(pathname)

    ole = olefile.OleFileIO(pathname)
    meta = ole.get_metadata()
    for prop in meta.SUMMARY_ATTRIBS:
        print((prop, getattr(meta, prop)))

    # Newest of creation time vs. last-saved time.
    create_time = getattr(meta, "create_time")
    last_saved_time = getattr(meta, "last_saved_time")
    max_time = max((create_time, last_saved_time))
    max_time_yyyymmdd = max_time.strftime("%Y-%m-%d")
    print(f"Max Mod Time: {max_time_yyyymmdd}")

    if not ole.exists("\x05DigitalSignature"):
        print("WARNING: File not signed!")
    else:
        with ole.openstream("\x05DigitalSignature") as fh:
            sig_data = fh.read()
        print(f"Digital Signature: {len(sig_data)} bytes")
    ole.close()
def process_file(filename, extract):
    """Dispatch on file type (OLE / extracted packager stream / RTF) and
    print any embedded Packager object found (Python 2).

    :param filename: path of the file to inspect.
    :param extract: bool, write the embedded payload out named by its MD5.
    """
    pkgobj = None
    if olefile.isOleFile(filename):
        print ' [*] File is an OLE file...'
        ole = olefile.OleFileIO(filename)
        filelist = ole.listdir()
        print ' [*] Processing Streams...'
        for fname in filelist:
            if '\x01Ole10Native' in fname:
                print ' [*] Found Ole10Native Stream...checking for packager data'
                sdata = ole.openstream(fname).read()
                # Bytes 4..6 == 0x0200 marks Packager-formatted data.
                if sdata[4:6].encode('hex') == '0200':
                    print ' [*] Stream contains Packager Formatted data...'
                    pkgobj = PackagerStream(sdata[4:].encode('hex'))
                    print
                    print pkgobj
    elif isstream(filename):
        with open(filename, 'rb') as f:
            sdata = f.read()
        print ' [*] File is an extracted Packager Stream'
        print ' [*] Stream contains Packager Formatted data...'
        pkgobj = PackagerStream(sdata[4:].encode('hex'))
        print
        print pkgobj
    else:
        # Treat the file as an rtf doc
        rd = RTFDoc(filename)
        print ' [*] Scanning file for embedded objects'
        rd.scan()
    if extract:
        try:
            # NOTE(review): pkgobj can still be None here (e.g. RTF path);
            # the except below swallows the resulting AttributeError.
            print ' Extracting embedded data as %s' % pkgobj.gethash('md5')
            with open(pkgobj.gethash('md5'), 'wb') as out:
                out.write(pkgobj.Data)
        except Exception as e:
            print ' [!] An error occurred while writing the file :: %s' % e
def extract_ole_metadata(doc_path) -> str:
    """Build a human-readable metadata summary for an OLE document.

    :param doc_path: path to the document.
    :returns: newline-terminated lines for author, title, last-saved-by,
        creation time and modification time; empty/missing fields show NA.
    """
    ole = olefile.OleFileIO(doc_path)
    try:
        olemeta = ole.get_metadata()
    finally:
        ole.close()  # FIX: handle was never closed

    def _line(label, value):
        # Normalise falsy values to the NA placeholder.
        return label + (value if value else NA) + "\n"

    def _text(attr):
        # String fields are stored as bytes; absent fields are None.
        value = getattr(olemeta, attr)
        return value.decode(ENCODING) if value is not None else None

    def _time(attr):
        # FIX: .strftime() previously raised AttributeError for a missing
        # (None) timestamp.
        value = getattr(olemeta, attr)
        return value.strftime(TIME_FORMAT) if value is not None else None

    metadata = ""
    metadata += _line(AUTHOR, _text("author"))
    metadata += _line(TITLE, _text("title"))
    metadata += _line(LAST_SAVED_BY, _text("last_saved_by"))
    metadata += _line(CREATE_TIME, _time("create_time"))
    metadata += _line(MODIFIED_TIME, _time("last_saved_time"))
    return metadata
def load_data(self):
    """Read the `field` stream from every OLE file in self.im_file and
    return the per-file byte values as a numpy array of arrays.

    Non-OLE entries are dropped from self.im_file as a side effect.
    """
    # FIX: the original removed items from self.im_file while iterating
    # over it, which skips the element following each removal. Filter
    # into a new list instead.
    self.im_file = [f for f in self.im_file if olefile.isOleFile(f)]

    hex_content = []
    for im_file in self.im_file:
        ole = olefile.OleFileIO(im_file, write_mode=True)
        # NOTE(review): `field` is not defined in this method — presumably
        # a module/class-level stream name; confirm.
        stream = ole.openstream(field)
        data = stream.read()
        stream.seek(0)
        hex_content.append(data)

    total_list = []
    for raw in hex_content:
        # One int (0-255) per byte of the stream.
        string_hex = binascii.hexlify(raw)
        hex_list = [
            int(string_hex[i:i + 2], 16)
            for i in range(0, len(string_hex), 2)
        ]
        total_list.append(np.asarray(hex_list))
    # Always return an ndarray (the old code returned a plain [] when no
    # valid files were found).
    return np.asarray(total_list)
def read(doc, fileName):
    """Read a 3D Studio Max OLE file into the FreeCAD document *doc*,
    parsing each known storage section in order."""
    if (olefile.isOleFile(fileName)):
        setEndianess(LITTLE_ENDIAN)
        ole = olefile.OleFileIO(fileName)
        # NOTE(review): the DocumentSummaryInformation result is immediately
        # overwritten by the SummaryInformation read below — the first call
        # appears to be dead; confirm whether it is needed.
        p = ole.getproperties('\x05DocumentSummaryInformation', convert_time=True, no_conversion=[10])
        p = ole.getproperties('\x05SummaryInformation', convert_time=True, no_conversion=[10])
        if (DEBUG): FreeCAD.Console.PrintMessage("==== ClassData ===\n")
        readClassData(ole, fileName)
        if (DEBUG): FreeCAD.Console.PrintMessage("==== Config ===\n")
        readConfig(ole, fileName)
        if (DEBUG): FreeCAD.Console.PrintMessage("==== DllDirectory ===\n")
        readDllDirectory(ole, fileName)
        if (DEBUG): FreeCAD.Console.PrintMessage("==== ClassDirectory3 ===\n")
        readClassDirectory3(ole, fileName)
        if (DEBUG): FreeCAD.Console.PrintMessage("==== VideoPostQueue ===\n")
        readVideoPostQueue(ole, fileName)
        if (DEBUG): FreeCAD.Console.PrintMessage("==== Scene ===\n")
        readScene(doc, ole, fileName)
    else:
        FreeCAD.Console.PrintError("File seems to be no 3D Studio Max file!")
def run(self) :
    """Extract a text preview from the file at self.path.

    For OLE-based formats (extension in self.OLE_EXT_LIST, e.g. HWP) the
    'PrvText' preview stream is decoded as UTF-16 and written out via
    self.fileWrite(); anything else falls back to self.fileToText().
    """
    print ('Tika Start')
    print ( '확장자명 : %s' % self.ext )
    if self.ext in self.OLE_EXT_LIST :
        print (self.ext)
        ole = olefile.OleFileIO(self.path)
        hwpTree = ole.listdir()
        # The preview stream holds UTF-16 encoded plain text.
        contents = ole.openstream('PrvText').read()
        self.fileWrite(contents.decode('utf-16'))
    else :
        self.fileToText()
def obtain_meta():
    """Walk the directory tree given in sys.argv[1] and, for each file not
    seen before, print its path, SHA-256, MD5 and OLE metadata dump.

    Files that fail to parse as OLE (or to open) are silently skipped,
    matching the original best-effort behaviour.
    """
    for root, dirs, files in os.walk(sys.argv[1]):
        for name in files:
            try:
                if name in file_l:
                    continue
                file_l.append(name)
                # FIX: os.walk yields bare filenames — join with `root` so
                # this works regardless of the current working directory.
                path = os.path.join(root, name)
                ole_win = olefile.OleFileIO(path, raise_defects=olefile.DEFECT_INCORRECT)
                file_hash_SHA256 = hashlib.sha256()
                file_hash_MD5 = hashlib.md5()
                with open(path, 'rb') as f:
                    chunk = f.read(BUFFER_SIZE)
                    while chunk:
                        file_hash_SHA256.update(chunk)
                        file_hash_MD5.update(chunk)
                        chunk = f.read(BUFFER_SIZE)
                print(name)
                print(os.path.abspath(path))
                print("SHA-256:", file_hash_SHA256.hexdigest())
                print('MD5:', file_hash_MD5.hexdigest())
                meta = ole_win.get_metadata()
                print(meta.dump())
                print('')
            except Exception:
                # Best-effort: skip unreadable / non-OLE files.
                pass
def recognize(cls, ftg):
    """Return True when ftg.data is an OLE file; on success attach the
    parsed OleFileIO and root-storage CLSID details to *ftg*.
    """
    # Here there's an issue with non-OLE files smaller than 1536 bytes
    # see https://github.com/decalage2/olefile/issues/142
    # Workaround: pad data when it's smaller than 1536 bytes
    # TODO: use the new data parameter of isOleFile when it's implemented
    if len(ftg.data) < 1536:
        data = ftg.data + (b'\x00' * 1536)
    else:
        data = ftg.data
    # The padded copy is only used for detection; parsing below uses the
    # original ftg.data.
    if olefile.isOleFile(data):
        # open the OLE file
        try:
            # Open and parse the OLE file:
            ftg.olefile = olefile.OleFileIO(ftg.data)
            # Extract the CLSID of the root storage
            ftg.root_clsid = ftg.olefile.root.clsid
            ftg.root_clsid_name = clsid.KNOWN_CLSIDS.get(
                ftg.root_clsid, None)
        except:
            # TODO: log the error
            # Parsing failed despite the OLE signature matching.
            return False
        return True
    else:
        return False
def decode():
    """Parse command-line options, unpack the given zip archive next to
    itself, and dump every stream of each embedded .bin OLE object to disk.

    Exits with status 2 on bad options.
    """
    version()
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hf:", ["help", "file="])
    except getopt.GetoptError:
        help()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            help()
            sys.exit()
        elif opt in ("-f", "--file"):
            with zipfile.ZipFile(arg, "r") as zip_ref:
                output = arg + "_unpacked"
                zip_ref.extractall(output)
            for path, subdirs, files in os.walk(output):
                for name in files:
                    sys.stdout.write("%s\n" % name)
                    if name.endswith(".bin"):
                        oleFile = os.path.join(path, name)
                        ole = olefile.OleFileIO(oleFile)
                        for entry in ole.listdir():
                            sys.stdout.write("%s\n" % entry)
                            data = ole.openstream(entry).read()
                            parcelID = os.path.join(path, entry[0])
                            # FIX: stream content is binary — write in 'wb'
                            # mode (text 'w' raised TypeError on Python 3)
                            # and close the handle deterministically.
                            with open(parcelID, 'wb') as outFile:
                                outFile.write(data)
                        ole.close()
def process_file_scanexpr (container, filename, data):
    """
    Process a single file

    :param container: str, path and filename of container if the file is within
    a zip archive, None otherwise.
    :param filename: str, path and filename of file on disk, or within the container.
    :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    if container:
        display_filename = '%s in %s' % (filename, container)
    else:
        display_filename = filename
    print '='*79
    print 'FILE:', display_filename
    all_code = ''
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        vba = VBA_Parser(filename, data, relaxed=True)
        print 'Type:', vba.type
        if vba.detect_vba_macros():
            # Read in document metadata.
            ole = olefile.OleFileIO(filename)
            vba_library.meta = ole.get_metadata()
            #print 'Contains VBA Macros:'
            for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
                # hide attribute lines:
                #TODO: option to disable attribute filtering
                vba_code_filtered = filter_vba(vba_code)
                print '-'*79
                print 'VBA MACRO %s ' % vba_filename
                print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))
                print '- '*39
                # detect empty macros:
                if vba_code_filtered.strip() == '':
                    print '(empty macro)'
                else:
                    # TODO: option to display code
                    print vba_code_filtered
                    # Accumulate de-obfuscated code for expression scanning.
                    vba_code = vba_collapse_long_lines(vba_code)
                    all_code += '\n' + vba_code
            print '-'*79
            print 'EVALUATED VBA EXPRESSIONS:'
            t = prettytable.PrettyTable(('Obfuscated expression', 'Evaluated value'))
            t.align = 'l'
            t.max_width['Obfuscated expression'] = 36
            t.max_width['Evaluated value'] = 36
            for expression, expr_eval in scan_expressions(all_code):
                t.add_row((repr(expression), repr(expr_eval)))
            print t
        else:
            print 'No VBA macros found.'
    except: #TypeError:
        #raise
        #TODO: print more info if debug mode
        #print sys.exc_value
        # display the exception with full stack trace for debugging, but do not stop:
        traceback.print_exc()
    print ''
def process_file (container, filename, data, altparser=False, strip_useless=False): """ Process a single file :param container: str, path and filename of container if the file is within a zip archive, None otherwise. :param filename: str, path and filename of file on disk, or within the container. :param data: bytes, content of the file if it is in a container, None if it is a file on disk. """ #TODO: replace print by writing to a provided output file (sys.stdout by default) if container: display_filename = '%s in %s' % (filename, container) else: display_filename = filename print '='*79 print 'FILE:', display_filename vm = ViperMonkey() try: #TODO: handle olefile errors, when an OLE file is malformed vba = VBA_Parser(filename, data, relaxed=True) print 'Type:', vba.type if vba.detect_vba_macros(): # Read in document metadata. try: ole = olefile.OleFileIO(filename) vba_library.meta = ole.get_metadata() except: vba_library.meta = {} # Parse the VBA streams. comp_modules = parse_streams(vba, strip_useless) for m in comp_modules: vm.add_compiled_module(m) # Pull out form variables. for (subfilename, stream_path, form_variables) in vba.extract_form_strings_extended(): if form_variables is not None: var_name = form_variables['name'] macro_name = stream_path if ("/" in macro_name): start = macro_name.rindex("/") + 1 macro_name = macro_name[start:] global_var_name = (macro_name + "." + var_name).encode('ascii', 'ignore') val = form_variables['value'] vm.globals[global_var_name.lower()] = val log.debug("Added VBA form variable %r = %r to globals." % (global_var_name, val)) print '-'*79 print 'TRACING VBA CODE (entrypoint = Auto*):' vm.trace() # print table of all recorded actions print('Recorded Actions:') print(vm.dump_actions()) else: print 'No VBA macros found.' except: #TypeError: #raise #TODO: print more info if debug mode #print sys.exc_value # display the exception with full stack trace for debugging, but do not stop: traceback.print_exc() print ''