def fileFormat_scanner(fileName):
    """Scan an OLE compound file for encryption indicators.

    Looks at the SummaryInformation property set and, for Word files, the
    FIB flags in the WordDocument stream.  Findings are printed to stdout.

    Returns True when the file was processed without errors, False otherwise.
    """
    try:
        oleFile = OleFileIO(fileName)
        for s in oleFile.listdir():
            if s == ["\x05SummaryInformation"]:
                print("Summary Informations Available")
                properties = oleFile.getproperties(s)
                if 0x12 in properties:
                    # 0x12 = creating application name (stored, not reported)
                    appName = properties[0x12]
                # Bit 0 of the security property (0x13) marks an encrypted
                # document
                if 0x13 in properties and properties[0x13] & 1:
                    print("Document is Encrypted")
            if s == ['WordDocument']:
                s_word = oleFile.openstream(['WordDocument'])
                try:
                    # The fEncrypted flag is bit 8 of the 16-bit flag field at
                    # offset 10 of the WordDocument stream (FIB base).
                    # NOTE(review): "H" uses native byte order; the format is
                    # little-endian, so "<H" may be intended -- confirm.
                    s_word.read(10)
                    temp16 = unpack("H", s_word.read(2))[0]
                    if (temp16 & 0x0100) >> 8:
                        print("Word Document Encrypted")
                finally:
                    # Close the stream even if parsing fails mid-way; the
                    # original leaked it on error.
                    s_word.close()
    except Exception:
        # Was a bare "except:", which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to ordinary errors.
        print("Error While Processing OLE Streams")
        return False
    return True
def read(file):
    """Returns a sequence of objects from an Altium *.SchDoc schematic file
    """
    ole = OleFileIO(file)
    stream = ole.openstream("FileHeader")
    parsed = []
    # Each record is a 4-byte little-endian length, a pipe-separated
    # property list, then a null terminator byte.
    for size_bytes in iter(lambda: stream.read(4), b""):
        (size,) = struct.unpack("<I", size_bytes)
        raw = stream.read(size - 1)
        record = {}
        for pair in raw.split(b"|"):
            # Most (but not all) property lists are prefixed with a pipe
            # "|", so skip the empty chunk before the prefix.
            if not pair:
                continue
            (key, value) = pair.split(b"=", 1)
            record[key.decode("ascii")] = value
        parsed.append(record)
        # Skip over null terminator byte
        stream.seek(+1, SEEK_CUR)
    return parsed
def read(file):
    """Returns a sequence of objects from an Altium *.SchDoc schematic file
    """
    stream = OleFileIO(file).openstream("FileHeader")
    objects = []
    while True:
        prefix = stream.read(4)
        if not prefix:
            # End of the FileHeader stream
            return objects
        (count,) = struct.unpack("<I", prefix)
        body = stream.read(count - 1)
        entry = {}
        for item in body.split(b"|"):
            # Property lists are usually (not always) prefixed with a pipe
            # "|"; the empty chunk before that prefix is ignored.
            if item:
                (name, data) = item.split(b"=", 1)
                entry[name.decode("ascii")] = data
        objects.append(entry)
        # Skip over null terminator byte
        stream.seek(+1, SEEK_CUR)
def main():
    # Parse an OLE compound file named on the command line and dump its
    # parsed contents as JSON.  NOTE: Python 2 code (print statements).
    # initialize OLE file parser
    global ole  # read by the module-level read() helper
    parser = ArgumentParser()
    parser.add_argument("file")
    args = parser.parse_args()
    # open file by filename
    ole = OleFileIO(args.file)
    content = ole.listdir()
    print "File contents:"
    print content, "\n"
    # parse FileHeader
    objects = read("FileHeader")
    result = {}
    result["FileHeader"] = objects
    # parse all other contents
    for doc in content:
        if len(doc) > 1:
            # Nested stream: group results by top-level storage name
            path = "/".join(doc)
            print path
            if not doc[0] in result:
                result[doc[0]] = {}
            result[doc[0]][doc[1]] = read(path)
        else:
            # Top-level stream.  NOTE(review): passes the list `doc` here but
            # a joined string above -- confirm read() accepts both forms.
            result[doc[0]] = read(doc)
    # output parsed content as formatted JSON
    print json_dumps(result, indent=4)
def __init__(self, filename, shape, process_func=None, dtype=None,
             as_grey=False):
    """Open a ZVI file and index its image streams.

    Parameters
    ----------
    filename : string
    shape : tuple
        image dimensions; the ZVI metadata is not interpreted, so the
        shape must be given explicitly
    process_func : function, optional
        applied to each frame's data
    dtype : numpy dtype, optional
        pixel type; defaults to numpy.uint16
    as_grey : boolean, optional
        convert color images to greyscale
    """
    self._filename = filename
    self._ole = OleFileIO(self._filename)
    self._streams = self._ole.listdir()
    # Honor the caller-supplied dtype; previously the parameter was
    # accepted but silently ignored and uint16 was always used.
    self._dtype = np.uint16 if dtype is None else dtype
    self._im_sz = shape
    self._toc = []
    # Raw string avoids the invalid-escape-sequence warning on \( and \d.
    item_pattern = re.compile(r'Item\((\d+)\)')
    for stream in self._streams:
        if stream[0] != 'Image':
            continue
        m = item_pattern.match(stream[1])
        if m is None:
            continue
        self._toc.append(int(m.group(1)))
    # NOTE(review): if items are numbered from 0, the length should
    # arguably be max + 1 -- confirm against real files.
    self._len = max(self._toc)
    # self._toc is not used hereafter, but it could be.
    self._validate_process_func(process_func)
    self._as_grey(as_grey, process_func)
def read(file):
    """Parses an Altium ".SchDoc" schematic file and returns a Sheet object

    Returns a (sheet, storage_stream, storage_files) tuple, where
    storage_files maps embedded file names to their offsets in the
    Storage stream.
    """
    ole = OleFileIO(file)

    # Main object tree lives in the FileHeader stream: a header record,
    # then one record per object, each referencing its owner by index.
    stream = ole.openstream("FileHeader")
    records = iter_records(stream)
    records = (parse_properties(stream, record) for record in records)
    header = next(records)
    parse_header(header)
    header.check_unknown()
    sheet = Object(properties=next(records))
    objects = [sheet]
    for properties in records:
        obj = Object(properties=properties)
        # Attach each object to its owner; objects index themselves by
        # position in the list.
        objects[obj.properties.get_int("OWNERINDEX")].children.append(obj)
        objects.append(obj)

    # Optional "Additional" stream holds more objects in the same format.
    if ole.exists("Additional"):
        stream = ole.openstream("Additional")
        records = iter_records(stream)
        records = (parse_properties(stream, record) for record in records)
        header = next(records)
        parse_header(header)
        header.check_unknown()
        for properties in records:
            obj = Object(properties=properties)
            owner = obj.properties.get_int("OWNERINDEX")
            objects[owner].children.append(obj)
            objects.append(obj)

    # "Storage" stream: icon storage header followed by embedded files,
    # each a type-1 record starting with a 0xD0 marker byte, a one-byte
    # filename length, and the filename.
    storage_stream = ole.openstream("Storage")
    records = iter_records(storage_stream)
    header = parse_properties(storage_stream, next(records))
    header.check("HEADER", b"Icon storage")
    header.get_int("WEIGHT")
    header.check_unknown()
    storage_files = dict()
    for [type, length] in records:  # NOTE: shadows builtin "type" locally
        if type != 1:
            warn("Unexpected record type {} in Storage".format(type))
            continue
        header = storage_stream.read(1)
        if header != b"\xD0":
            warn("Unexpected Storage record header byte " + repr(header))
            continue
        [length] = storage_stream.read(1)
        filename = storage_stream.read(length)
        # Record the file's data offset; warn on duplicate names rather
        # than overwrite.
        pos = storage_stream.tell()
        if storage_files.setdefault(filename, pos) != pos:
            warn("Duplicate Storage record for " + repr(filename))

    # Anything beyond the three known streams is unexpected.
    streams = set(map(tuple, ole.listdir()))
    streams -= {("FileHeader", ), ("Additional", ), ("Storage", )}
    if streams:
        warn("Extra OLE file streams: " + ", ".join(map("/".join, streams)))

    return (sheet, storage_stream, storage_files)
def __init__(self, olefile, path='', parent=None):
    """Wrap an OLE storage item.

    Accepts either an already-open OleFileIO-like object (anything with an
    ``openstream`` attribute) or something OleFileIO can open itself
    (filename or file object).  Raises InvalidOleStorageError when the
    argument is not an OLE2 compound binary file.
    """
    if not hasattr(olefile, 'openstream'):
        # Not an open OLE document yet: validate, then open it ourselves.
        from OleFileIO_PL import isOleFile
        if not isOleFile(olefile):
            from hwp5.errors import InvalidOleStorageError
            raise InvalidOleStorageError('Not an OLE2 Compound Binary File.')
        from OleFileIO_PL import OleFileIO
        olefile = OleFileIO(olefile)
    OleStorageItem.__init__(self, olefile, path, parent)
def read(file):
    """Parses an Altium ".SchDoc" schematic file and returns a Sheet object

    The result is a (sheet, storage_stream, storage_files) tuple;
    storage_files maps embedded file names to offsets within the
    Storage stream.
    """
    ole = OleFileIO(file)

    # FileHeader stream: a header record, then one record per object.
    stream = ole.openstream("FileHeader")
    records = iter_records(stream)
    records = (parse_properties(stream, record) for record in records)
    header = next(records)
    parse_header(header)
    header.check_unknown()
    # First object record is the sheet; every later object names its
    # owner by index into the growing objects list.
    sheet = Object(properties=next(records))
    objects = [sheet]
    for properties in records:
        obj = Object(properties=properties)
        objects[obj.properties.get_int("OWNERINDEX")].children.append(obj)
        objects.append(obj)

    # The optional "Additional" stream carries more objects, same layout.
    if ole.exists("Additional"):
        stream = ole.openstream("Additional")
        records = iter_records(stream)
        records = (parse_properties(stream, record) for record in records)
        header = next(records)
        parse_header(header)
        header.check_unknown()
        for properties in records:
            obj = Object(properties=properties)
            owner = obj.properties.get_int("OWNERINDEX")
            objects[owner].children.append(obj)
            objects.append(obj)

    # "Storage" stream: header then embedded files; each file record is
    # type 1, begins with a 0xD0 marker, a 1-byte name length, and name.
    storage_stream = ole.openstream("Storage")
    records = iter_records(storage_stream)
    header = parse_properties(storage_stream, next(records))
    header.check("HEADER", b"Icon storage")
    header.get_int("WEIGHT")
    header.check_unknown()
    storage_files = dict()
    for [type, length] in records:  # NOTE: "type" shadows the builtin here
        if type != 1:
            warn("Unexpected record type {} in Storage".format(type))
            continue
        header = storage_stream.read(1)
        if header != b"\xD0":
            warn("Unexpected Storage record header byte " + repr(header))
            continue
        [length] = storage_stream.read(1)
        filename = storage_stream.read(length)
        # Remember where this file's data starts; duplicates keep the
        # first offset and emit a warning.
        pos = storage_stream.tell()
        if storage_files.setdefault(filename, pos) != pos:
            warn("Duplicate Storage record for " + repr(filename))

    # Warn about any streams beyond the three we understand.
    streams = set(map(tuple, ole.listdir()))
    streams -= {("FileHeader",), ("Additional",), ("Storage",)}
    if streams:
        warn("Extra OLE file streams: " + ", ".join(map("/".join, streams)))

    return (sheet, storage_stream, storage_files)
def main(file):
    """Extract table text from a Word binary (.doc) file and write it as
    CSV to stdout.  Parsing issues recorded by the OLE layer are reported
    on stderr afterwards."""
    with open(file, "rb") as file:
        ole = OleFileIO(file)
        doc = ole.openstream("WordDocument")
        # FIB base: fixed-size header at the start of the WordDocument
        # stream; wIdent must mark a Word binary file.
        base = FibBase.unpack(doc.read(FibBase.size))
        [wIdent, _, _, _, _, bits_fm, _, _, _, _] = base
        assert wIdent == WORD_BINARY_FILE
        # Flag selecting which table stream ("0Table" or "1Table") is used.
        fWhichTblStm = bits_fm >> WHICH_TBL_STM_BIT & 1
        # Skip the variable-length FibRgW97 (csw 16-bit words) and
        # FibRgLw97 (cslw 32-bit words) sections.
        [csw] = unsigned2.unpack(doc.read(2))
        doc.seek(csw * 2, SEEK_CUR)
        [cslw] = unsigned2.unpack(doc.read(2))
        doc.seek(cslw * 4, SEEK_CUR)
        [cbRgFcLcb] = unsigned2.unpack(doc.read(2))
        cbRgFcLcb *= 8  # count is in 8-byte (fc, lcb) pairs
        assert cbRgFcLcb >= FibRgFcLcb97.size
        fibRgFcLcb97 = FibRgFcLcb97.unpack(doc.read(FibRgFcLcb97.size))
        [fcPlcfBtePapx, lcbPlcfBtePapx, fcClx, lcbClx] = fibRgFcLcb97
        table = ole.openstream("{}Table".format(fWhichTblStm))
        # Re-wrap stdout so csv controls the newlines it emits.
        out = TextIOWrapper(stdout.buffer, stdout.encoding, stdout.errors,
            newline="", line_buffering=stdout.line_buffering)
        try:
            writer = csv.writer(out)
            row = list()
            cell = None  # StringIO accumulating the current cell's text
            pieces = Pieces(doc, table, fcClx, lcbClx)
            i = 0
            while i < len(pieces):  # For each piece starting a paragraph
                piece = pieces[i]
                paras = iter_paras_from(doc, ole, table,
                    fcPlcfBtePapx, lcbPlcfBtePapx, piece.byte_offset)
                while True:  # For each paragraph in the current piece
                    # Scan ahead to find how many pieces span this paragraph
                    j = i
                    scan_piece = piece
                    while True:
                        [end, in_table, is_ttp] = next(paras)
                        end -= scan_piece.byte_offset
                        if end <= scan_piece.bytes_remaining:
                            break
                        while True:  # For each piece without paragraph info
                            j += 1
                            piece = pieces[j]
                            # NOTE(review): this call omits the "ole"
                            # argument that the call above passes --
                            # confirm the intended signature.
                            paras = iter_paras_from(doc, table,
                                fcPlcfBtePapx, lcbPlcfBtePapx,
                                scan_piece.byte_offset)
                            if paras is not None:
                                break
                    # Found a paragraph spanning pieces i-j
                    if is_ttp:
                        # End-of-row marker paragraph: flush the row.
                        writer.writerow(row)
                        row.clear()
                    if in_table and not is_ttp:
                        # Accumulate table-cell text across pieces i..j.
                        if not cell:
                            cell = StringIO()
                        while i < j:
                            copyfileobj(piece.get_reader(), cell)
                            i += 1
                            piece = pieces[i]
                        assert end
                        reader = piece.get_reader(end - piece.code_size)
                        copyfileobj(reader, cell)
                        # The final character decides: \x07 ends the cell,
                        # anything else is ordinary cell text.
                        mark = piece.get_reader(piece.code_size).read()
                        if mark == "\x07":
                            row.append(cell.getvalue())
                            cell = None
                        else:
                            cell.write(mark)
                    else:
                        # Non-table paragraph: nothing may be pending.
                        assert not row
                        assert not cell
                        if i < j:
                            i = j
                            piece = pieces[i]
                        piece.skip(end)
                    if not piece.bytes_remaining:
                        break
                i += 1
            assert not row
            assert not cell
        finally:
            # Detach instead of close so stdout itself stays open.
            out.detach()
        for [exctype, msg] in ole.parsing_issues:
            print("{}: {}".format(exctype.__name__, msg), file=stderr)
class ZVI(FramesSequence):
    """Read ZVI image sequences (single files containing many images) into an
    iterable object that returns images as numpy arrays.

    WARNING: This code is alpha code. It cannot interpret the ZVI metadata.
    Thus, the image shape must be specified manually (see example below)
    and the data type (16-bit grayscale) is hard-coded in this
    implementation.

    This reader, which relies on OleFileIO and PIL/Pillow, is tested on
    Zeiss AxioVision ZVI files. It should also read Olympus FluoView OIB
    files and others based on the legacy OLE file format.

    Parameters
    ----------
    filename : string
    process_func : function, optional
        callable with signalture `proc_img = process_func(img)`,
        which will be applied to the data from each frame
    as_grey : boolean, optional
        Convert color images to greyscale. False by default.
        May not be used in conjection with process_func.

    Examples
    --------
    >>> video = ZVI('filename.zvi', (660, 492))  # must specify shape manually
    >>> imshow(video[0]) # Show the first frame.
    >>> imshow(video[-1]) # Show the last frame.
    >>> imshow(video[1][0:10, 0:10]) # Show one corner of the second frame.

    >>> for frame in video[:]:
    ...    # Do something with every frame.

    >>> for frame in video[10:20]:
    ...    # Do something with frames 10-20.

    >>> for frame in video[[5, 7, 13]]:
    ...    # Do something with frames 5, 7, and 13.

    >>> frame_count = len(video) # Number of frames in video
    >>> frame_shape = video.frame_shape # Pixel dimensions of video
    """
    @classmethod
    def class_exts(cls):
        # Supported file extensions for this reader.
        # TODO extend this set to match reality
        return {'zvi'} | super(ZVI, cls).class_exts()

    def __init__(self, filename, shape, process_func=None, dtype=None,
                 as_grey=False):
        # Open the OLE container and index the Image/Item(N) streams.
        # NOTE(review): the dtype parameter is accepted but ignored;
        # uint16 is always used -- confirm whether that is intended.
        self._filename = filename
        self._ole = OleFileIO(self._filename)
        self._streams = self._ole.listdir()
        self._dtype = np.uint16
        self._im_sz = shape
        self._toc = []
        for stream in self._streams:
            # Only streams of the form ['Image', 'Item(N)', ...] are frames.
            if stream[0] != 'Image':
                continue
            m = re.match('Item\((\d+)\)', stream[1])
            if m is None:
                continue
            self._toc.append(int(m.group(1)))
        self._len = max(self._toc)
        # self._toc is not used hereafter, but it could be.
        self._validate_process_func(process_func)
        self._as_grey(as_grey, process_func)

    def get_frame(self, j):
        # Read the raw pixel data for frame j straight from its OLE stream.
        stream_label = ['Image', 'Item({0})'.format(j), 'Contents']
        data = self._ole.openstream(stream_label).read()
        img = Image.fromstring('I;16L', self._im_sz, data)
        # Mysteriously, the image comes in rolled by 162 pixels! Roll it back.
        arr = np.roll(np.asarray(img, dtype=self._dtype), -162)
        return Frame(self.process_func(arr), frame_no=j)

    @property
    def pixel_type(self):
        # numpy dtype of the returned frames
        return self._dtype

    @property
    def frame_shape(self):
        # (width, height) supplied by the caller at construction time
        return self._im_sz

    def __len__(self):
        return self._len

    def __repr__(self):
        # May be overwritten by subclasses
        # NOTE(review): the template prints "(unknown)" for Source although
        # a filename kwarg is passed to format() -- confirm whether
        # "{filename}" was intended in the template.
        return """<Frames>
Source: (unknown)
Length: {count} frames
Frame Shape: {w} x {h}
Pixel Datatype: {dtype}""".format(w=self.frame_shape[0],
                                  h=self.frame_shape[1],
                                  count=len(self),
                                  filename=self._filename,
                                  dtype=self.pixel_type)
def is_hwp5file(filename):
    """Return True if `filename` is an HWP v5 file.

    An HWP v5 file is an OLE2 compound file containing a 'FileHeader'
    stream.
    """
    if not isOleFile(filename):
        return False
    olefile = OleFileIO(filename)
    try:
        return olefile.exists('FileHeader')
    finally:
        # The original returned without closing, leaking the open file
        # handle for the lifetime of the process.
        olefile.close()
def ole2Explore(pyew, doprint=True, args=None):
    """ Get the OLE2 directory

    Dumps the directory, property streams and stream sizes of the OLE2
    file currently loaded in `pyew`.  NOTE: Python 2 code (print
    statements, `file()` builtin, `basestring`).  The `doprint` and
    `args` parameters are accepted but never used in this body.
    """
    if not pyew.physical:
        # Buffer-only target: write it to a temp file so OleFileIO can
        # open it by name.
        filename = tempfile.mkstemp("pyew")[1]
        f = file(filename, "wb")
        f.write(pyew.getBuffer())
        f.close()
    else:
        filename = pyew.filename
    ole = OleFileIO(filename, raise_defects=DEFECT_INCORRECT)
    ole.dumpdirectory()
    i = 0  # NOTE(review): assigned but never used
    for streamname in ole.listdir():
        # Property set streams start with a \x05 control character.
        if streamname[-1][0] == "\005":
            print streamname, ": properties"
            props = ole.getproperties(streamname)
            props = props.items()
            props.sort()
            for k, v in props:
                #[PL]: avoid to display too large or binary values:
                if isinstance(v, basestring):
                    if len(v) > 50:
                        v = v[:50]
                    # quick and dirty binary check:
                    for c in (1,2,3,4,5,6,7,11,12,14,15,16,17,18,19,20,
                              21,22,23,24,25,26,27,28,29,30,31):
                        if chr(c) in v:
                            v = '(binary data)'
                            break
                print " ", k, v
    # Read all streams to check if there are errors:
    print '\nChecking streams...'
    for streamname in ole.listdir():
        # print name using repr() to convert binary chars to \xNN:
        print '-', repr('/'.join(streamname)),'-',
        st_type = ole.get_type(streamname)
        if st_type == STGTY_STREAM:
            print 'size %d' % ole.get_size(streamname)
            # just try to read stream in memory:
            ole.openstream(streamname)
        else:
            print 'NOT a stream : type=%d' % st_type
    print ''
    #[PL] Test a few new methods:
    root = ole.get_rootentry_name()
    print 'Root entry name: "%s"' % root
    if ole.exists('worddocument'):
        print "This is a Word document."
        print "type of stream 'WordDocument':", ole.get_type('worddocument')
        print "size :", ole.get_size('worddocument')
    if ole.exists('macros/vba'):
        print "This document may contain VBA macros."
def ole2Explore(pyew):
    """ Get the OLE2 directory

    Prints the directory, property-set streams and per-stream sizes of
    the OLE2 file loaded in `pyew`.  NOTE: Python 2 code (print
    statements, `file()` builtin, `basestring`).
    """
    if not pyew.physical:
        # Target exists only as a buffer: spill it to a temp file so that
        # OleFileIO can open it by filename.
        filename = tempfile.mkstemp("pyew")[1]
        f = file(filename, "wb")
        f.write(pyew.getBuffer())
        f.close()
    else:
        filename = pyew.filename
    ole = OleFileIO(filename, raise_defects=DEFECT_INCORRECT)
    ole.dumpdirectory()
    i = 0  # NOTE(review): assigned but never used
    for streamname in ole.listdir():
        # Property set streams begin with the \x05 control character.
        if streamname[-1][0] == "\005":
            print streamname, ": properties"
            props = ole.getproperties(streamname)
            props = props.items()
            props.sort()
            for k, v in props:
                #[PL]: avoid to display too large or binary values:
                if isinstance(v, basestring):
                    if len(v) > 50:
                        v = v[:50]
                    # quick and dirty binary check:
                    for c in (1, 2, 3, 4, 5, 6, 7, 11, 12, 14, 15, 16, 17,
                              18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
                              29, 30, 31):
                        if chr(c) in v:
                            v = '(binary data)'
                            break
                print " ", k, v
    # Read all streams to check if there are errors:
    print '\nChecking streams...'
    for streamname in ole.listdir():
        # print name using repr() to convert binary chars to \xNN:
        print '-', repr('/'.join(streamname)), '-',
        st_type = ole.get_type(streamname)
        if st_type == STGTY_STREAM:
            print 'size %d' % ole.get_size(streamname)
            # just try to read stream in memory:
            ole.openstream(streamname)
        else:
            print 'NOT a stream : type=%d' % st_type
    print ''
    #[PL] Test a few new methods:
    root = ole.get_rootentry_name()
    print 'Root entry name: "%s"' % root
    if ole.exists('worddocument'):
        print "This is a Word document."
        print "type of stream 'WordDocument':", ole.get_type('worddocument')
        print "size :", ole.get_size('worddocument')
    if ole.exists('macros/vba'):
        print "This document may contain VBA macros."