def fileFormat_scanner(fileName):
    """Scan an OLE compound file for encryption indicators.

    Looks at the SummaryInformation property set and, for Word files, the
    FIB flags in the WordDocument stream.  Findings are printed to stdout.

    Returns True when the file was processed without errors, False otherwise.
    """
    try:
        oleFile = OleFileIO(fileName)
        for s in oleFile.listdir():
            if s == ["\x05SummaryInformation"]:
                print("Summary Informations Available")
                properties = oleFile.getproperties(s)
                if 0x12 in properties:
                    # 0x12 = creating application name (stored, not reported)
                    appName = properties[0x12]
                # Bit 0 of the security property (0x13) marks an encrypted
                # document
                if 0x13 in properties and properties[0x13] & 1:
                    print("Document is Encrypted")
            if s == ['WordDocument']:
                s_word = oleFile.openstream(['WordDocument'])
                try:
                    # The fEncrypted flag is bit 8 of the 16-bit flag field at
                    # offset 10 of the WordDocument stream (FIB base).
                    # NOTE(review): "H" uses native byte order; the format is
                    # little-endian, so "<H" may be intended -- confirm.
                    s_word.read(10)
                    temp16 = unpack("H", s_word.read(2))[0]
                    if (temp16 & 0x0100) >> 8:
                        print("Word Document Encrypted")
                finally:
                    # Close the stream even if parsing fails mid-way; the
                    # original leaked it on error.
                    s_word.close()
    except Exception:
        # Was a bare "except:", which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to ordinary errors.
        print("Error While Processing OLE Streams")
        return False
    return True
def read(file):
    """Returns a sequence of objects from an Altium *.SchDoc schematic file
    """
    ole = OleFileIO(file)
    stream = ole.openstream("FileHeader")
    parsed = []
    # Each record is a 4-byte little-endian length, a pipe-separated
    # property list, then a null terminator byte.
    for size_bytes in iter(lambda: stream.read(4), b""):
        (size,) = struct.unpack("<I", size_bytes)
        raw = stream.read(size - 1)
        record = {}
        for pair in raw.split(b"|"):
            # Most (but not all) property lists are prefixed with a pipe
            # "|", so skip the empty chunk before the prefix.
            if not pair:
                continue
            (key, value) = pair.split(b"=", 1)
            record[key.decode("ascii")] = value
        parsed.append(record)
        # Skip over null terminator byte
        stream.seek(+1, SEEK_CUR)
    return parsed
def read(file):
    """Returns a sequence of objects from an Altium *.SchDoc schematic file
    """
    stream = OleFileIO(file).openstream("FileHeader")
    objects = []
    while True:
        prefix = stream.read(4)
        if not prefix:
            # End of the FileHeader stream
            return objects
        (count,) = struct.unpack("<I", prefix)
        body = stream.read(count - 1)
        entry = {}
        for item in body.split(b"|"):
            # Property lists are usually (not always) prefixed with a pipe
            # "|"; the empty chunk before that prefix is ignored.
            if item:
                (name, data) = item.split(b"=", 1)
                entry[name.decode("ascii")] = data
        objects.append(entry)
        # Skip over null terminator byte
        stream.seek(+1, SEEK_CUR)
def main():
    # Parse an OLE compound file named on the command line and dump its
    # parsed contents as JSON.  NOTE: Python 2 code (print statements).
    # initialize OLE file parser
    global ole  # read by the module-level read() helper
    parser = ArgumentParser()
    parser.add_argument("file")
    args = parser.parse_args()
    # open file by filename
    ole = OleFileIO(args.file)
    content = ole.listdir()
    print "File contents:"
    print content, "\n"
    # parse FileHeader
    objects = read("FileHeader")
    result = {}
    result["FileHeader"] = objects
    # parse all other contents
    for doc in content:
        if len(doc) > 1:
            # Nested stream: group results by top-level storage name
            path = "/".join(doc)
            print path
            if not doc[0] in result:
                result[doc[0]] = {}
            result[doc[0]][doc[1]] = read(path)
        else:
            # Top-level stream.  NOTE(review): passes the list `doc` here but
            # a joined string above -- confirm read() accepts both forms.
            result[doc[0]] = read(doc)
    # output parsed content as formatted JSON
    print json_dumps(result, indent=4)
def __init__(self, filename, shape, process_func=None, dtype=None,
             as_grey=False):
    """Open a ZVI file and index its image streams.

    Parameters
    ----------
    filename : string
    shape : tuple
        image dimensions; the ZVI metadata is not interpreted, so the
        shape must be given explicitly
    process_func : function, optional
        applied to each frame's data
    dtype : numpy dtype, optional
        pixel type; defaults to numpy.uint16
    as_grey : boolean, optional
        convert color images to greyscale
    """
    self._filename = filename
    self._ole = OleFileIO(self._filename)
    self._streams = self._ole.listdir()
    # Honor the caller-supplied dtype; previously the parameter was
    # accepted but silently ignored and uint16 was always used.
    self._dtype = np.uint16 if dtype is None else dtype
    self._im_sz = shape
    self._toc = []
    # Raw string avoids the invalid-escape-sequence warning on \( and \d.
    item_pattern = re.compile(r'Item\((\d+)\)')
    for stream in self._streams:
        if stream[0] != 'Image':
            continue
        m = item_pattern.match(stream[1])
        if m is None:
            continue
        self._toc.append(int(m.group(1)))
    # NOTE(review): if items are numbered from 0, the length should
    # arguably be max + 1 -- confirm against real files.
    self._len = max(self._toc)
    # self._toc is not used hereafter, but it could be.
    self._validate_process_func(process_func)
    self._as_grey(as_grey, process_func)
def read(file):
    """Parses an Altium ".SchDoc" schematic file and returns a Sheet object

    Returns a (sheet, storage_stream, storage_files) tuple, where
    storage_files maps embedded file names to their offsets in the
    Storage stream.
    """
    ole = OleFileIO(file)

    # Main object tree lives in the FileHeader stream: a header record,
    # then one record per object, each referencing its owner by index.
    stream = ole.openstream("FileHeader")
    records = iter_records(stream)
    records = (parse_properties(stream, record) for record in records)
    header = next(records)
    parse_header(header)
    header.check_unknown()
    sheet = Object(properties=next(records))
    objects = [sheet]
    for properties in records:
        obj = Object(properties=properties)
        # Attach each object to its owner; objects index themselves by
        # position in the list.
        objects[obj.properties.get_int("OWNERINDEX")].children.append(obj)
        objects.append(obj)

    # Optional "Additional" stream holds more objects in the same format.
    if ole.exists("Additional"):
        stream = ole.openstream("Additional")
        records = iter_records(stream)
        records = (parse_properties(stream, record) for record in records)
        header = next(records)
        parse_header(header)
        header.check_unknown()
        for properties in records:
            obj = Object(properties=properties)
            owner = obj.properties.get_int("OWNERINDEX")
            objects[owner].children.append(obj)
            objects.append(obj)

    # "Storage" stream: icon storage header followed by embedded files,
    # each a type-1 record starting with a 0xD0 marker byte, a one-byte
    # filename length, and the filename.
    storage_stream = ole.openstream("Storage")
    records = iter_records(storage_stream)
    header = parse_properties(storage_stream, next(records))
    header.check("HEADER", b"Icon storage")
    header.get_int("WEIGHT")
    header.check_unknown()
    storage_files = dict()
    for [type, length] in records:  # NOTE: shadows builtin "type" locally
        if type != 1:
            warn("Unexpected record type {} in Storage".format(type))
            continue
        header = storage_stream.read(1)
        if header != b"\xD0":
            warn("Unexpected Storage record header byte " + repr(header))
            continue
        [length] = storage_stream.read(1)
        filename = storage_stream.read(length)
        # Record the file's data offset; warn on duplicate names rather
        # than overwrite.
        pos = storage_stream.tell()
        if storage_files.setdefault(filename, pos) != pos:
            warn("Duplicate Storage record for " + repr(filename))

    # Anything beyond the three known streams is unexpected.
    streams = set(map(tuple, ole.listdir()))
    streams -= {("FileHeader", ), ("Additional", ), ("Storage", )}
    if streams:
        warn("Extra OLE file streams: " + ", ".join(map("/".join, streams)))

    return (sheet, storage_stream, storage_files)
def __init__(self, olefile, path='', parent=None):
    """Wrap an OLE storage item.

    Accepts either an already-open OleFileIO-like object (anything with an
    ``openstream`` attribute) or something OleFileIO can open itself
    (filename or file object).  Raises InvalidOleStorageError when the
    argument is not an OLE2 compound binary file.
    """
    if not hasattr(olefile, 'openstream'):
        # Not an open OLE document yet: validate, then open it ourselves.
        from OleFileIO_PL import isOleFile
        if not isOleFile(olefile):
            from hwp5.errors import InvalidOleStorageError
            raise InvalidOleStorageError('Not an OLE2 Compound Binary File.')
        from OleFileIO_PL import OleFileIO
        olefile = OleFileIO(olefile)
    OleStorageItem.__init__(self, olefile, path, parent)
def read(file):
    """Parses an Altium ".SchDoc" schematic file and returns a Sheet object

    The result is a (sheet, storage_stream, storage_files) tuple;
    storage_files maps embedded file names to offsets within the
    Storage stream.
    """
    ole = OleFileIO(file)

    # FileHeader stream: a header record, then one record per object.
    stream = ole.openstream("FileHeader")
    records = iter_records(stream)
    records = (parse_properties(stream, record) for record in records)
    header = next(records)
    parse_header(header)
    header.check_unknown()
    # First object record is the sheet; every later object names its
    # owner by index into the growing objects list.
    sheet = Object(properties=next(records))
    objects = [sheet]
    for properties in records:
        obj = Object(properties=properties)
        objects[obj.properties.get_int("OWNERINDEX")].children.append(obj)
        objects.append(obj)

    # The optional "Additional" stream carries more objects, same layout.
    if ole.exists("Additional"):
        stream = ole.openstream("Additional")
        records = iter_records(stream)
        records = (parse_properties(stream, record) for record in records)
        header = next(records)
        parse_header(header)
        header.check_unknown()
        for properties in records:
            obj = Object(properties=properties)
            owner = obj.properties.get_int("OWNERINDEX")
            objects[owner].children.append(obj)
            objects.append(obj)

    # "Storage" stream: header then embedded files; each file record is
    # type 1, begins with a 0xD0 marker, a 1-byte name length, and name.
    storage_stream = ole.openstream("Storage")
    records = iter_records(storage_stream)
    header = parse_properties(storage_stream, next(records))
    header.check("HEADER", b"Icon storage")
    header.get_int("WEIGHT")
    header.check_unknown()
    storage_files = dict()
    for [type, length] in records:  # NOTE: "type" shadows the builtin here
        if type != 1:
            warn("Unexpected record type {} in Storage".format(type))
            continue
        header = storage_stream.read(1)
        if header != b"\xD0":
            warn("Unexpected Storage record header byte " + repr(header))
            continue
        [length] = storage_stream.read(1)
        filename = storage_stream.read(length)
        # Remember where this file's data starts; duplicates keep the
        # first offset and emit a warning.
        pos = storage_stream.tell()
        if storage_files.setdefault(filename, pos) != pos:
            warn("Duplicate Storage record for " + repr(filename))

    # Warn about any streams beyond the three we understand.
    streams = set(map(tuple, ole.listdir()))
    streams -= {("FileHeader",), ("Additional",), ("Storage",)}
    if streams:
        warn("Extra OLE file streams: " + ", ".join(map("/".join, streams)))

    return (sheet, storage_stream, storage_files)
def main(file):
    """Extract table text from a Word binary (.doc) file and write it as
    CSV to stdout.  Parsing issues recorded by the OLE layer are reported
    on stderr afterwards."""
    with open(file, "rb") as file:
        ole = OleFileIO(file)
        doc = ole.openstream("WordDocument")
        # FIB base: fixed-size header at the start of the WordDocument
        # stream; wIdent must mark a Word binary file.
        base = FibBase.unpack(doc.read(FibBase.size))
        [wIdent, _, _, _, _, bits_fm, _, _, _, _] = base
        assert wIdent == WORD_BINARY_FILE
        # Flag selecting which table stream ("0Table" or "1Table") is used.
        fWhichTblStm = bits_fm >> WHICH_TBL_STM_BIT & 1
        # Skip the variable-length FibRgW97 (csw 16-bit words) and
        # FibRgLw97 (cslw 32-bit words) sections.
        [csw] = unsigned2.unpack(doc.read(2))
        doc.seek(csw * 2, SEEK_CUR)
        [cslw] = unsigned2.unpack(doc.read(2))
        doc.seek(cslw * 4, SEEK_CUR)
        [cbRgFcLcb] = unsigned2.unpack(doc.read(2))
        cbRgFcLcb *= 8  # count is in 8-byte (fc, lcb) pairs
        assert cbRgFcLcb >= FibRgFcLcb97.size
        fibRgFcLcb97 = FibRgFcLcb97.unpack(doc.read(FibRgFcLcb97.size))
        [fcPlcfBtePapx, lcbPlcfBtePapx, fcClx, lcbClx] = fibRgFcLcb97
        table = ole.openstream("{}Table".format(fWhichTblStm))
        # Re-wrap stdout so csv controls the newlines it emits.
        out = TextIOWrapper(stdout.buffer, stdout.encoding, stdout.errors,
            newline="", line_buffering=stdout.line_buffering)
        try:
            writer = csv.writer(out)
            row = list()
            cell = None  # StringIO accumulating the current cell's text
            pieces = Pieces(doc, table, fcClx, lcbClx)
            i = 0
            while i < len(pieces):  # For each piece starting a paragraph
                piece = pieces[i]
                paras = iter_paras_from(doc, ole, table,
                    fcPlcfBtePapx, lcbPlcfBtePapx, piece.byte_offset)
                while True:  # For each paragraph in the current piece
                    # Scan ahead to find how many pieces span this paragraph
                    j = i
                    scan_piece = piece
                    while True:
                        [end, in_table, is_ttp] = next(paras)
                        end -= scan_piece.byte_offset
                        if end <= scan_piece.bytes_remaining:
                            break
                        while True:  # For each piece without paragraph info
                            j += 1
                            piece = pieces[j]
                            # NOTE(review): this call omits the "ole"
                            # argument that the call above passes --
                            # confirm the intended signature.
                            paras = iter_paras_from(doc, table,
                                fcPlcfBtePapx, lcbPlcfBtePapx,
                                scan_piece.byte_offset)
                            if paras is not None:
                                break
                    # Found a paragraph spanning pieces i-j
                    if is_ttp:
                        # End-of-row marker paragraph: flush the row.
                        writer.writerow(row)
                        row.clear()
                    if in_table and not is_ttp:
                        # Accumulate table-cell text across pieces i..j.
                        if not cell:
                            cell = StringIO()
                        while i < j:
                            copyfileobj(piece.get_reader(), cell)
                            i += 1
                            piece = pieces[i]
                        assert end
                        reader = piece.get_reader(end - piece.code_size)
                        copyfileobj(reader, cell)
                        # The final character decides: \x07 ends the cell,
                        # anything else is ordinary cell text.
                        mark = piece.get_reader(piece.code_size).read()
                        if mark == "\x07":
                            row.append(cell.getvalue())
                            cell = None
                        else:
                            cell.write(mark)
                    else:
                        # Non-table paragraph: nothing may be pending.
                        assert not row
                        assert not cell
                        if i < j:
                            i = j
                            piece = pieces[i]
                        piece.skip(end)
                    if not piece.bytes_remaining:
                        break
                i += 1
            assert not row
            assert not cell
        finally:
            # Detach instead of close so stdout itself stays open.
            out.detach()
        for [exctype, msg] in ole.parsing_issues:
            print("{}: {}".format(exctype.__name__, msg), file=stderr)
class ZVI(FramesSequence):
    """Read ZVI image sequences (single files containing many images) into an
    iterable object that returns images as numpy arrays.

    WARNING: This code is alpha code. It cannot interpret the ZVI metadata.
    Thus, the image shape must be specified manually (see example below)
    and the data type (16-bit grayscale) is hard-coded in this
    implementation.

    This reader, which relies on OleFileIO and PIL/Pillow, is tested on
    Zeiss AxioVision ZVI files. It should also read Olympus FluoView OIB
    files and others based on the legacy OLE file format.

    Parameters
    ----------
    filename : string
    process_func : function, optional
        callable with signalture `proc_img = process_func(img)`,
        which will be applied to the data from each frame
    as_grey : boolean, optional
        Convert color images to greyscale. False by default.
        May not be used in conjection with process_func.

    Examples
    --------
    >>> video = ZVI('filename.zvi', (660, 492))  # must specify shape manually
    >>> imshow(video[0]) # Show the first frame.
    >>> imshow(video[-1]) # Show the last frame.
    >>> imshow(video[1][0:10, 0:10]) # Show one corner of the second frame.

    >>> for frame in video[:]:
    ...    # Do something with every frame.

    >>> for frame in video[10:20]:
    ...    # Do something with frames 10-20.

    >>> for frame in video[[5, 7, 13]]:
    ...    # Do something with frames 5, 7, and 13.

    >>> frame_count = len(video) # Number of frames in video
    >>> frame_shape = video.frame_shape # Pixel dimensions of video
    """
    @classmethod
    def class_exts(cls):
        # Supported file extensions for this reader.
        # TODO extend this set to match reality
        return {'zvi'} | super(ZVI, cls).class_exts()

    def __init__(self, filename, shape, process_func=None, dtype=None,
                 as_grey=False):
        # Open the OLE container and index the Image/Item(N) streams.
        # NOTE(review): the dtype parameter is accepted but ignored;
        # uint16 is always used -- confirm whether that is intended.
        self._filename = filename
        self._ole = OleFileIO(self._filename)
        self._streams = self._ole.listdir()
        self._dtype = np.uint16
        self._im_sz = shape
        self._toc = []
        for stream in self._streams:
            # Only streams of the form ['Image', 'Item(N)', ...] are frames.
            if stream[0] != 'Image':
                continue
            m = re.match('Item\((\d+)\)', stream[1])
            if m is None:
                continue
            self._toc.append(int(m.group(1)))
        self._len = max(self._toc)
        # self._toc is not used hereafter, but it could be.
        self._validate_process_func(process_func)
        self._as_grey(as_grey, process_func)

    def get_frame(self, j):
        # Read the raw pixel data for frame j straight from its OLE stream.
        stream_label = ['Image', 'Item({0})'.format(j), 'Contents']
        data = self._ole.openstream(stream_label).read()
        img = Image.fromstring('I;16L', self._im_sz, data)
        # Mysteriously, the image comes in rolled by 162 pixels! Roll it back.
        arr = np.roll(np.asarray(img, dtype=self._dtype), -162)
        return Frame(self.process_func(arr), frame_no=j)

    @property
    def pixel_type(self):
        # numpy dtype of the returned frames
        return self._dtype

    @property
    def frame_shape(self):
        # (width, height) supplied by the caller at construction time
        return self._im_sz

    def __len__(self):
        return self._len

    def __repr__(self):
        # May be overwritten by subclasses
        # NOTE(review): the template prints "(unknown)" for Source although
        # a filename kwarg is passed to format() -- confirm whether
        # "{filename}" was intended in the template.
        return """<Frames>
Source: (unknown)
Length: {count} frames
Frame Shape: {w} x {h}
Pixel Datatype: {dtype}""".format(w=self.frame_shape[0],
                                  h=self.frame_shape[1],
                                  count=len(self),
                                  filename=self._filename,
                                  dtype=self.pixel_type)
def is_hwp5file(filename):
    """Return True if `filename` is an HWP v5 file.

    An HWP v5 file is an OLE2 compound file containing a 'FileHeader'
    stream.
    """
    if not isOleFile(filename):
        return False
    olefile = OleFileIO(filename)
    try:
        return olefile.exists('FileHeader')
    finally:
        # The original returned without closing, leaking the open file
        # handle for the lifetime of the process.
        olefile.close()
def ole2Explore(pyew, doprint=True, args=None):
    """ Get the OLE2 directory

    Dumps the directory, property streams and stream sizes of the OLE2
    file currently loaded in `pyew`.  NOTE: Python 2 code (print
    statements, `file()` builtin, `basestring`).  The `doprint` and
    `args` parameters are accepted but never used in this body.
    """
    if not pyew.physical:
        # Buffer-only target: write it to a temp file so OleFileIO can
        # open it by name.
        filename = tempfile.mkstemp("pyew")[1]
        f = file(filename, "wb")
        f.write(pyew.getBuffer())
        f.close()
    else:
        filename = pyew.filename
    ole = OleFileIO(filename, raise_defects=DEFECT_INCORRECT)
    ole.dumpdirectory()
    i = 0  # NOTE(review): assigned but never used
    for streamname in ole.listdir():
        # Property set streams start with a \x05 control character.
        if streamname[-1][0] == "\005":
            print streamname, ": properties"
            props = ole.getproperties(streamname)
            props = props.items()
            props.sort()
            for k, v in props:
                #[PL]: avoid to display too large or binary values:
                if isinstance(v, basestring):
                    if len(v) > 50:
                        v = v[:50]
                    # quick and dirty binary check:
                    for c in (1,2,3,4,5,6,7,11,12,14,15,16,17,18,19,20,
                              21,22,23,24,25,26,27,28,29,30,31):
                        if chr(c) in v:
                            v = '(binary data)'
                            break
                print " ", k, v
    # Read all streams to check if there are errors:
    print '\nChecking streams...'
    for streamname in ole.listdir():
        # print name using repr() to convert binary chars to \xNN:
        print '-', repr('/'.join(streamname)),'-',
        st_type = ole.get_type(streamname)
        if st_type == STGTY_STREAM:
            print 'size %d' % ole.get_size(streamname)
            # just try to read stream in memory:
            ole.openstream(streamname)
        else:
            print 'NOT a stream : type=%d' % st_type
    print ''
    #[PL] Test a few new methods:
    root = ole.get_rootentry_name()
    print 'Root entry name: "%s"' % root
    if ole.exists('worddocument'):
        print "This is a Word document."
        print "type of stream 'WordDocument':", ole.get_type('worddocument')
        print "size :", ole.get_size('worddocument')
    if ole.exists('macros/vba'):
        print "This document may contain VBA macros."
def ole2Explore(pyew):
    """ Get the OLE2 directory

    Prints the directory, property-set streams and per-stream sizes of
    the OLE2 file loaded in `pyew`.  NOTE: Python 2 code (print
    statements, `file()` builtin, `basestring`).
    """
    if not pyew.physical:
        # Target exists only as a buffer: spill it to a temp file so that
        # OleFileIO can open it by filename.
        filename = tempfile.mkstemp("pyew")[1]
        f = file(filename, "wb")
        f.write(pyew.getBuffer())
        f.close()
    else:
        filename = pyew.filename
    ole = OleFileIO(filename, raise_defects=DEFECT_INCORRECT)
    ole.dumpdirectory()
    i = 0  # NOTE(review): assigned but never used
    for streamname in ole.listdir():
        # Property set streams begin with the \x05 control character.
        if streamname[-1][0] == "\005":
            print streamname, ": properties"
            props = ole.getproperties(streamname)
            props = props.items()
            props.sort()
            for k, v in props:
                #[PL]: avoid to display too large or binary values:
                if isinstance(v, basestring):
                    if len(v) > 50:
                        v = v[:50]
                    # quick and dirty binary check:
                    for c in (1, 2, 3, 4, 5, 6, 7, 11, 12, 14, 15, 16, 17,
                              18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
                              29, 30, 31):
                        if chr(c) in v:
                            v = '(binary data)'
                            break
                print " ", k, v
    # Read all streams to check if there are errors:
    print '\nChecking streams...'
    for streamname in ole.listdir():
        # print name using repr() to convert binary chars to \xNN:
        print '-', repr('/'.join(streamname)), '-',
        st_type = ole.get_type(streamname)
        if st_type == STGTY_STREAM:
            print 'size %d' % ole.get_size(streamname)
            # just try to read stream in memory:
            ole.openstream(streamname)
        else:
            print 'NOT a stream : type=%d' % st_type
    print ''
    #[PL] Test a few new methods:
    root = ole.get_rootentry_name()
    print 'Root entry name: "%s"' % root
    if ole.exists('worddocument'):
        print "This is a Word document."
        print "type of stream 'WordDocument':", ole.get_type('worddocument')
        print "size :", ole.get_size('worddocument')
    if ole.exists('macros/vba'):
        print "This document may contain VBA macros."