def PrintStats(filename):
    """Print the title, subject, author and creation time of a storage file.

    Does nothing unless ``sys.platform`` is ``'win32'``.  Prints a short
    message and returns early when *filename* is not a structured storage
    file, or when its storage object cannot be queried for
    ``IPropertySetStorage``.

    Args:
        filename: Path of the file to inspect.

    Returns:
        None.  All results are written to standard output.
    """
    if sys.platform != 'win32':
        return

    if not pythoncom.StgIsStorageFile(filename):
        print("The file is not a storage file!")
        return

    # Open the file with the STGM_READ | STGM_SHARE_EXCLUSIVE flags.
    flags = storagecon.STGM_READ | storagecon.STGM_SHARE_EXCLUSIVE
    stg_ = pythoncom.StgOpenStorage(filename, None, flags)

    # See whether the storage object supports property information.
    try:
        pss = stg_.QueryInterface(pythoncom.IID_IPropertySetStorage)
    except pythoncom.com_error:
        print("No summary information is available")
        return

    # Open the property set and read all four properties in one call.
    ps = pss.Open(FMTID_UserDefinedProperties)
    props = PIDSI_TITLE, PIDSI_SUBJECT, PIDSI_AUTHOR, PIDSI_CREATE_DTM
    title, subject, author, created = ps.ReadMultiple(props)

    # NOTE(review): presumably a property that is absent from the file comes
    # back as None -- confirm against the pywin32 docs.  Guarding here keeps
    # a missing creation time from raising AttributeError on .Format().
    if created is not None:
        created_text = created.Format()
    else:
        created_text = None

    # Each line is printed as one pre-formatted string so the output is the
    # same whether ``print`` is the statement or the function.
    print("Title: %s" % (title,))
    print("Subject: %s" % (subject,))
    print("Author: %s" % (author,))
    print("Created: %s" % (created_text,))
def structured_storage(filename):
    """Collect document properties from a structured storage file.

    Typical inputs are MS Word documents and similar files with embedded
    structured storage.

    Args:
        filename: Path of the file to inspect.

    Returns:
        A dictionary holding only those properties whose value is truthy.
        An empty dictionary is returned when *filename* is not a storage
        file or when its storage object cannot be queried for
        ``IPropertySetStorage``.
    """
    if not pythoncom.StgIsStorageFile(filename):
        return {}

    open_flags = storagecon.STGM_READ | storagecon.STGM_SHARE_EXCLUSIVE
    storage = pythoncom.StgOpenStorage(filename, None, open_flags)
    try:
        property_sets = storage.QueryInterface(
            pythoncom.IID_IPropertySetStorage)
    except pythoncom.com_error:
        return {}

    sheet = property_sets.Open(FMTID_USER_DEFINED_PROPERTIES)
    try:
        values = sheet.ReadMultiple(PROPERTIES)
    finally:
        # Drop our reference to the property sheet whether or not the read
        # succeeded.
        sheet = None

    # Unpack explicitly so that a result of unexpected length still fails
    # loudly instead of being silently truncated.
    (title, subject, author, created_on, keywords, comments, template_used,
     updated_by, edited_on, printed_on, saved_on,
     n_pages, n_words, n_characters,
     application) = values

    labelled = (
        ('title', title),
        ('subject', subject),
        ('author', author),
        ('created_on', created_on),
        ('keywords', keywords),
        ('comments', comments),
        ('template_used', template_used),
        ('updated_by', updated_by),
        ('edited_on', edited_on),
        ('printed_on', printed_on),
        ('saved_on', saved_on),
        ('n_pages', n_pages),
        ('n_words', n_words),
        ('n_characters', n_characters),
        ('application', application),
    )

    found = {}
    for label, value in labelled:
        # Falsy values (None, empty strings, zero counts) are left out.
        if value:
            found[label] = value
    return found
def _bind_to_filter(self, fileName):
    """Bind an IFilter for *fileName* when it is a structured storage file.

    The file is opened as a storage object, kept on ``self.stg``, and a
    filter is bound from it into ``self.f``.  If binding fails with
    "no interface" (this happens for some Microsoft files), the filter is
    loaded directly from the file name instead.  Any other COM error is
    re-raised.

    NOTE(review): as written here nothing is assigned for files that are not
    structured storage files -- confirm that callers handle that case.

    Args:
        fileName: Path of the file to bind a filter to.

    Raises:
        pythoncom.com_error: If binding fails for any reason other than
            "no interface".
    """
    # HRESULT 0x80004002 ("no interface") as a signed 32-bit integer.
    no_interface = -2147467262

    if not pythoncom.StgIsStorageFile(fileName):
        return

    self.stg = pythoncom.StgOpenStorage(
        fileName, None,
        storagecon.STGM_READ | storagecon.STGM_SHARE_DENY_WRITE)
    try:
        self.f = ifilter.BindIFilterFromStorage(self.stg)
    except pythoncom.com_error as e:
        # e.args[0] carries the HRESULT; unlike e[0] it also works on
        # Python 3, where exceptions are not subscriptable.
        if e.args[0] != no_interface:
            raise
        # No interface: fall back to the load interface.
        self.f = ifilter.LoadIFilter(fileName)
def get_ifilter_for_file(filename, log=log):
    """Obtain an IFilter for a structured storage file where possible.

    See http://msdn2.microsoft.com/en-us/library/aa380369.aspx

    When *filename* is a structured storage file it is opened and a filter
    is bound from the storage object.  If binding fails with "no interface"
    the filter is obtained through ``load_ifilter`` instead; any other COM
    error is re-raised.

    NOTE(review): as written here the filter is only held in a local and is
    not returned, and files that are not structured storage files are not
    handled -- confirm whether part of this function is missing.

    Args:
        filename: Path of the file to obtain a filter for.
        log: Passed through to ``load_ifilter`` on the fallback path.

    Raises:
        pythoncom.com_error: If binding fails for any reason other than
            "no interface".
    """
    # HRESULT 0x80004002 ("no interface") as a signed 32-bit integer.
    no_interface = -2147467262

    if pythoncom.StgIsStorageFile(filename):
        storage_init_flags = STGM_READ | STGM_SHARE_DENY_WRITE
        stg = pythoncom.StgOpenStorage(filename, None, storage_init_flags)
        try:
            filt = ifilter.BindIFilterFromStorage(stg)
        except pythoncom.com_error as e:
            # e.args[0] carries the HRESULT; unlike e[0] it also works on
            # Python 3, where exceptions are not subscriptable.
            if e.args[0] != no_interface:
                raise
            filt = load_ifilter(filename, log=log)
def readOfficeProperties(self, shellName):
    """Fill ``title``, ``abstract`` and ``keywords`` from a storage file.

    Reads the title, subject and keywords properties of the file and stores
    them on ``self.title``, ``self.abstract`` and ``self.keywords`` (the
    keywords text is split on commas).

    When the file is not a structured storage file, ``self.abstract`` is set
    to ``'not a Storage file'`` and nothing else is touched.  When the
    storage object cannot be queried for ``IPropertySetStorage``,
    ``self.abstract`` is set to the empty string and nothing else is touched.

    Args:
        shellName: Path of the file to read.
    """
    # NOTE(review): the body used to read an unbound name ``filename`` while
    # this parameter went unused; the file to read is taken to be shellName
    # -- confirm with callers.
    filename = shellName

    if not pythoncom.StgIsStorageFile(filename):
        self.abstract = 'not a Storage file'
        # Stop here: opening a non-storage file as storage cannot succeed.
        return

    flags = storagecon.STGM_READ | storagecon.STGM_SHARE_EXCLUSIVE
    stg = pythoncom.StgOpenStorage(filename, None, flags)
    try:
        pss = stg.QueryInterface(pythoncom.IID_IPropertySetStorage)
    except pythoncom.com_error:
        self.abstract = ''
        return

    # Open the property set and read the three properties in one call.
    ps = pss.Open(FMTID_UserDefinedProperties)
    props = PIDSI_TITLE, PIDSI_SUBJECT, PIDSI_KEYWORDS
    self.title, self.abstract, keywordText = ps.ReadMultiple(props)

    # NOTE(review): presumably an absent keywords property comes back as
    # None -- confirm.  Treat that as "no keywords" rather than crashing.
    if keywordText is None:
        self.keywords = []
    else:
        self.keywords = keywordText.split(',')