def setPropertySetStreamAttributes(self, stream, args):
    """Publish the Office property-set metadata of ``stream`` as attributes.

    For every section CLSID listed in ``OfficeDocumentSectionCLSID`` that is
    present in the stream, the known properties are collected into a ``VMap``
    keyed by property name. The map is attached to the stream through
    ``stream.setExtraAttributes`` and, unless ``'no-root_metadata'`` is in
    ``args``, also appended to ``self.extraAttr`` together with the section
    name and the name of the stream's parent.

    Special cases:
      * 'Total editing time' is stored as the string form of a
        ``datetime.timedelta`` built from the raw value divided by 10000000
        (special case, see msoshared.py).
      * 'Code page' updates ``self.codePage`` ('cp' + number) when the raw
        value is a ``long``; the property itself is stored unchanged.
      * Title, Subject, Author, Comments and Last Author are decoded with
        ``self.codePage`` (when one is known) and stored UTF-8 encoded.

    Args:
        stream: stream object handed to ``PropertySetStream``.
        args: container of option strings; only 'no-root_metadata' is read.

    NOTE(review): the original text had no indentation; the nesting of the
    ``if``/``elif``/``else`` chain was reconstructed from the code's logic --
    confirm against the upstream file.
    """
    # Names of the text properties that are re-encoded to UTF-8.
    textNames = ("Title", "Subject", "Author", "Comments", "Last Author")

    propertySet = PropertySetStream(stream, OfficeDocumentSectionCLSID.keys())
    for clsid in OfficeDocumentSectionCLSID.iterkeys():
        section = propertySet.sectionCLSID(clsid)
        if not section:
            continue
        (sectionName, sectionIDS) = OfficeDocumentSectionCLSID[clsid]

        # First pass: gather the usable values and resolve the code page, so
        # that decoding below does not depend on the iteration order of
        # sectionIDS.
        found = []
        for k, v in sectionIDS.iteritems():
            Property = section.PropertyList.propertyID(k)
            if not Property:
                continue
            p = Property.Variant.Value
            # Per the original comment, Thumbnail is a node rather than a
            # Variant; such values are left out of the attribute map.
            if not (p and isinstance(p, Variant)):
                continue
            if v == 'Code page':
                codePage = p.value()
                if isinstance(codePage, long):
                    self.codePage = 'cp' + str(codePage)
            found.append((v, p))

        # self.codePage may never have been assigned; treat that as unknown
        # instead of failing with AttributeError.
        codec = getattr(self, 'codePage', None)

        # Second pass: convert the values and fill the attribute map.
        mattr = VMap()
        for v, p in found:
            if v == 'Total editing time':
                elapsed = datetime.timedelta(seconds=(p.value() / 10000000))
                p = Variant(str(elapsed))
            elif v == 'Code page':
                pass  # stored exactly as read
            elif codec and v in textNames:
                p = Variant(p.value().decode(codec).encode('UTF-8'))
            else:
                p = Variant(p)
            mattr[v] = p

        stream.setExtraAttributes((sectionName, mattr,))
        if 'no-root_metadata' not in args:
            self.extraAttr.append(
                (sectionName, stream.parent().name(), mattr,))
def __init__(self, node, largs, mfsobj=None):
    """Parse the compound document stored in ``node`` and process its streams.

    The document header is built and parsed first. Every stream whose
    ``objectType`` is "StreamObject" is then handled according to its name:

      * "WordDocument": text and picture nodes are created, unless disabled
        by 'no-extraction', 'no-text' or 'no-pictures'.
      * "Pictures": picture nodes are created, unless disabled by
        'no-pictures' or 'no-extraction'.
      * anything else: read as an Office property set; the properties found
        are attached to the stream and, unless 'no-root_metadata' is set,
        recorded in ``self.extraAttr``.

    Failures while handling a single stream are ignored so that the other
    streams are still processed.

    Args:
        node: node passed to ``CompoundDocumentHeader``.
        largs: container of option strings (see above).
        mfsobj: optional object forwarded to ``CompoundDocumentHeader``.

    Raises:
        Exception: "Can't parse document" when the header cannot be built or
            parsed.

    NOTE(review): the original text had no indentation; nesting was
    reconstructed from the code's logic -- confirm against the upstream file.
    """
    self.node = node
    self.attr = {}
    self.extraAttr = []
    extract = 'no-extraction' not in largs

    try:
        self.cdh = CompoundDocumentHeader(node, mfsobj)
        self.cdh.parseDocument(extract)
    except (KeyboardInterrupt, SystemExit):
        # Never disguise an interrupt or interpreter exit as a parse error.
        raise
    except:
        raise Exception("Can't parse document")

    streams = self.cdh.streams()
    for stream in streams:
        if stream.objectType == "StreamObject":
            try:
                if stream.objectName == "WordDocument":
                    if extract:
                        wd = WordDocument(stream)
                        if 'no-text' not in largs:
                            wd.createTextNodes()
                        if 'no-pictures' not in largs:
                            wd.createPictureNodes()
                elif stream.objectName == "Pictures":
                    if extract and 'no-pictures' not in largs:
                        ppt = PPT(stream)
                        ppt.createPictureNodes()
                else:
                    propertySet = PropertySetStream(
                        stream, OfficeDocumentSectionCLSID.keys())
                    for clsid in OfficeDocumentSectionCLSID.iterkeys():
                        section = propertySet.sectionCLSID(clsid)
                        if not section:
                            continue
                        (sectionName,
                         sectionIDS) = OfficeDocumentSectionCLSID[clsid]
                        mattr = VMap()
                        for k, v in sectionIDS.iteritems():
                            Property = section.PropertyList.propertyID(k)
                            if not Property:
                                continue
                            p = Property.Variant.Value
                            # Per the original comment, Thumbnail is a node
                            # rather than a Variant; such values are skipped.
                            if not (p and isinstance(p, Variant)):
                                continue
                            if v == 'Total editing time':
                                # special case, see msoshared.py
                                elapsed = datetime.timedelta(
                                    seconds=(p.value() / 10000000))
                                p = Variant(str(elapsed))
                            else:
                                p = Variant(p)
                            mattr[v] = p
                        stream.setExtraAttributes((sectionName, mattr,))
                        if 'no-root_metadata' not in largs:
                            self.extraAttr.append(
                                (sectionName, stream.parent().name(), mattr,))
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                # Deliberately best-effort: a stream that cannot be handled
                # must not stop the remaining streams from being processed.
                pass
        if extract:
            # Drop the local reference to the stream once it has been handled.
            del stream
def __init__(self, node, largs, mfsobj=None):
    """Parse the compound document stored in ``node`` and process its streams.

    The document header is built and parsed first. Every stream whose
    ``objectType`` is "StreamObject" is then handled according to its name:

      * "WordDocument": text and picture nodes are created, unless disabled
        by 'no-extraction', 'no-text' or 'no-pictures'.
      * "Pictures": picture nodes are created, unless disabled by
        'no-pictures' or 'no-extraction'.
      * anything else: read as an Office property set; the properties found
        are attached to the stream and, unless 'no-root_metadata' is set,
        recorded in ``self.extraAttr``.

    Failures while handling a single stream are ignored so that the other
    streams are still processed.

    Args:
        node: node passed to ``CompoundDocumentHeader``.
        largs: container of option strings (see above).
        mfsobj: optional object forwarded to ``CompoundDocumentHeader``.

    Raises:
        Exception: "Can't parse document" when the header cannot be built or
            parsed.

    NOTE(review): the original text had no indentation; nesting was
    reconstructed from the code's logic -- confirm against the upstream file.
    """
    self.node = node
    self.attr = {}
    self.extraAttr = []
    extract = 'no-extraction' not in largs

    try:
        self.cdh = CompoundDocumentHeader(node, mfsobj)
        self.cdh.parseDocument(extract)
    except (KeyboardInterrupt, SystemExit):
        # Never disguise an interrupt or interpreter exit as a parse error.
        raise
    except:
        raise Exception("Can't parse document")

    streams = self.cdh.streams()
    for stream in streams:
        if stream.objectType == "StreamObject":
            try:
                if stream.objectName == "WordDocument":
                    if extract:
                        wd = WordDocument(stream)
                        if 'no-text' not in largs:
                            wd.createTextNodes()
                        if 'no-pictures' not in largs:
                            wd.createPictureNodes()
                elif stream.objectName == "Pictures":
                    if extract and 'no-pictures' not in largs:
                        ppt = PPT(stream)
                        ppt.createPictureNodes()
                else:
                    propertySet = PropertySetStream(
                        stream, OfficeDocumentSectionCLSID.keys())
                    for clsid in OfficeDocumentSectionCLSID.iterkeys():
                        section = propertySet.sectionCLSID(clsid)
                        if not section:
                            continue
                        (sectionName,
                         sectionIDS) = OfficeDocumentSectionCLSID[clsid]
                        mattr = VMap()
                        for k, v in sectionIDS.iteritems():
                            Property = section.PropertyList.propertyID(k)
                            if not Property:
                                continue
                            p = Property.Variant.Value
                            # Per the original comment, Thumbnail is a node
                            # rather than a Variant; such values are skipped.
                            if not (p and isinstance(p, Variant)):
                                continue
                            if v == 'Total editing time':
                                # special case, see msoshared.py
                                elapsed = datetime.timedelta(
                                    seconds=(p.value() / 10000000))
                                p = Variant(str(elapsed))
                            else:
                                p = Variant(p)
                            mattr[v] = p
                        stream.setExtraAttributes((sectionName, mattr,))
                        if 'no-root_metadata' not in largs:
                            self.extraAttr.append(
                                (sectionName, stream.parent().name(), mattr,))
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                # Deliberately best-effort: a stream that cannot be handled
                # must not stop the remaining streams from being processed.
                pass
        if extract:
            # Drop the local reference to the stream once it has been handled.
            del stream