def fload(self,buf="",parent=None,package=None):
    """Detect the format of a file or embedded stream and run its parser.

    The buffer is matched against a fixed sequence of magic signatures in
    source order; the first match wins.  A match sets ``self.type`` (either
    to a literal tag or to the value returned by the parser) and calls the
    parser module for that format.

    The code targets Python 2 byte strings: ``buf`` is compared against
    ``str`` literals and single bytes are read with ``ord()``.

    Args:
        buf: raw data to inspect.  When it is the empty string, the file
            named by ``self.fname`` is read instead.
        parent: tree iterator of the enclosing entry for nested data, or
            ``None`` for a top-level file.
        package: passed to ``iwa.detect()`` when an iWork stream is found
            and ``self.subtype`` is not set yet.

    Returns:
        Always 0.
    """
    self.pname = os.path.split(self.fname)[1]
    if buf == "":
        # Read everything and release the handle right away; no branch below
        # needs the open file (pkzip.open() is given the file name).
        with open(self.fname, "rb") as f:
            buf = f.read()

    if buf[0:7] == "\0\0IIXPR" or buf[0:7] == "\0\0MMXPR":
        self.type = qxp.open(self, buf, parent)
        return 0
    if buf[:8] == "\x89PNG\x0d\x0a\x1a\x0a":
        self.type = pngot.open(self, buf, parent)
        return 0
    if buf[0:6] == "\x1aWLF10":
        self.type = vfb.open(self, buf, parent)
        return 0
    # XML prolog in ASCII/UTF-8 or UTF-16 with either byte order mark.
    if (buf[0:6] == "<?xml "
            or buf[0:14] == "\xff\xfe<\0?\0x\0m\0l\0 \0"
            or buf[0:14] == "\xfe\xff\0<\0?\0x\0m\0l\0 "):
        self.type = otxml.open(buf, self, parent)
        return 0
    if buf[0:8] == "CPT9FILE":
        self.type = cpt.open(buf, self, parent)
        return 0
    if buf[0:8] == "VjCD0100":
        self.type = chdraw.open(self, buf, parent)
        return 0
    if buf[0:4] == "EVHD":
        self.type = yep.parse(self, buf, parent)
        return 0
    if buf[0:4] == "XPIH":
        self.type = yep.parse_ppi(self, buf, parent)
        return 0
    if buf[0:5].lower() == "{\\rtf":
        self.type = rtf.open(buf, self, parent)
        return 0
    if buf[0:8] == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1":
        self.type = ole.ole_open(buf, self, parent)
        return 0
    if buf[0:18] == "Visio (TM) Drawing":
        self.type = vsd.parse(self, buf, parent)
        return 0
    if buf[0:2] == "\x50\xc3":
        self.type = "CLP"
        clp.open(buf, self, parent)
        return 0
    if buf[0:6] == "VCLMTF":
        self.type = "SVM"
        svm.open(buf, self, parent)
        return 0
    if buf[:4] == "\x12\x90\xa8\x7f":
        nki.open(self, buf, parent)
        return 0

    # This one should be before CDR to properly handle v17
    if parent is not None:
        parname = self.model.get_value(parent, 0)
        if parname == "[content]/dataFileList.dat":
            print("Found XMLish CDR version")
            self.wtable = self.model.get_value(parent, 3).split("\n")
        elif ".dat" in parname:
            # Remember every *.dat entry under its base name.
            if self.wdata is None:
                self.wdata = {}
            p = parname.rfind("/")
            self.wdata[parname[p+1:]] = parent

    # len() guard: the version byte at offset 11 must exist.
    if buf[0:4] == "RIFF" and buf[8:11].lower() == "cdr" and len(buf) > 11:
        self.type = "CDR%x"%(ord(buf[11])-0x30)
        print('Probably CDR')
        cdr.cdr_open(buf, self, parent)
        print(self.version)
        return 0
    if buf[0:4] == "RIFF" and buf[8:11] == "CMX":
        self.type = "CMX"
        cdr.cdr_open(buf, self, parent, "cmx")
        return 0
    if buf[0:2] == "WL":
        self.type = "CDR2"
        wld.open(buf, self, parent)
        return 0
    if buf[0:2] == "\xcc\xdc":
        self.type = "CPL"
        cpl.open(buf, self, parent)
        return 0
    if buf[0:4] == "8BGR":
        self.type = "BGR"
        abr.abr_open(buf, self, parent, "bgr")
        return 0
    if buf[4:8] == "8BIM":
        self.type = "ABR"
        abr.abr_open(buf, self, parent, "abr")
        return 0
    if buf[0:4] == "\xd7\xcd\xc6\x9a":
        self.type = "APWMF"
        mf.mf_open(buf, self, parent)
        print("Aldus Placeable WMF")
        return 0
    if buf[0:8] == "\x4c\x00\x52\x00\x46\x00\x00\x00":
        self.type = "LRF"
        lrf.open(buf, self, parent)
        print("LRF")
        return 0
    if buf[0:6] == "\x01\x00\x09\x00\x00\x03":
        self.type = "WMF"
        print("Probably WMF")
        mf.mf_open(buf, self, parent)
        return 0
    if buf[40:44] == "\x20\x45\x4d\x46":
        self.type = "EMF"
        print("Probably EMF")
        mf.mf_open(buf, self, parent)
        return 0
    # len() guard: a two-byte "KF" buffer has no third byte to test.
    if buf[0:2] == "KF" and len(buf) > 2 and buf[2] != "\x00":
        self.type = "CDW"
        print("Probably CDW")
        cdw.open(buf, self, parent)
        return 0
    if buf[0:4] == "CAT " and buf[0x8:0xc] == "REX2":
        self.type = "REX2"
        print("Probably REX2")
        rx2.open(buf, self, parent)
        return 0
    # The binary FBX magic ends in TWO spaces (20 bytes in total); they are
    # spelled as \x20 so that whitespace clean-ups cannot shorten the literal.
    if buf[0:20] == "Kaydara FBX Binary\x20\x20":
        self.type = "FBX"
        print("Probably FBX")
        fbx.open(buf, self, parent)
        return 0
    if buf[4:19] == "Standard Jet DB" or buf[4:19] == "Standard ACE DB":
        self.type = "MDB"
        print("Probably MDB")
        mdb.parse(buf, self, parent)
        return 0
    if buf[0:4] == "\x50\x4b\x03\x04":
        self.type = "PKZIP"
        print("Probably PK-ZIP")
        pkzip.open(self.fname, self, parent)
        return 0
    palmtype = buf[0x3c:0x44]
    if palmtype in palm.palm_types:
        self.type = "PALM"
        print("Probably Palm e-book")
        palm.open(buf, self, parent, palmtype)
        return 0
    if buf[2:10] == 'BOOKDOUG':
        self.type = 'IMP'
        print('Probably SoftBook e-book')
        sbimp.open(buf, self, parent)
        return 0
    if buf[0:8] == 'ITOLITLS':
        self.type = 'LIT'
        print('Probably LIT')
        lit.open(buf, self, parent)
        return 0
    if buf[0:6] == 'bplist':
        self.type = 'PLIST'
        print('Probably PLIST')
        plist.open(buf, self, parent)
        return 0

    # FreeHand probes are best effort: when the parser raises, detection
    # continues with the remaining signatures (self.type stays "FH" until a
    # later branch overwrites it).
    fh_off = buf.find('FreeHand')
    if buf[0:3] == 'AGD':
        try:
            self.type = "FH"
            print("Probably Freehand")
            # NOTE(review): parent is not passed here, unlike the other
            # fh_open() calls -- confirm this is intended.
            fh.fh_open(buf, self)
            return 0
        except Exception:
            print("Check for Freehand failed...")
    elif fh_off != -1:
        agd_off = buf.find('AGD')
        if agd_off > fh_off:
            try:
                self.type = "FH"
                print("Probably Freehand 9+")
                fh.fh_open(buf, self, parent)
                return 0
            except Exception:
                print("Check for Freehand 9+ failed...")
    if buf[0:4] == "FHD2" or buf[0:4] == "acf3":
        self.type = "FH12"
        fh12.fh_open(buf, self, parent, 0)
        return 0
    if buf[0:2] == "FH":
        fh_off = buf.find('FHDocHeader')
        if fh_off != -1:
            try:
                self.type = "FH"
                print("Probably Freehand <5")
                fh.fh_open(buf, self, parent, 0)
                return 0
            except Exception:
                print("Check for Freehand <5 failed...")

    if buf[8:11] == 'xV4':
        self.type = 'ZMF'
        print('Probably Zoner Draw 4+')
        zmf.zmf4_open(buf, self, parent)
        return 0
    # NOTE: 0x29a is 666 in decimal .-)
    if buf[0:2] == '\x9a\x02' and buf[2:4] in ['\01\0', '\02\0', '\03\0', '\04\0']:
        self.type = 'ZBR'
        print('Probably Zebra Metafile')
        zbr.open(buf, self, parent)
        return 0
    if buf[0:9] == 'ZonerBMIa':
        self.type = 'BMI'
        print('Probably Zoner Bitmap')
        bmi.open(buf, self, parent)
        return 0

    # iWork archive: a zero byte followed by a little-endian 16-bit length.
    # The length is only decoded when the three header bytes exist, so empty
    # or tiny buffers fall through instead of raising IndexError.
    if len(buf) >= 3 and buf[0] == '\0':
        size = (ord(buf[1]) | (ord(buf[2]) << 8)) + 4
        if size == len(buf) or (size < len(buf) and buf[4:7] == "\x80\x80\x04"):
            self.type = 'IWA'
            if not self.subtype:
                self.subtype = iwa.detect(package)
            print('Probably Apple iWork file')
            iwa.open(buf, self, parent, self.subtype)
            return 0

    if buf[0:0x17] == 'Software602\r\nCalc602 v.' and (
            buf[0x1c:0x24] == 'Tabulka\x1a' or buf[0x1c:0x21] == 'Graf\x1a'):
        self.type = 'C602'
        print('Probably C602 file')
        # The match above guarantees that offset 0x1c exists.
        if buf[0x1c] == 'T':
            c602.parse_spreadsheet(buf, self, parent)
        else:
            c602.parse_chart(buf, self, parent)
        return 0
    if re.match('@[A-Z]{2} .*?\\r\\n', buf) and buf[-1] == '\x1a':
        self.type = 'T602'
        print('Probably T602 file')
        t602.parse(buf, self, parent)
        return 0
    if buf[0:4] == "\0\0\x02\0":
        # Slices instead of indexes: a buffer that ends right after the
        # four-byte prefix simply does not match.
        if buf[4:5] in ('\x20', '\x21') and buf[5:6] == '\x51':
            self.type = 'QUWQ'
            print('Probably Quattro Wq file')
            quattro_wq.wq_open(self, buf, parent)
            return 0

    # QuarkXPress 1.x data fork
    if buf[0:4] == '\x00\x1c\x00\x1c' or buf[0:4] == '\x00\x20\x00\x20':
        try:
            qxp.open_v1(self, buf, parent)
            self.type = 'QXP5'
            return 0
        except Exception:
            print("Failed after attempt to parse as QXP1...")
    if buf[0:4] == '\x00\x26\x00\x26':
        try:
            qxp.open_v2(self, buf, parent)
            self.type = 'QXP5'
            return 0
        except Exception:
            print("Failed after attempt to parse as QXP2...")

    # Nothing matched so far: make sure there is a tree entry to hang the
    # remaining (weak) detections on.
    if parent is None:
        parent = add_pgiter(self, "File", "file", "unknown", buf)
    # Likely false detection for DRW
    if buf[0:3] == "\x01\xff\x02":
        try:
            drw.open(self, buf, parent)
            self.model.set_value(parent, 0, "DRW")
        except Exception:
            print("Failed after attempt to parse as DRW...")
    if buf[0:4] == "\xe7\xac\x2c\x00":
        self.type = 'pub1'
        print('Probably publisher 1 file')
        publisher1.publisher_open(self, buf, parent)
    return 0
def my_open(buf, page, parent=None):
    """List an OLE container with the external ``gsf`` tool and parse it.

    ``gsf list`` enumerates the container; every file entry is extracted
    with ``gsf cat``, added to the page tree through ``add_pgiter()`` and
    handed to the parser that matches its stream name.

    Args:
        buf: raw container data.  Only used when ``parent`` is given; it is
            then written to a temporary file because gsf needs a path.
        page: page object; ``page.fname`` is used as the gsf input when
            ``parent`` is ``None``.  ``page.model``, ``page.type``,
            ``page.wtable`` and ``page.wdata`` are updated along the way.
        parent: tree iterator the entries are attached to, or ``None`` for
            a top-level file.

    Returns:
        The type tag detected from the stream names ("" when nothing was
        recognised), or ``None`` when gsf could not be run.
    """
    # Local import so the block does not rely on the file's import list.
    import tempfile

    dircache = {}
    vbaiter = None
    vbadata = None
    dirflag = 0
    ftype = ""
    tmpfile = None
    try:
        if parent is None:
            gsffilename = page.fname
        else:
            # need to save tmp file to pass to gsf
            tmpfd, tmpfile = tempfile.mkstemp()
            with os.fdopen(tmpfd, "wb") as tmp:
                tmp.write(buf)
            gsffilename = tmpfile
        gsfout = subprocess.check_output(["gsf", "list", gsffilename])
        print(gsfout)
        print("-----------------")
        # The first and the last line of the listing are not entries.
        for i in gsfout.split("\n")[1:-1]:
            if i[0] == "f":
                # gsf sometimes lists date even for files. Or, rather, it
                # seems that it misrepresents empty dirs as (empty) files.
                # I have observed this with 'Objects' in many .pub files.
                if i[5] != ' ':
                    fullname = " ".join(i.split()[4:])
                else:
                    fullname = " ".join(i.split()[2:])
                if not fullname:
                    fullname = " ".join(i.split()[2:])
                if "/" in fullname:
                    fns = fullname.split("/")
                    cdir = "/".join(fns[:-1])
                    fn = fns[-1]
                else:
                    fn = fullname
                    cdir = ""
                # Drop a leading control character from the stream name.
                if len(fn) and ord(fn[0]) < 32:
                    fn = fn[1:]
                if cdir:
                    cdir_to_treeiter(page, parent, cdir, dircache)
                    pn = dircache["/" + cdir]
                else:
                    # Root-level streams belong under the same parent as the
                    # directories created by cdir_to_treeiter().
                    pn = parent
                data = subprocess.check_output(
                    ["gsf", "cat", gsffilename, fullname])
                iter1 = add_pgiter(page, fn, "ole", fn, data, pn)

                if fn == "DesignerDoc":
                    ftype = "dsf"
                    page.model.set_value(iter1, 1, ("dsf", dirflag))
                    dsf.open(page, data, iter1)
                if fn == "EscherStm" or fn == "EscherDelayStm":
                    ftype = "escher"
                    page.model.set_value(iter1, 1, ("escher", dirflag))
                    # currently I don't parse it automagically for MSDOC
                    escher.parse(page.model, data, iter1, "pub")
                if fn == "MagicTab":
                    ftype = "wls"
                    page.model.set_value(iter1, 1, ("wls", dirflag))
                    wls.parse(page, data, iter1)
                if fn == "CONTENTS":
                    if data[6:11] == "WT602":
                        ftype = "wt602"
                        page.model.set_value(iter1, 1, ("wt602", dirflag))
                        wt602.parse(page, data, iter1)
                    elif fullname.split('/')[0] == "OleObjects":
                        # Nested OLE objects (or images) in WT602
                        wt602.parse_object(page, data, iter1)
                    else:
                        ftype = "quill"
                        page.model.set_value(iter1, 1, ("quill", dirflag))
                        quill.parse(page, data, iter1)
                if fn == "Contents":
                    if data[:2] == "\xe8\xac":  # take signature into account
                        ftype = "pub"
                        page.model.set_value(iter1, 1, ("pub", dirflag))
                        pub.parse(page, data, iter1)
                if fn == "VisioDocument":
                    ftype = "vsd"
                    page.model.set_value(iter1, 1, ("vsd", dirflag))
                    vsd.parse(page, data, iter1)
                if fn == "PageMaker":
                    ftype = "pm"
                    page.model.set_value(iter1, 1, ("pm", dirflag))
                    pm6.open(page, data, iter1)
                if fn == "WordDocument":
                    ftype = "doc"
                    page.model.set_value(iter1, 1, ("doc", dirflag))
                    doc.parse(page, data, iter1)
                if fn == "1Table" or fn == "0Table":
                    page.wtable = iter1
                if fn == "Data" and page.type == "DOC":
                    page.wdata = iter1
                if fn == "Book" or fn == "Workbook":
                    page.model.set_value(iter1, 1, ("xls", dirflag))
                    ftype = xls.parse(page, data, iter1)
                if fn == "PowerPoint Document" or fn == "Pictures":
                    ftype = "ppt"
                    page.model.set_value(iter1, 1, ("ppt", dirflag))
                    ppt.parse(page, data, iter1)
                if fn == "NativeContent_MAIN":
                    ftype = "qpw"
                    page.model.set_value(iter1, 1, ("qpw", dirflag))
                    qpw.parse(page, data, iter1)
                if fn == "Signature" and data[:4] == '\x60\x67\x01\x00':
                    ftype = "ppp"  # PagePlus OLE version (9.x?)
                if (fn == "contents" or fn == "SCFFPreview") and ftype == "ppp":
                    ppp.parse(page, data, iter1, fn)
                # I've no idea if this is really the signature, but it is
                # present in all files I've seen so far
                if fn == "Header" and data[0xc:0xf] == 'xV4':
                    ftype = 'zmf'
                    zmf.zmf2_open(page, data, iter1, fn)
                if fn[-4:] == '.zmf':
                    ftype = 'zmf'
                    zmf.zmf2_open(page, data, iter1, fn)
                if fn == "VBA":
                    page.type = "vba"
                    ftype = "vba"
                if "vba/dir" in fullname.lower():
                    # Parsed after the loop, once all streams are in the tree.
                    page.model.set_value(iter1, 1, ("vba", dirflag))
                    vbaiter = iter1
                    vbadata = data
                if "SummaryInformation" in fn:
                    page.model.set_value(iter1, 1, ("ole", "propset"))
            else:
                if i.split()[2] == "VBA":
                    page.type = "vba"
                    ftype = "vba"
        if vbaiter is not None:
            vba.parse(page, vbadata, vbaiter)
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: gsf exited with an error.
        # OSError: the gsf executable could not be started at all.
        print("Failed to run gsf. Please install libgsf.")
        return None
    finally:
        # Only the temporary copy is ever deleted, never page.fname.
        if tmpfile is not None:
            try:
                os.remove(tmpfile)
            except OSError:
                pass
    return ftype
def gsf_get_children(page, infile, parent, ftype, dirflag=0):
    """Add the children of a GSF infile to the page tree and parse them.

    Every child is read, added below ``parent`` through ``add_pgiter()`` and
    handed to the parser that matches its name.  Children that have children
    of their own are walked recursively.

    Args:
        page: page object; ``page.model``, ``page.type``, ``page.wtable``
            and ``page.wdata`` are updated along the way.
        infile: object providing ``num_children()``, ``child_by_index()``
            and ``name_by_index()``.
        parent: tree iterator the new rows are attached to.
        ftype: type tag detected so far.
        dirflag: stored as the second element of the ``(type, flag)`` tuple
            written to column 1 of each recognised row.

    Returns:
        The (possibly updated) type tag.
    """
    vbaiter = None
    vbadata = None
    for i in range(infile.num_children()):
        infchild = infile.child_by_index(i)
        infname = infile.name_by_index(i)
        chsize = infchild.size()
        # Drop a leading control character; an empty name is left alone.
        if infname and ord(infname[0]) < 32:
            infname = infname[1:]
        if infname == "dir":
            infuncomp = infchild.uncompress()
            data = infuncomp.read(infuncomp.size())
        else:
            data = infchild.read(chsize)
        # Attach the row to the caller's parent so nested storages show up
        # as a tree (the recursive call below passes iter1 as parent).
        iter1 = add_pgiter(page, infname, "ole", dirflag, data, parent)

        if (infname == "EscherStm" or infname == "EscherDelayStm") and chsize > 0:
            ftype = "escher"
            page.model.set_value(iter1, 1, ("escher", dirflag))
            # currently I don't parse it automagically for MSDOC
            escher.parse(page.model, data, iter1, "pub")
        if infname == "CONTENTS":
            if data[6:11] == "WT602":
                ftype = "wt602"
                page.model.set_value(iter1, 1, ("wt602", dirflag))
                wt602.parse(page, data, iter1)
            else:
                ftype = "quill"
                page.model.set_value(iter1, 1, ("quill", dirflag))
                quill.parse(page, data, iter1)
        if infname == "Contents":
            if data and data[:2] == "\xe8\xac":  # take signature into account
                ftype = "pub"
                page.model.set_value(iter1, 1, ("pub", dirflag))
                pub.parse(page, data, iter1)
        if infname == "VisioDocument":
            ftype = "vsd"
            page.model.set_value(iter1, 1, ("vsd", dirflag))
            # choose vsd or vsd2
            vsd.parse(page, data, iter1)
        if infname == "PageMaker":
            ftype = "pm"
            page.model.set_value(iter1, 1, ("pm", dirflag))
            pm6.open(page, data, iter1)
        if infname == "WordDocument":
            ftype = "doc"
            page.model.set_value(iter1, 1, ("doc", dirflag))
            doc.parse(page, data, iter1)
        if infname == "1Table" or infname == "0Table":
            page.wtable = iter1
        if infname == "Data":
            page.wdata = iter1
        if infname == "Book" or infname == "Workbook":
            page.model.set_value(iter1, 1, ("xls", dirflag))
            ftype = xls.parse(page, data, iter1)
        # Parentheses spell out the precedence the expression always had:
        # the None check applies to "Pictures" only.
        if infname == "PowerPoint Document" or (
                infname == "Pictures" and data is not None):
            ftype = "ppt"
            page.model.set_value(iter1, 1, ("ppt", dirflag))
            ppt.parse(page, data, iter1)
        if infname == "NativeContent_MAIN":
            ftype = "qpw"
            page.model.set_value(iter1, 1, ("qpw", dirflag))
            qpw.parse(page, data, iter1)
        if infname == "Signature" and data[:4] == '\x60\x67\x01\x00':
            ftype = "ppp"  # PagePlus OLE version (9.x?)
        if (infname == "contents" or infname == "SCFFPreview") and ftype == "ppp":
            ppp.parse(page, data, iter1, infname)
        # I've no idea if this is really the signature, but it is
        # present in all files I've seen so far
        if infname == "Header" and data[0xc:0xf] == 'xV4':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)
        if infname[-4:] == '.zmf':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)
        if infname == "VBA":
            page.type = ftype
            ftype = "vba"
        if ftype == "vba" and infname == "dir":
            # Parsed after the loop, once all siblings are in the tree.
            page.model.set_value(iter1, 1, ("vba", dirflag))
            vbaiter = iter1
            vbadata = data
        # Test the child, not the container being iterated: only entries
        # that really have children are flagged as directories and walked.
        if infchild.num_children() > 0:
            page.model.set_value(iter1, 1, (ftype, 1))
            gsf_get_children(page, infchild, iter1, ftype, 0)
        if "SummaryInformation" in infname:
            page.model.set_value(iter1, 1, ("ole", "propset"))
    if vbaiter is not None:
        vba.parse(page, vbadata, vbaiter)
    return ftype
def my_open (buf,page,parent=None):
    """List an OLE container with the external ``gsf`` tool and parse it.

    ``gsf list`` enumerates the container; every file entry is extracted
    with ``gsf cat``, added to the page tree through ``add_pgiter()`` and
    handed to the parser that matches its stream name.

    Args:
        buf: raw container data.  Only used when ``parent`` is given; it is
            then written to a temporary file because gsf needs a path.
        page: page object; ``page.fname`` is used as the gsf input when
            ``parent`` is ``None``.  ``page.model``, ``page.type``,
            ``page.wtable``, ``page.wdata`` and ``page.appcontentdoc`` are
            updated along the way.
        parent: tree iterator the entries are attached to, or ``None`` for
            a top-level file.

    Returns:
        The type tag detected from the stream names; "" when nothing was
        recognised (including when gsf could not be run).
    """
    dircache = {}
    vbaiter = None
    vbadata = None
    dirflag = 0
    ftype = ""
    tmpfile = None
    try:
        if parent is None:
            gsffilename = page.fname
        else:
            # need to save tmp file to pass to gsf
            (tmpfd, tmpfile) = mkstemp()
            try:
                os.write(tmpfd, buf)
            finally:
                # Close the descriptor even when the write fails.
                os.close(tmpfd)
            gsffilename = tmpfile
        gsfout = subprocess.check_output(["gsf", "list", gsffilename])
        print(gsfout)
        print("-----------------")
        # The first and the last line of the listing are not entries.
        for i in gsfout.split("\n")[1:-1]:
            if i[0] == "f":
                # gsf sometimes lists date even for files. Or, rather, it
                # seems that it misrepresents empty dirs as (empty) files.
                # I have observed this with 'Objects' in many .pub files.
                if i[5] != ' ':
                    fullname = " ".join(i.split()[4:])
                else:
                    fullname = " ".join(i.split()[2:])
                if not fullname:
                    fullname = " ".join(i.split()[2:])
                if "/" in fullname:
                    fns = fullname.split("/")
                    cdir = "/".join(fns[:-1])
                    fn = fns[-1]
                else:
                    fn = fullname
                    cdir = ""
                # Drop a leading control character from the stream name.
                if len(fn) and ord(fn[0]) < 32:
                    fn = fn[1:]
                if cdir:
                    cdir_to_treeiter(page,parent,cdir,dircache)
                    pn = dircache["/"+cdir]
                else:
                    pn = parent
                data = subprocess.check_output(["gsf", "cat", gsffilename, fullname])
                iter1 = add_pgiter(page,fn,"ole",fn,data,pn)

                if fn == "DesignerDoc":
                    ftype = "dsf"
                    page.model.set_value(iter1,1,("dsf",dirflag))
                    dsf.open (page, data, iter1)
                if fn == "EscherStm" or fn == "EscherDelayStm":
                    ftype = "escher"
                    page.model.set_value(iter1,1,("escher",dirflag))
                    # currently I don't parse it automagically for MSDOC
                    escher.parse (page.model,data,iter1,"pub")
                if fn == "MagicTab":
                    ftype = "wls"
                    page.model.set_value(iter1,1,("wls",dirflag))
                    wls.parse (page,data,iter1)
                if fn == "CONTENTS":
                    if data[6:11] == "WT602":
                        ftype = "wt602"
                        page.model.set_value(iter1,1,("wt602",dirflag))
                        wt602.parse (page,data,iter1)
                    elif fullname.split('/')[0] == "OleObjects":
                        # Nested OLE objects (or images) in WT602
                        wt602.parse_object(page, data, iter1)
                    else:
                        ftype = "quill"
                        page.model.set_value(iter1,1,("quill",dirflag))
                        quill.parse (page,data,iter1)
                if fn == "Contents":
                    if data[:2] == "\xe8\xac": # take signature into account
                        ftype = "pub"
                        page.model.set_value(iter1,1,("pub",dirflag))
                        page.appcontentdoc = pub.PublisherContentDoc(page,iter1)
                        page.appcontentdoc.parse(data)
                if fn == "VisioDocument":
                    ftype = "vsd"
                    page.model.set_value(iter1,1,("vsd",dirflag))
                    vsd.parse (page, data, iter1)
                if fn == "PageMaker":
                    ftype = "pm"
                    page.model.set_value(iter1,1,("pm",dirflag))
                    pm6.open (page, data, iter1)
                if fn == "WordDocument":
                    ftype = "doc"
                    page.model.set_value(iter1,1,("doc",dirflag))
                    doc.parse (page, data, iter1)
                if fn == "1Table" or fn == "0Table":
                    page.wtable = iter1
                if fn == "Data" and page.type == "DOC":
                    page.wdata = iter1
                if fn == "Book" or fn == "Workbook":
                    page.model.set_value(iter1,1,("xls",dirflag))
                    ftype = xls.parse (page, data, iter1)
                if fn == "PowerPoint Document" or fn == "Pictures":
                    ftype = "ppt"
                    page.model.set_value(iter1,1,("ppt",dirflag))
                    ppt.parse (page, data, iter1)
                if fn == "NativeContent_MAIN":
                    ftype = "qpw"
                    page.model.set_value(iter1,1,("qpw",dirflag))
                    qpw.parse (page, data, iter1)
                if fn == "Signature" and data[:4] == '\x60\x67\x01\x00':
                    ftype = "ppp" #PagePlus OLE version (9.x?)
                if (fn == "contents" or fn == "SCFFPreview") and ftype == "ppp":
                    ppp.parse(page,data,iter1,fn)
                # I've no idea if this is really the signature, but it is
                # present in all files I've seen so far
                if fn == "Header" and data[0xc:0xf] == 'xV4':
                    ftype = 'zmf'
                    zmf.zmf2_open(page, data, iter1, fn)
                if fn[-4:] == '.zmf':
                    ftype = 'zmf'
                    zmf.zmf2_open(page, data, iter1, fn)
                if fn[-4:] == '.BMI' and fullname.split('/')[0] == 'Bitmaps':
                    ftype = 'bmi'
                    bmi.open(data, page, iter1)
                if fn == "VBA":
                    page.type = "vba"
                    ftype = "vba"
                if "vba/dir" in fullname.lower():
                    # Parsed after the loop, once all streams are in the tree.
                    page.model.set_value(iter1,1,("vba",dirflag))
                    vbaiter = iter1
                    vbadata = data
                if "SummaryInformation" in fn:
                    page.model.set_value(iter1,1,("ole","propset"))
            else:
                if i.split()[2] == "VBA":
                    page.type = "vba"
                    ftype = "vba"
        if vbaiter is not None:
            vba.parse (page, vbadata, vbaiter)
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: gsf exited with an error.
        # OSError: the gsf executable could not be started at all, which is
        # exactly the "not installed" case the message is about.
        print("Failed to run gsf. Please install libgsf.")
    finally:
        # Runs on every exit path, so a parser exception does not leak the
        # temporary copy either.
        if tmpfile:
            try:
                os.remove(tmpfile)
            except OSError:
                pass
    return ftype
def gsf_get_children(page,infile,parent,ftype,dirflag=0):
    """Walk a GSF infile, mirror its children in the page tree, parse them.

    Each child is read, inserted below ``parent`` via ``add_pgiter()`` and
    dispatched on its name to the matching parser module.  A child that has
    children itself is flagged as a directory and walked recursively.

    Args:
        page: page object whose ``model``, ``type``, ``wtable`` and
            ``wdata`` attributes are updated.
        infile: object providing ``num_children()``, ``child_by_index()``
            and ``name_by_index()``.
        parent: tree iterator that receives the new rows.
        ftype: type tag detected so far.
        dirflag: second element of the ``(type, flag)`` tuple stored in
            column 1 of each recognised row.

    Returns:
        The (possibly updated) type tag.
    """
    vbaiter = None
    vbadata = None
    for idx in range(infile.num_children()):
        infchild = infile.child_by_index(idx)
        infname = infile.name_by_index(idx)
        chsize = infchild.size()
        # Strip a leading control character; guard against an empty name.
        if infname and ord(infname[0]) < 32:
            infname = infname[1:]
        if infname == "dir":
            infuncomp = infchild.uncompress()
            data = infuncomp.read(infuncomp.size())
        else:
            data = infchild.read(chsize)
        # The row goes below the caller's parent; the recursive call further
        # down hands iter1 in as parent for the next level.
        iter1 = add_pgiter (page, infname, "ole", dirflag, data, parent)

        if infname in ("EscherStm", "EscherDelayStm") and chsize > 0:
            ftype = "escher"
            page.model.set_value(iter1,1,("escher",dirflag))
            # currently I don't parse it automagically for MSDOC
            escher.parse (page.model,data,iter1,"pub")
        if infname == "CONTENTS":
            if data[6:11] == "WT602":
                ftype = "wt602"
                page.model.set_value(iter1,1,("wt602",dirflag))
                wt602.parse (page,data,iter1)
            else:
                ftype = "quill"
                page.model.set_value(iter1,1,("quill",dirflag))
                quill.parse (page,data,iter1)
        if infname == "Contents":
            if data and data[:2] == "\xe8\xac": # take signature into account
                ftype = "pub"
                page.model.set_value(iter1,1,("pub",dirflag))
                pub.parse (page,data,iter1)
        if infname == "VisioDocument":
            ftype = "vsd"
            page.model.set_value(iter1,1,("vsd",dirflag))
            # choose vsd or vsd2
            vsd.parse (page, data, iter1)
        if infname == "PageMaker":
            ftype = "pm"
            page.model.set_value(iter1,1,("pm",dirflag))
            pm6.open (page, data, iter1)
        if infname == "WordDocument":
            ftype = "doc"
            page.model.set_value(iter1,1,("doc",dirflag))
            doc.parse (page, data, iter1)
        if infname in ("1Table", "0Table"):
            page.wtable = iter1
        if infname == "Data":
            page.wdata = iter1
        if infname in ("Book", "Workbook"):
            page.model.set_value(iter1,1,("xls",dirflag))
            ftype = xls.parse (page, data, iter1)
        # Explicit parentheses keep the precedence the expression always
        # had: the None check guards "Pictures" only.
        if infname == "PowerPoint Document" or (infname == "Pictures" and data is not None):
            ftype = "ppt"
            page.model.set_value(iter1,1,("ppt",dirflag))
            ppt.parse (page, data, iter1)
        if infname == "NativeContent_MAIN":
            ftype = "qpw"
            page.model.set_value(iter1,1,("qpw",dirflag))
            qpw.parse (page, data, iter1)
        if infname == "Signature" and data[:4] == '\x60\x67\x01\x00':
            ftype = "ppp" #PagePlus OLE version (9.x?)
        if infname in ("contents", "SCFFPreview") and ftype == "ppp":
            ppp.parse(page,data,iter1,infname)
        # I've no idea if this is really the signature, but it is
        # present in all files I've seen so far
        if infname == "Header" and data[0xc:0xf] == 'xV4':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)
        if infname[-4:] == '.zmf':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)
        if infname == "VBA":
            page.type = ftype
            ftype = "vba"
        if ftype == "vba" and infname == "dir":
            # Deferred until the loop is done and all siblings are present.
            page.model.set_value(iter1,1,("vba",dirflag))
            vbaiter = iter1
            vbadata = data
        # Check the child itself; the container being iterated always has
        # children here, which used to flag every row as a directory.
        if infchild.num_children() > 0:
            page.model.set_value(iter1,1,(ftype,1))
            gsf_get_children(page,infchild,iter1,ftype,0)
        if "SummaryInformation" in infname:
            page.model.set_value(iter1,1,("ole","propset"))
    if vbaiter is not None:
        vba.parse (page, vbadata, vbaiter)
    return ftype
def fload(self,buf="",parent=None,package=None):
    """Detect the format of a file or embedded stream and run its parser.

    The buffer is matched against a fixed sequence of magic signatures in
    source order; the first match wins.  A match sets ``self.type`` (either
    to a literal tag or to the value returned by the parser) and calls the
    parser module for that format.

    The code targets Python 2 byte strings: ``buf`` is compared against
    ``str`` literals and single bytes are read with ``ord()``.

    Args:
        buf: raw data to inspect.  When it is the empty string, the file
            named by ``self.fname`` is read instead.
        parent: tree iterator of the enclosing entry for nested data, or
            ``None`` for a top-level file.
        package: passed to ``iwa.detect()`` when an iWork stream is found
            and ``self.subtype`` is not set yet.

    Returns:
        Always 0.
    """
    self.pname = os.path.split(self.fname)[1]
    if buf == "":
        # Read everything and close the handle at once.  The PKZIP branch
        # used to call f.close() on a name that does not exist when the
        # caller supplies buf; with the handle closed here it needs no
        # special casing.
        with open(self.fname, "rb") as f:
            buf = f.read()

    if buf[2:8] == "IIXPR3":
        self.type = qxp.open(self, buf, parent)
        return 0
    if buf[:8] == "\x89PNG\x0d\x0a\x1a\x0a":
        self.type = pngot.open(self, buf, parent)
        return 0
    if buf[0:6] == "\x1aWLF10":
        self.type = vfb.open(self, buf, parent)
        return 0
    # XML prolog in ASCII/UTF-8 or UTF-16 with either byte order mark.
    if (buf[0:6] == "<?xml "
            or buf[0:14] == "\xff\xfe<\0?\0x\0m\0l\0 \0"
            or buf[0:14] == "\xfe\xff\0<\0?\0x\0m\0l\0 "):
        self.type = otxml.open(buf, self, parent)
        return 0
    if buf[0:8] == "CPT9FILE":
        self.type = cpt.open(buf, self, parent)
        return 0
    if buf[0:8] == "VjCD0100":
        self.type = chdraw.open(self, buf, parent)
        return 0
    if buf[0:4] == "EVHD":
        self.type = yep.parse(self, buf, parent)
        return 0
    if buf[0:4] == "XPIH":
        self.type = yep.parse_ppi(self, buf, parent)
        return 0
    if buf[0:5].lower() == "{\\rtf":
        self.type = rtf.open(buf, self, parent)
        return 0
    if buf[0:8] == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1":
        self.type = ole.ole_open(buf, self, parent)
        return 0
    if buf[0:18] == "Visio (TM) Drawing":
        self.type = vsd.parse(self, buf, parent)
        return 0
    if buf[0:2] == "\x50\xc3":
        self.type = "CLP"
        clp.open (buf,self, parent)
        return 0
    if buf[0:6] == "VCLMTF":
        self.type = "SVM"
        svm.open (buf,self, parent)
        return 0
    if buf[:4] == "\x12\x90\xa8\x7f":
        nki.open(self,buf,parent)
        return 0

    # This one should be before CDR to properly handle v17
    if parent is not None:
        parname = self.model.get_value(parent,0)
        if parname == "[content]/dataFileList.dat":
            print("Found XMLish CDR version")
            self.wtable = self.model.get_value(parent,3).split("\n")
        elif "[content]/" in parname and ".dat" in parname:
            # Remember every [content]/*.dat entry under its base name.
            if self.wdata is None:
                self.wdata = {}
            p = parname.rfind("/")
            self.wdata[parname[p+1:]] = parent

    # len() guard: the version byte at offset 11 must exist.
    if buf[0:4] == "RIFF" and buf[8:11].lower() == "cdr" and len(buf) > 11:
        self.type = "CDR%x"%(ord(buf[11])-0x30)
        # Trailing comma: under the Python 2 print statement this keeps the
        # following output on the same line, as before.
        print('Probably CDR'),
        cdr.cdr_open(buf,self, parent)
        print(self.version)
        return 0
    if buf[0:4] == "RIFF" and buf[8:11] == "CMX":
        self.type = "CMX"
        cdr.cdr_open(buf,self, parent,"cmx")
        return 0
    if buf[0:2] == "WL":
        self.type = "CDR2"
        wld.open (buf,self, parent)
        return 0
    if buf[0:2] == "\xcc\xdc":
        self.type = "CPL"
        cpl.open (buf,self, parent)
        return 0
    if buf[0:4] == "8BGR":
        self.type = "BGR"
        abr.abr_open(buf,self, parent,"bgr")
        return 0
    if buf[4:8] == "8BIM":
        self.type = "ABR"
        abr.abr_open(buf,self, parent,"abr")
        return 0
    if buf[0:4] == "\xd7\xcd\xc6\x9a":
        self.type = "APWMF"
        mf.mf_open(buf,self, parent)
        print("Aldus Placeable WMF")
        return 0
    if buf[0:8] == "\x4c\x00\x52\x00\x46\x00\x00\x00":
        self.type = "LRF"
        lrf.open(buf, self, parent)
        print("LRF")
        return 0
    if buf[0:6] == "\x01\x00\x09\x00\x00\x03":
        self.type = "WMF"
        print("Probably WMF")
        mf.mf_open(buf,self, parent)
        return 0
    if buf[40:44] == "\x20\x45\x4d\x46":
        self.type = "EMF"
        print("Probably EMF")
        mf.mf_open(buf,self, parent)
        return 0
    # len() guard: a two-byte "KF" buffer has no third byte to test.
    if buf[0:2] == "KF" and len(buf) > 2 and buf[2] != "\x00":
        self.type = "CDW"
        print("Probably CDW")
        cdw.open(buf,self, parent)
        return 0
    if buf[0:4] == "CAT " and buf[0x8:0xc] == "REX2":
        self.type = "REX2"
        print("Probably REX2")
        rx2.open(buf,self, parent)
        return 0
    # The binary FBX magic ends in TWO spaces (20 bytes in total); they are
    # spelled as \x20 so that whitespace clean-ups cannot shorten the literal.
    if buf[0:20] == "Kaydara FBX Binary\x20\x20":
        self.type = "FBX"
        print("Probably FBX")
        fbx.open(buf,self, parent)
        return 0
    if buf[4:19] == "Standard Jet DB" or buf[4:19] == "Standard ACE DB":
        self.type = "MDB"
        print("Probably MDB")
        mdb.parse (buf,self, parent)
        return 0
    if buf[0:4] == "\x50\x4b\x03\x04":
        self.type = "PKZIP"
        print("Probably PK-ZIP")
        pkzip.open (self.fname,self, parent)
        return 0
    pdbtype = buf[0x3c:0x44]
    if pdbtype in pdb.pdb_types:
        self.type = "PDB"
        print("Probably Palm e-book")
        pdb.open(buf, self, parent, pdbtype)
        return 0
    if buf[2:10] == 'BOOKDOUG':
        self.type = 'IMP'
        print('Probably SoftBook e-book')
        sbimp.open(buf, self, parent)
        return 0
    if buf[0:8] == 'ITOLITLS':
        self.type = 'LIT'
        print('Probably LIT')
        lit.open(buf,self,parent)
        return 0
    if buf[0:6] == 'bplist':
        self.type = 'PLIST'
        print('Probably PLIST')
        plist.open(buf,self,parent)
        return 0

    # FreeHand probes.  The AGD-based ones are best effort: when the parser
    # raises, detection continues (self.type stays "FH" until a later branch
    # overwrites it).
    fh_off = buf.find('FreeHand')
    if buf[0:3] == 'AGD':
        try:
            self.type = "FH"
            print("Probably Freehand")
            # NOTE(review): parent is not passed here, unlike the other
            # fh_open() calls -- confirm this is intended.
            fh.fh_open(buf,self)
            return 0
        except Exception:
            print("Check for Freehand failed...")
    elif fh_off != -1:
        agd_off = buf.find('AGD')
        if agd_off > fh_off:
            try:
                self.type = "FH"
                print("Probably Freehand 9+")
                fh.fh_open(buf,self, parent)
                return 0
            except Exception:
                print("Check for Freehand 9+ failed...")
    else:
        fh_off = buf.find('FHDocHeader')
        if fh_off != -1:
            if buf[0:2] == "FH":
                self.type = "FH"
                print("Probably Freehand <5")
                fh.fh_open(buf, self, parent, 0)
                return 0
    # Checked independently of the FreeHand/AGD probes above.
    if buf[0:4] == "FHD2" or buf[0:4] == "acf3":
        self.type = "FH12"
        fh12.fh_open(buf, self, parent, 0)
        return 0

    if buf[8:11] == 'xV4':
        self.type = 'ZMF'
        print('Probably Zoner Draw 4+')
        zmf.zmf4_open(buf, self, parent)
        return 0
    # NOTE: 0x29a is 666 in decimal .-)
    if buf[0:2] == '\x9a\x02' and buf[2:4] in ['\01\0', '\02\0', '\03\0', '\04\0']:
        self.type = 'ZBR'
        print('Probably Zebra Metafile')
        zbr.open(buf, self, parent)
        return 0

    # iWork archive: a zero byte followed by a little-endian 16-bit length.
    # The length is only decoded when the three header bytes exist, so empty
    # or tiny buffers fall through instead of raising IndexError.
    if len(buf) >= 3 and buf[0] == '\0':
        size = (ord(buf[1]) | (ord(buf[2]) << 8)) + 4
        if size == len(buf) or (size < len(buf) and buf[4:7] == "\x80\x80\x04"):
            self.type = 'IWA'
            if not self.subtype:
                self.subtype = iwa.detect(package)
            print('Probably Apple iWork file')
            iwa.open(buf, self, parent, self.subtype)
            return 0

    if buf[0:0x17] == 'Software602\r\nCalc602 v.' and (
            buf[0x1c:0x24] == 'Tabulka\x1a' or buf[0x1c:0x21] == 'Graf\x1a'):
        self.type = 'C602'
        print('Probably C602 file')
        # The match above guarantees that offset 0x1c exists.
        if buf[0x1c] == 'T':
            c602.parse_spreadsheet(buf, self, parent)
        else:
            c602.parse_chart(buf, self, parent)
        return 0

    # Nothing matched so far: make sure there is a tree entry to hang the
    # remaining (weak) detection on.
    if parent is None:
        parent = add_pgiter(self, "File", "file","unknown",buf)
    # Likely false detection for DRW
    if buf[0:3] == "\x01\xff\x02":
        try:
            drw.open(self,buf,parent)
            self.model.set_value(parent, 0, "DRW")
        except Exception:
            print("Failed after attempt to parse as DRW...")
    return 0