Beispiel #1
0
def my_open(buf, page, parent=None):
    """Open an OLE2 container with the ``gsf`` command-line tool and parse it.

    The container is listed with ``gsf list``; every file entry is extracted
    with ``gsf cat``, added to the page tree with ``add_pgiter`` and, when its
    name matches a known stream, handed to the corresponding format parser.

    This code targets Python 2: the output of ``gsf`` is handled as ``str``.

    Args:
        buf: Raw content of the container.  Only used when ``parent`` is
            given; it is then written to a temporary file for ``gsf``.
        page: Page object.  ``page.fname`` is the file passed to ``gsf`` when
            ``parent`` is None; ``page.model``, ``page.type``, ``page.wtable``
            and ``page.wdata`` are updated while streams are recognised.
        parent: Tree iterator the entries are attached to, or None for a
            top-level file.

    Returns:
        The detected file type as a string ("" when nothing was recognised;
        for workbooks, whatever ``xls.parse`` returns), or None when a
        ``gsf`` invocation fails.
    """
    import tempfile

    dircache = {}
    vbaiter = None
    vbadata = None
    dirflag = 0
    ftype = ""
    tmpfile = None
    try:
        if parent is None:
            gsffilename = page.fname
        else:
            # gsf works on files only, so a nested container has to be saved
            # to a temporary file first.  mkstemp() gives a unique, private
            # file instead of a predictable name in the current directory.
            tmpfd, tmpfile = tempfile.mkstemp()
            with os.fdopen(tmpfd, "wb") as tmp:
                tmp.write(buf)
            gsffilename = tmpfile
        gsfout = subprocess.check_output(["gsf", "list", gsffilename])
        print(gsfout)
        print("-----------------")
        # The first and the last line of the listing are not entries.
        for line in gsfout.split("\n")[1:-1]:
            if not line:
                # Skip blank lines instead of failing on line[0].
                continue
            fields = line.split()
            if line[0] != "f":
                # Not a file entry; only the VBA storage is of interest.
                if fields[2] == "VBA":
                    page.type = "vba"
                    ftype = "vba"
                continue

            # gsf sometimes lists date even for files. Or, rather, it
            # seems that it misrepresents empty dirs as (empty) files.
            # I have observed this with 'Objects' in many .pub files.
            if line[5] != ' ':
                fullname = " ".join(fields[4:])
            else:
                fullname = " ".join(fields[2:])
            if not fullname:
                fullname = " ".join(fields[2:])

            # Split into containing directory ("" at the root) and name.
            cdir, _, fn = fullname.rpartition("/")
            # Drop a leading control character from the stream name.
            if fn and ord(fn[0]) < 32:
                fn = fn[1:]

            if cdir:
                cdir_to_treeiter(page, parent, cdir, dircache)
                pn = dircache["/" + cdir]
            else:
                # Root-level streams belong to the container's own node.
                pn = parent
            data = subprocess.check_output(
                ["gsf", "cat", gsffilename, fullname])
            iter1 = add_pgiter(page, fn, "ole", fn, data, pn)

            if fn == "DesignerDoc":
                ftype = "dsf"
                page.model.set_value(iter1, 1, ("dsf", dirflag))
                dsf.open(page, data, iter1)

            if fn in ("EscherStm", "EscherDelayStm"):
                ftype = "escher"
                page.model.set_value(iter1, 1, ("escher", dirflag))
                # currently I don't parse it automagically for MSDOC
                escher.parse(page.model, data, iter1, "pub")
            if fn == "MagicTab":
                ftype = "wls"
                page.model.set_value(iter1, 1, ("wls", dirflag))
                wls.parse(page, data, iter1)
            if fn == "CONTENTS":
                if data[6:11] == "WT602":
                    ftype = "wt602"
                    page.model.set_value(iter1, 1, ("wt602", dirflag))
                    wt602.parse(page, data, iter1)
                elif fullname.split('/')[0] == "OleObjects":
                    # Nested OLE objects (or images) in WT602
                    wt602.parse_object(page, data, iter1)
                else:
                    ftype = "quill"
                    page.model.set_value(iter1, 1, ("quill", dirflag))
                    quill.parse(page, data, iter1)
            if fn == "Contents":
                if data[:2] == "\xe8\xac":  # take signature into account
                    ftype = "pub"
                    page.model.set_value(iter1, 1, ("pub", dirflag))
                    pub.parse(page, data, iter1)
            if fn == "VisioDocument":
                ftype = "vsd"
                page.model.set_value(iter1, 1, ("vsd", dirflag))
                vsd.parse(page, data, iter1)
            if fn == "PageMaker":
                ftype = "pm"
                page.model.set_value(iter1, 1, ("pm", dirflag))
                pm6.open(page, data, iter1)
            if fn == "WordDocument":
                ftype = "doc"
                page.model.set_value(iter1, 1, ("doc", dirflag))
                doc.parse(page, data, iter1)
            if fn in ("1Table", "0Table"):
                page.wtable = iter1
            if fn == "Data" and page.type == "DOC":
                page.wdata = iter1
            if fn in ("Book", "Workbook"):
                page.model.set_value(iter1, 1, ("xls", dirflag))
                ftype = xls.parse(page, data, iter1)
            if fn in ("PowerPoint Document", "Pictures"):
                ftype = "ppt"
                page.model.set_value(iter1, 1, ("ppt", dirflag))
                ppt.parse(page, data, iter1)
            if fn == "NativeContent_MAIN":
                ftype = "qpw"
                page.model.set_value(iter1, 1, ("qpw", dirflag))
                qpw.parse(page, data, iter1)
            if fn == "Signature" and data[:4] == '\x60\x67\x01\x00':
                ftype = "ppp"  # PagePlus OLE version (9.x?)
            # Only parsed once the PagePlus signature stream has been seen.
            if fn in ("contents", "SCFFPreview") and ftype == "ppp":
                ppp.parse(page, data, iter1, fn)

            # I've no idea if this is really the signature, but it is
            # present in all files I've seen so far
            if fn == "Header" and data[0xc:0xf] == 'xV4':
                ftype = 'zmf'
                zmf.zmf2_open(page, data, iter1, fn)
            if fn.endswith('.zmf'):
                ftype = 'zmf'
                zmf.zmf2_open(page, data, iter1, fn)

            if fn == "VBA":
                page.type = "vba"
                ftype = "vba"
            if "vba/dir" in fullname.lower():
                # Remember the VBA dir stream; it is parsed after the loop,
                # once all entries have been added to the tree.
                page.model.set_value(iter1, 1, ("vba", dirflag))
                vbaiter = iter1
                vbadata = data

            if "SummaryInformation" in fn:
                page.model.set_value(iter1, 1, ("ole", "propset"))
        if vbaiter is not None:
            vba.parse(page, vbadata, vbaiter)

    except subprocess.CalledProcessError:
        print("Failed to run gsf. Please install libgsf.")
        return
    finally:
        # Remove only the temporary copy made above, never page.fname, and
        # only after every 'gsf cat' call is done.
        if tmpfile is not None:
            try:
                os.remove(tmpfile)
            except OSError:
                pass

    return ftype
Beispiel #2
0
def my_open(buf, page, parent=None):
	"""Open an OLE2 container with the ``gsf`` command-line tool and parse it.

	The container is listed with ``gsf list``; every file entry is extracted
	with ``gsf cat``, added to the page tree with ``add_pgiter`` and, when its
	name matches a known stream, handed to the corresponding format parser.

	This code targets Python 2: the output of ``gsf`` is handled as ``str``.

	Args:
		buf: Raw content of the container.  Only used when ``parent`` is
			given; it is then written to a temporary file for ``gsf``.
		page: Page object.  ``page.fname`` is the file passed to ``gsf`` when
			``parent`` is None; ``page.model``, ``page.type``, ``page.wtable``,
			``page.wdata`` and ``page.appcontentdoc`` are updated while
			streams are recognised.
		parent: Tree iterator the entries are attached to, or None for a
			top-level file.

	Returns:
		The detected file type as a string ("" when nothing was recognised;
		for workbooks, whatever ``xls.parse`` returns).  When a ``gsf``
		invocation fails, the type detected up to that point is returned.
	"""
	dircache = {}
	vbaiter = None
	vbadata = None
	dirflag = 0
	ftype = ""
	tmpfile = None
	try:
		if parent is None:
			gsffilename = page.fname
		else:
			# need to save tmp file to pass to gsf
			(tmpfd, tmpfile) = mkstemp()
			gsffilename = tmpfile
			try:
				os.write(tmpfd, buf)
			finally:
				# Close the descriptor even when the write fails.
				os.close(tmpfd)
		gsfout = subprocess.check_output(["gsf", "list", gsffilename])
		print(gsfout)
		print("-----------------")
		# The first and the last line of the listing are not entries.
		for line in gsfout.split("\n")[1:-1]:
			fields = line.split()
			if line[0] != "f":
				# Not a file entry; only the VBA storage is of interest.
				if fields[2] == "VBA":
					page.type = "vba"
					ftype = "vba"
				continue

			# gsf sometimes lists date even for files. Or, rather, it
			# seems that it misrepresents empty dirs as (empty) files.
			# I have observed this with 'Objects' in many .pub files.
			if line[5] != ' ':
				fullname = " ".join(fields[4:])
			else:
				fullname = " ".join(fields[2:])
			if not fullname:
				fullname = " ".join(fields[2:])

			# Split into containing directory ("" at the root) and name.
			cdir, _, fn = fullname.rpartition("/")
			# Drop a leading control character from the stream name.
			if fn and ord(fn[0]) < 32:
				fn = fn[1:]

			if cdir:
				cdir_to_treeiter(page, parent, cdir, dircache)
				pn = dircache["/" + cdir]
			else:
				pn = parent
			data = subprocess.check_output(["gsf", "cat", gsffilename, fullname])
			iter1 = add_pgiter(page, fn, "ole", fn, data, pn)

			if fn == "DesignerDoc":
				ftype = "dsf"
				page.model.set_value(iter1, 1, ("dsf", dirflag))
				dsf.open(page, data, iter1)

			if fn in ("EscherStm", "EscherDelayStm"):
				ftype = "escher"
				page.model.set_value(iter1, 1, ("escher", dirflag))
				# currently I don't parse it automagically for MSDOC
				escher.parse(page.model, data, iter1, "pub")
			if fn == "MagicTab":
				ftype = "wls"
				page.model.set_value(iter1, 1, ("wls", dirflag))
				wls.parse(page, data, iter1)
			if fn == "CONTENTS":
				if data[6:11] == "WT602":
					ftype = "wt602"
					page.model.set_value(iter1, 1, ("wt602", dirflag))
					wt602.parse(page, data, iter1)
				elif fullname.split('/')[0] == "OleObjects":
					# Nested OLE objects (or images) in WT602
					wt602.parse_object(page, data, iter1)
				else:
					ftype = "quill"
					page.model.set_value(iter1, 1, ("quill", dirflag))
					quill.parse(page, data, iter1)
			if fn == "Contents":
				if data[:2] == "\xe8\xac":  # take signature into account
					ftype = "pub"
					page.model.set_value(iter1, 1, ("pub", dirflag))
					page.appcontentdoc = pub.PublisherContentDoc(page, iter1)
					page.appcontentdoc.parse(data)
			if fn == "VisioDocument":
				ftype = "vsd"
				page.model.set_value(iter1, 1, ("vsd", dirflag))
				vsd.parse(page, data, iter1)
			if fn == "PageMaker":
				ftype = "pm"
				page.model.set_value(iter1, 1, ("pm", dirflag))
				pm6.open(page, data, iter1)
			if fn == "WordDocument":
				ftype = "doc"
				page.model.set_value(iter1, 1, ("doc", dirflag))
				doc.parse(page, data, iter1)
			if fn in ("1Table", "0Table"):
				page.wtable = iter1
			if fn == "Data" and page.type == "DOC":
				page.wdata = iter1
			if fn in ("Book", "Workbook"):
				page.model.set_value(iter1, 1, ("xls", dirflag))
				ftype = xls.parse(page, data, iter1)
			if fn in ("PowerPoint Document", "Pictures"):
				ftype = "ppt"
				page.model.set_value(iter1, 1, ("ppt", dirflag))
				ppt.parse(page, data, iter1)
			if fn == "NativeContent_MAIN":
				ftype = "qpw"
				page.model.set_value(iter1, 1, ("qpw", dirflag))
				qpw.parse(page, data, iter1)
			if fn == "Signature" and data[:4] == '\x60\x67\x01\x00':
				ftype = "ppp"  # PagePlus OLE version (9.x?)
			# Only parsed once the PagePlus signature stream has been seen.
			if fn in ("contents", "SCFFPreview") and ftype == "ppp":
				ppp.parse(page, data, iter1, fn)

			# I've no idea if this is really the signature, but it is
			# present in all files I've seen so far
			if fn == "Header" and data[0xc:0xf] == 'xV4':
				ftype = 'zmf'
				zmf.zmf2_open(page, data, iter1, fn)
			if fn.endswith('.zmf'):
				ftype = 'zmf'
				zmf.zmf2_open(page, data, iter1, fn)
			if fn.endswith('.BMI') and fullname.split('/')[0] == 'Bitmaps':
				ftype = 'bmi'
				bmi.open(data, page, iter1)

			if fn == "VBA":
				page.type = "vba"
				ftype = "vba"
			if "vba/dir" in fullname.lower():
				# Remember the VBA dir stream; it is parsed after the loop,
				# once all entries have been added to the tree.
				page.model.set_value(iter1, 1, ("vba", dirflag))
				vbaiter = iter1
				vbadata = data

			if "SummaryInformation" in fn:
				page.model.set_value(iter1, 1, ("ole", "propset"))
		if vbaiter is not None:
			vba.parse(page, vbadata, vbaiter)

	except subprocess.CalledProcessError:
		print("Failed to run gsf. Please install libgsf.")
	finally:
		# Runs for every exit path, so the temporary copy is removed even
		# when a parser raises something other than CalledProcessError.
		if tmpfile is not None:
			try:
				os.remove(tmpfile)
			except OSError:
				pass

	return ftype
Beispiel #3
0
def gsf_get_children(page, infile, parent, ftype, dirflag=0):
    """Add every child of a GSF infile to the page tree and parse known streams.

    Each child is read, added with ``add_pgiter`` and, when its name matches
    a known stream, handed to the corresponding format parser.  The function
    then calls itself on the child.

    Args:
        page: Page object; ``page.model``, ``page.type``, ``page.wtable`` and
            ``page.wdata`` are updated while streams are recognised.
        infile: Container object providing ``num_children()``,
            ``child_by_index(i)`` and ``name_by_index(i)``.
        parent: Tree iterator of the container.  It is passed on recursive
            calls but is not forwarded to ``add_pgiter`` here.
        ftype: File type detected so far.
        dirflag: Second element of the tuple stored in model column 1 for
            recognised streams; also passed to ``add_pgiter``.

    Returns:
        The file type after inspecting the children of ``infile``; the
        incoming ``ftype`` when nothing was recognised.
    """
    vbaiter = None
    vbadata = None
    for i in range(infile.num_children()):
        infchild = infile.child_by_index(i)
        infname = infile.name_by_index(i)
        chsize = infchild.size()

        # Drop a leading control character; an empty name is left alone
        # instead of raising IndexError.
        if infname and ord(infname[0]) < 32:
            infname = infname[1:]

        if infname == "dir":
            # The "dir" stream is read through its uncompressed view.
            infuncomp = infchild.uncompress()
            data = infuncomp.read(infuncomp.size())
        else:
            data = infchild.read(chsize)

        # NOTE(review): `parent` is not passed to add_pgiter -- confirm
        # whether the children were meant to be attached below it.
        iter1 = add_pgiter(page, infname, "ole", dirflag, data)

        if infname in ("EscherStm", "EscherDelayStm") and chsize > 0:
            ftype = "escher"
            page.model.set_value(iter1, 1, ("escher", dirflag))
            # currently I don't parse it automagically for MSDOC
            escher.parse(page.model, data, iter1, "pub")
        if infname == "CONTENTS":
            if data[6:11] == "WT602":
                ftype = "wt602"
                page.model.set_value(iter1, 1, ("wt602", dirflag))
                wt602.parse(page, data, iter1)
            else:
                ftype = "quill"
                page.model.set_value(iter1, 1, ("quill", dirflag))
                quill.parse(page, data, iter1)
        if infname == "Contents":
            if data and data[:2] == "\xe8\xac":  # take signature into account
                ftype = "pub"
                page.model.set_value(iter1, 1, ("pub", dirflag))
                pub.parse(page, data, iter1)
        if infname == "VisioDocument":
            ftype = "vsd"
            page.model.set_value(iter1, 1, ("vsd", dirflag))
            # choose vsd or vsd2
            vsd.parse(page, data, iter1)
        if infname == "PageMaker":
            ftype = "pm"
            page.model.set_value(iter1, 1, ("pm", dirflag))
            pm6.open(page, data, iter1)
        if infname == "WordDocument":
            ftype = "doc"
            page.model.set_value(iter1, 1, ("doc", dirflag))
            doc.parse(page, data, iter1)
        if infname in ("1Table", "0Table"):
            page.wtable = iter1
        if infname == "Data":
            page.wdata = iter1

        if infname in ("Book", "Workbook"):
            page.model.set_value(iter1, 1, ("xls", dirflag))
            ftype = xls.parse(page, data, iter1)
        # `and` binds tighter than `or`: the None check applies to the
        # "Pictures" stream only.  The parentheses just make that explicit.
        if infname == "PowerPoint Document" or (
                infname == "Pictures" and data is not None):
            ftype = "ppt"
            page.model.set_value(iter1, 1, ("ppt", dirflag))
            ppt.parse(page, data, iter1)
        if infname == "NativeContent_MAIN":
            ftype = "qpw"
            page.model.set_value(iter1, 1, ("qpw", dirflag))
            qpw.parse(page, data, iter1)
        if infname == "Signature" and data[:4] == '\x60\x67\x01\x00':
            ftype = "ppp"  # PagePlus OLE version (9.x?)
        # Only parsed once the PagePlus signature stream has been seen.
        if infname in ("contents", "SCFFPreview") and ftype == "ppp":
            ppp.parse(page, data, iter1, infname)

        # I've no idea if this is really the signature, but it is
        # present in all files I've seen so far
        if infname == "Header" and data[0xc:0xf] == 'xV4':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)
        if infname[-4:] == '.zmf':
            ftype = 'zmf'
            zmf.zmf2_open(page, data, iter1, infname)

        if infname == "VBA":
            # Store the type detected so far on the page, then switch to vba.
            page.type = ftype
            ftype = "vba"
        if ftype == "vba" and infname == "dir":
            # Remember the VBA dir stream; it is parsed after the loop.
            page.model.set_value(iter1, 1, ("vba", dirflag))
            vbaiter = iter1
            vbadata = data

        # NOTE(review): this tests the container being iterated, which is
        # non-empty whenever the loop body runs, so every child is marked
        # and recursed into.  Possibly `infchild` was intended -- confirm.
        if infile.num_children() > 0:
            page.model.set_value(iter1, 1, (ftype, 1))
            gsf_get_children(page, infchild, iter1, ftype, 0)
        if "SummaryInformation" in infname:
            page.model.set_value(iter1, 1, ("ole", "propset"))

    if vbaiter is not None:
        vba.parse(page, vbadata, vbaiter)

    return ftype
Beispiel #4
0
def gsf_get_children(page, infile, parent, ftype, dirflag=0):
	"""Add every child of a GSF infile to the page tree and parse known streams.

	Each child is read, added with ``add_pgiter`` and, when its name matches
	a known stream, handed to the corresponding format parser.  The function
	then calls itself on the child.

	Args:
		page: Page object; ``page.model``, ``page.type``, ``page.wtable`` and
			``page.wdata`` are updated while streams are recognised.
		infile: Container object providing ``num_children()``,
			``child_by_index(i)`` and ``name_by_index(i)``.
		parent: Tree iterator of the container.  It is passed on recursive
			calls but is not forwarded to ``add_pgiter`` here.
		ftype: File type detected so far.
		dirflag: Second element of the tuple stored in model column 1 for
			recognised streams; also passed to ``add_pgiter``.

	Returns:
		The file type after inspecting the children of ``infile``; the
		incoming ``ftype`` when nothing was recognised.
	"""
	vbaiter = None
	vbadata = None
	for i in range(infile.num_children()):
		infchild = infile.child_by_index(i)
		infname = infile.name_by_index(i)
		chsize = infchild.size()

		# Drop a leading control character; an empty name is left alone
		# instead of raising IndexError.
		if infname and ord(infname[0]) < 32:
			infname = infname[1:]

		if infname == "dir":
			# The "dir" stream is read through its uncompressed view.
			infuncomp = infchild.uncompress()
			data = infuncomp.read(infuncomp.size())
		else:
			data = infchild.read(chsize)

		# NOTE(review): `parent` is not passed to add_pgiter -- confirm
		# whether the children were meant to be attached below it.
		iter1 = add_pgiter(page, infname, "ole", dirflag, data)

		if infname in ("EscherStm", "EscherDelayStm") and chsize > 0:
			ftype = "escher"
			page.model.set_value(iter1, 1, ("escher", dirflag))
			# currently I don't parse it automagically for MSDOC
			escher.parse(page.model, data, iter1, "pub")
		if infname == "CONTENTS":
			if data[6:11] == "WT602":
				ftype = "wt602"
				page.model.set_value(iter1, 1, ("wt602", dirflag))
				wt602.parse(page, data, iter1)
			else:
				ftype = "quill"
				page.model.set_value(iter1, 1, ("quill", dirflag))
				quill.parse(page, data, iter1)
		if infname == "Contents":
			if data and data[:2] == "\xe8\xac":  # take signature into account
				ftype = "pub"
				page.model.set_value(iter1, 1, ("pub", dirflag))
				pub.parse(page, data, iter1)
		if infname == "VisioDocument":
			ftype = "vsd"
			page.model.set_value(iter1, 1, ("vsd", dirflag))
			# choose vsd or vsd2
			vsd.parse(page, data, iter1)
		if infname == "PageMaker":
			ftype = "pm"
			page.model.set_value(iter1, 1, ("pm", dirflag))
			pm6.open(page, data, iter1)
		if infname == "WordDocument":
			ftype = "doc"
			page.model.set_value(iter1, 1, ("doc", dirflag))
			doc.parse(page, data, iter1)
		if infname in ("1Table", "0Table"):
			page.wtable = iter1
		if infname == "Data":
			page.wdata = iter1

		if infname in ("Book", "Workbook"):
			page.model.set_value(iter1, 1, ("xls", dirflag))
			ftype = xls.parse(page, data, iter1)
		# `and` binds tighter than `or`: the None check applies to the
		# "Pictures" stream only.  The parentheses just make that explicit.
		if infname == "PowerPoint Document" or (
				infname == "Pictures" and data is not None):
			ftype = "ppt"
			page.model.set_value(iter1, 1, ("ppt", dirflag))
			ppt.parse(page, data, iter1)
		if infname == "NativeContent_MAIN":
			ftype = "qpw"
			page.model.set_value(iter1, 1, ("qpw", dirflag))
			qpw.parse(page, data, iter1)
		if infname == "Signature" and data[:4] == '\x60\x67\x01\x00':
			ftype = "ppp"  # PagePlus OLE version (9.x?)
		# Only parsed once the PagePlus signature stream has been seen.
		if infname in ("contents", "SCFFPreview") and ftype == "ppp":
			ppp.parse(page, data, iter1, infname)

		# I've no idea if this is really the signature, but it is
		# present in all files I've seen so far
		if infname == "Header" and data[0xc:0xf] == 'xV4':
			ftype = 'zmf'
			zmf.zmf2_open(page, data, iter1, infname)
		if infname[-4:] == '.zmf':
			ftype = 'zmf'
			zmf.zmf2_open(page, data, iter1, infname)

		if infname == "VBA":
			# Store the type detected so far on the page, then switch to vba.
			page.type = ftype
			ftype = "vba"
		if ftype == "vba" and infname == "dir":
			# Remember the VBA dir stream; it is parsed after the loop.
			page.model.set_value(iter1, 1, ("vba", dirflag))
			vbaiter = iter1
			vbadata = data

		# NOTE(review): this tests the container being iterated, which is
		# non-empty whenever the loop body runs, so every child is marked
		# and recursed into.  Possibly `infchild` was intended -- confirm.
		if infile.num_children() > 0:
			page.model.set_value(iter1, 1, (ftype, 1))
			gsf_get_children(page, infchild, iter1, ftype, 0)
		if "SummaryInformation" in infname:
			page.model.set_value(iter1, 1, ("ole", "propset"))

	if vbaiter is not None:
		vba.parse(page, vbadata, vbaiter)

	return ftype