Python PDFParserの例、PDFCore.PDFParser Pythonの例

コード例 #1

0

ファイルを表示

ファイル: swf_mastah.py プロジェクト: 0day1day/pdfxray_public

def snatch(file, out):
    """Extract Flash (SWF) payloads embedded in the streams of a PDF.

    The PDF is parsed with ``PDFParser`` and every stream object of every
    version listed in the parser statistics is inspected.  A stream whose
    decoded content starts with the ``CWS`` or ``FWS`` signature is written
    to ``<out><object id>_decoded_object.swf``.

    :param file: path of the PDF file to parse.
    :param out: prefix prepended to each generated file name (it is
        concatenated as-is, so a directory must end with a separator).
    """
    swf_signatures = ("CWS", "FWS")
    pdfParser = PDFParser()
    ret, pdf = pdfParser.parse(file, True, False)
    statsDict = pdf.getStats()
    # The loop index doubles as the index into pdf.body, so no separate
    # counter is needed.
    for version in range(len(statsDict['Versions'])):
        objs = pdf.body[version].objects
        for index in objs:
            details = objs[index].object
            if details.type != "stream":
                continue
            decoded_stream = details.decodedStream
            if decoded_stream[:3] not in swf_signatures:
                continue
            target = out + str(objs[index].id) + '_decoded_object.swf'
            # SWF is binary data: write in binary mode so no newline
            # translation happens, and let ``with`` close the file even if
            # the write fails.
            # NOTE(review): strip() is kept from the original; it may trim
            # significant trailing bytes of the payload -- confirm intent.
            with open(target, "wb") as f:
                f.write(decoded_stream.strip())

コード例 #2

0

ファイルを表示

ファイル: swf_mastah.py プロジェクト: jonz-secops/pdfxray_public

def snatch(file, out):
    """Extract Flash (SWF) payloads embedded in the streams of a PDF.

    The PDF is parsed with ``PDFParser`` and every stream object of every
    version listed in the parser statistics is inspected.  A stream whose
    decoded content starts with the ``CWS`` or ``FWS`` signature is written
    to ``<out><object id>_decoded_object.swf``.

    :param file: path of the PDF file to parse.
    :param out: prefix prepended to each generated file name (it is
        concatenated as-is, so a directory must end with a separator).
    """
    swf_signatures = ("CWS", "FWS")
    pdfParser = PDFParser()
    ret, pdf = pdfParser.parse(file, True, False)
    statsDict = pdf.getStats()
    # The loop index doubles as the index into pdf.body, so no separate
    # counter is needed.
    for version in range(len(statsDict['Versions'])):
        objs = pdf.body[version].objects
        for index in objs:
            details = objs[index].object
            if details.type != "stream":
                continue
            decoded_stream = details.decodedStream
            if decoded_stream[:3] not in swf_signatures:
                continue
            target = out + str(objs[index].id) + '_decoded_object.swf'
            # SWF is binary data: write in binary mode so no newline
            # translation happens, and let ``with`` close the file even if
            # the write fails.
            # NOTE(review): strip() is kept from the original; it may trim
            # significant trailing bytes of the payload -- confirm intent.
            with open(target, "wb") as f:
                f.write(decoded_stream.strip())

コード例 #3

0

ファイルを表示

ファイル: harness.py プロジェクト: 0day1day/pdfxray_public

def snatch_contents(file):
    """Parse a PDF and return a JSON array describing each of its objects.

    Every object found in every element of the parsed document's ``body``
    yields one dictionary holding its id, offset, size, raw value (cleaned
    with ``encoder_cleaner``), MD5 of the raw value, references, errors,
    encryption/JavaScript flags and the suspicious events, actions, elements
    and vulnerabilities exposed by the object.  For stream objects the
    ``stream`` entry is a dictionary with the filter, size, encoded/decoded
    content (cleaned, MD5 and hex forms), decoding errors, JavaScript code
    and entropy blocks; for any other object it is an empty list.

    :param file: path of the PDF file to analyse.
    :return: the list of object dictionaries serialised with ``json.dumps``.
    """
    pdfParser = PDFParser()
    ret, pdf = pdfParser.parse(file, True, False)
    all_objs = []
    for item in pdf.body:
        objs = item.objects
        for index in objs:
            entry = objs[index]
            details = entry.object

            # Non-stream objects keep the empty-list placeholder.
            stream_details = []
            if details.type == "stream":
                # Named stream_filter so the builtin ``filter`` is not shadowed.
                stream_filter = None
                if details.filter is not None:
                    stream_filter = details.filter.rawValue
                encoded_stream = details.encodedStream
                decoded_stream = details.decodedStream
                blocks = analyzer(get_entropy(details.rawStream))
                stream_details = {
                    'filter': stream_filter,
                    'size': details.size,
                    'encoded_stream': encoder_cleaner(encoded_stream),
                    'encoded_hash': hashlib.md5(encoded_stream).hexdigest(),
                    'encoded_hex': ByteToHex(encoded_stream),
                    'decoded_stream': encoder_cleaner(decoded_stream),
                    'decoded_hash': hashlib.md5(decoded_stream).hexdigest(),
                    'decoded_hex': ByteToHex(decoded_stream),
                    'decode_errors': details.decodingError,
                    'js_code': details.JSCode,
                    'entropy': blocks.json_blocks,
                    'flags': [],
                }

            raw_value = details.rawValue
            try:
                raw_md5 = hashlib.md5(raw_value).hexdigest()
            except Exception:
                # Best effort: a raw value md5 cannot digest is reported as
                # "error" instead of aborting the whole dump.
                raw_md5 = "error"

            try:
                suspicious_events = details.suspiciousEvents
                suspicious_actions = details.suspiciousActions
                suspicious_elements = details.suspiciousElements
                vulns = details.vulns
            except Exception:
                # If any of the four attributes cannot be read, all four
                # fall back to empty lists.
                suspicious_events = []
                suspicious_actions = []
                suspicious_elements = []
                vulns = []

            all_objs.append({
                'id': entry.id,
                'offset': entry.offset,
                'size': entry.size,
                'stream': stream_details,
                'encrypted': details.encrypted,
                'contains_js': details.containsJScode,
                'raw': encoder_cleaner(raw_value),
                'raw_hash': raw_md5,
                'references': details.references,
                'errors': details.errors,
                'suspicious_events': suspicious_events,
                'suspicious_actions': suspicious_actions,
                'suspicious_elements': suspicious_elements,
                'vulns': vulns,
            })

    return json.dumps(all_objs)

コード例 #4

0

ファイルを表示

ファイル: harness.py プロジェクト: 9b/pdfxray_lite

def snatch_version(file):
    """Summarise every version reported for a PDF as a JSON array.

    For each entry of the ``'Versions'`` list in the parser statistics the
    result holds: the version index, the object ids and object count, the
    basic metadata (author, creator, producer, creation and modification
    dates, cleaned with ``encoder_cleaner``; empty string when absent) and
    four ``"true"``/``"false"`` string flags telling whether the statistics
    carry events, actions, elements or vulnerabilities (i.e. the
    corresponding entry is not ``None``).

    :param file: path of the PDF file to analyse.
    :return: the list of per-version dictionaries serialised with
        ``json.dumps``.
    """
    pdfParser = PDFParser()
    ret, pdf = pdfParser.parse(file, True, False)
    statsDict = pdf.getStats()
    objs = []
    for version, statsVersion in enumerate(statsDict['Versions']):
        meta = pdf.getBasicMetadata(version)
        # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
        author = encoder_cleaner(meta['author']) if 'author' in meta else ""
        creator = encoder_cleaner(meta['creator']) if 'creator' in meta else ""
        producer = encoder_cleaner(meta['producer']) if 'producer' in meta else ""
        creation_date = encoder_cleaner(meta['creation']) if 'creation' in meta else ""
        modification_date = encoder_cleaner(meta['modification']) if 'modification' in meta else ""

        actions = statsVersion['Actions']
        events = statsVersion['Events']
        vulns = statsVersion['Vulns']
        elements = statsVersion['Elements']
        # Objects[1] is a string: drop its first and last character
        # (presumably the enclosing brackets -- confirm) and split on commas.
        object_ids = statsVersion['Objects'][1][1:-1].split(',')

        objs.append({
            'version': version,
            'object_ids': object_ids,
            'total_objects': statsVersion['Objects'][0],
            'author': author,
            'creator': creator,
            'producer': producer,
            'creation_date': creation_date,
            'modification_date': modification_date,
            # The flags are strings, not booleans, as in the original output.
            'suspicious_events_present': "false" if events is None else "true",
            'suspicious_actions_present': "false" if actions is None else "true",
            'suspicious_elements_present': "false" if elements is None else "true",
            'vulnerabilities_present': "false" if vulns is None else "true",
        })

    return json.dumps(objs)

コード例 #5

0

ファイルを表示

def handle_uploaded_file(request, rsesh, ruser):
    """Store an uploaded PDF and return its parsed form, reusing known samples.

    The upload found under ``request['file']`` is written to
    ``<MEDIA_ROOT>/uploads/`` and its MD5 is computed with ``get_hash_data``.
    If ``get_sample`` already knows that hash, its result is returned;
    otherwise the file is parsed with ``PDFParser`` and a statistics record
    (timestamp, file name, stored flag, processing time, user, hash) is
    passed to ``store_file_stats``.  In both cases the result is cached in
    ``rsesh`` under the hash, and the module-level ``hash`` global is updated.

    :param request: mapping holding the uploaded file under ``'file'``; the
        upload must expose ``name`` and ``chunks()``.
    :param rsesh: session-like mapping used as a cache keyed by file hash.
    :param ruser: user object exposing ``is_authenticated`` and ``username``.
    :return: the previously stored sample, or the freshly parsed PDF object.
    """
    global hash
    import os  # local import: the file's import header is not visible here

    f = request['file']
    # The name comes from the client: keep only its final component so it
    # cannot point outside the uploads directory.
    filename = os.path.basename(f.name)
    file = settings.MEDIA_ROOT + '/uploads/' + filename
    # ``with`` closes the destination even if reading a chunk fails.
    with open(file, 'wb') as destination:
        for chunk in f.chunks():
            destination.write(chunk)

    hash = get_hash_data(file, "md5")  # used to look the sample up
    is_present = get_sample(hash)  # None when the sample is not known yet
    if is_present is None:
        # Storing the raw parser output is disabled, so 'stored' is always
        # reported as True.
        stored = True
        t = time()
        pdfParser = PDFParser()
        ret, data = pdfParser.parse(file, True, False)
        process_time = time() - t

        # is_authenticated is a method on older Django releases and an
        # attribute on newer ones; an uncalled method would always be truthy.
        authenticated = ruser.is_authenticated
        if callable(authenticated):
            authenticated = authenticated()
        user = ruser.username if authenticated else "******"

        fstat = {
            'date_time': time(),
            'filename': filename,
            'stored': stored,
            'process_time': process_time,
            'user': user,
            'hash': hash,
        }
        store_file_stats(json.dumps(fstat))
    else:
        data = is_present  # the file was already known and was returned

    rsesh[hash] = data  # keep the result in the session to avoid DB hits

    return data

コード例 #6

0

ファイルを表示

    else:
        if len(args) == 1:
            fileName = args[0]
            if not os.path.exists(fileName):
                sys.exit('Error: The file "' + fileName + '" does not exist!!')
        elif len(args) > 1 or (len(args) == 0 and not options.isInteractive):
            sys.exit(argsParser.print_help())

        if options.scriptFile is not None:
            if not os.path.exists(options.scriptFile):
                sys.exit('Error: The script file "' + options.scriptFile +
                         '" does not exist!!')

        if fileName is not None:
            pdfParser = PDFParser()
            ret, pdf = pdfParser.parse(fileName, options.isForceMode,
                                       options.isLooseMode,
                                       options.isManualAnalysis)
            if options.checkOnVT:
                # Checks the MD5 on VirusTotal
                md5Hash = pdf.getMD5()
                ret = vtcheck(md5Hash, VT_KEY)
                if ret[0] == -1:
                    pdf.addError(ret[1])
                else:
                    vtJsonDict = ret[1]
                    if vtJsonDict.has_key('response_code'):
                        if vtJsonDict['response_code'] == 1:
                            if vtJsonDict.has_key(
                                    'positives') and vtJsonDict.has_key(

コード例 #7

0

ファイルを表示

ファイル: peepdf.py プロジェクト: adepasquale/thug

            print message

    else:
        if len(args) == 1:
            fileName = args[0]
            if not os.path.exists(fileName):
                sys.exit('Error: The file "' + fileName + '" does not exist!!')
        elif len(args) > 1 or (len(args) == 0 and not options.isInteractive):
            sys.exit(argsParser.print_help())

        if options.scriptFile is not None:
            if not os.path.exists(options.scriptFile):
                sys.exit('Error: The script file "' + options.scriptFile + '" does not exist!!')

        if fileName is not None:
            pdfParser = PDFParser()
            ret, pdf = pdfParser.parse(fileName, options.isForceMode, options.isLooseMode, options.isManualAnalysis)
            if options.checkOnVT:
                # Checks the MD5 on VirusTotal
                md5Hash = pdf.getMD5()
                ret = vtcheck(md5Hash, VT_KEY)
                if ret[0] == -1:
                    pdf.addError(ret[1])
                else:
                    vtJsonDict = ret[1]
                    if vtJsonDict.has_key('response_code'):
                        if vtJsonDict['response_code'] == 1:
                            if vtJsonDict.has_key('positives') and vtJsonDict.has_key('total'):
                                pdf.setDetectionRate([vtJsonDict['positives'], vtJsonDict['total']])
                            else:
                                pdf.addError('Missing elements in the response from VirusTotal!!')

コード例 #8

0

ファイルを表示

def snatch_contents(file):
    """Parse a PDF and return a JSON array describing each of its objects.

    Every object found in every element of the parsed document's ``body``
    yields one dictionary holding its id, offset, size, raw value (cleaned
    with ``encoder_cleaner``), MD5 of the raw value, references, errors,
    encryption/JavaScript flags and the suspicious events, actions, elements
    and vulnerabilities exposed by the object.  For stream objects the
    ``stream`` entry is a dictionary with the filter, size, encoded/decoded
    content (cleaned, MD5 and hex forms), decoding errors and JavaScript
    code; for any other object it is an empty list.

    :param file: path of the PDF file to analyse.
    :return: the list of object dictionaries serialised with ``json.dumps``.
    """
    pdfParser = PDFParser()
    ret, pdf = pdfParser.parse(file, True, False)
    all_objs = []
    for item in pdf.body:
        objs = item.objects
        for index in objs:
            entry = objs[index]
            details = entry.object

            # Non-stream objects keep the empty-list placeholder.
            stream_details = []
            if details.type == "stream":
                # Named stream_filter so the builtin ``filter`` is not shadowed.
                stream_filter = None
                if details.filter is not None:
                    stream_filter = details.filter.rawValue
                encoded_stream = details.encodedStream
                decoded_stream = details.decodedStream
                stream_details = {
                    'filter': stream_filter,
                    'size': details.size,
                    'encoded_stream': encoder_cleaner(encoded_stream),
                    'encoded_hash': hashlib.md5(encoded_stream).hexdigest(),
                    'encoded_hex': ByteToHex(encoded_stream),
                    'decoded_stream': encoder_cleaner(decoded_stream),
                    'decoded_hash': hashlib.md5(decoded_stream).hexdigest(),
                    'decoded_hex': ByteToHex(decoded_stream),
                    'decode_errors': details.decodingError,
                    'js_code': details.JSCode,
                    'flags': []
                }

            raw_value = details.rawValue
            try:
                raw_md5 = hashlib.md5(raw_value).hexdigest()
            except Exception:
                # Best effort: a raw value md5 cannot digest is reported as
                # "error" instead of aborting the whole dump.
                raw_md5 = "error"

            try:
                suspicious_events = details.suspiciousEvents
                suspicious_actions = details.suspiciousActions
                suspicious_elements = details.suspiciousElements
                vulns = details.vulns
            except Exception:
                # If any of the four attributes cannot be read, all four
                # fall back to empty lists.
                suspicious_events = []
                suspicious_actions = []
                suspicious_elements = []
                vulns = []

            all_objs.append({
                'id': entry.id,
                'offset': entry.offset,
                'size': entry.size,
                'stream': stream_details,
                'encrypted': details.encrypted,
                'contains_js': details.containsJScode,
                'raw': encoder_cleaner(raw_value),
                'raw_hash': raw_md5,
                'references': details.references,
                'errors': details.errors,
                'suspicious_events': suspicious_events,
                'suspicious_actions': suspicious_actions,
                'suspicious_elements': suspicious_elements,
                'vulns': vulns
            })

    return json.dumps(all_objs)

コード例 #9

0

ファイルを表示

def snatch_version(file):
    """Summarise every version reported for a PDF as a JSON array.

    For each entry of the ``'Versions'`` list in the parser statistics the
    result holds: the version index, the object ids and object count, the
    basic metadata (author, creator, producer, creation and modification
    dates, cleaned with ``encoder_cleaner``; empty string when absent) and
    four ``"true"``/``"false"`` string flags telling whether the statistics
    carry events, actions, elements or vulnerabilities (i.e. the
    corresponding entry is not ``None``).

    :param file: path of the PDF file to analyse.
    :return: the list of per-version dictionaries serialised with
        ``json.dumps``.
    """
    pdfParser = PDFParser()
    ret, pdf = pdfParser.parse(file, True, False)
    statsDict = pdf.getStats()
    objs = []
    for version, statsVersion in enumerate(statsDict['Versions']):
        meta = pdf.getBasicMetadata(version)
        # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
        author = encoder_cleaner(meta['author']) if 'author' in meta else ""
        creator = encoder_cleaner(meta['creator']) if 'creator' in meta else ""
        producer = encoder_cleaner(meta['producer']) if 'producer' in meta else ""
        creation_date = encoder_cleaner(meta['creation']) if 'creation' in meta else ""
        modification_date = encoder_cleaner(meta['modification']) if 'modification' in meta else ""

        actions = statsVersion['Actions']
        events = statsVersion['Events']
        vulns = statsVersion['Vulns']
        elements = statsVersion['Elements']
        # Objects[1] is a string: drop its first and last character
        # (presumably the enclosing brackets -- confirm) and split on commas.
        object_ids = statsVersion['Objects'][1][1:-1].split(',')

        objs.append({
            'version': version,
            'object_ids': object_ids,
            'total_objects': statsVersion['Objects'][0],
            'author': author,
            'creator': creator,
            'producer': producer,
            'creation_date': creation_date,
            'modification_date': modification_date,
            # The flags are strings, not booleans, as in the original output.
            'suspicious_events_present': "false" if events is None else "true",
            'suspicious_actions_present': "false" if actions is None else "true",
            'suspicious_elements_present': "false" if elements is None else "true",
            'vulnerabilities_present': "false" if vulns is None else "true"
        })

    return json.dumps(objs)

コード例 #10

0

ファイルを表示

ファイル: hsn2peepdftaskprocessor.py プロジェクト: the-st0rm/hsn2-peepdf

	def taskProcess(self):
		'''
		Analyse the task's "content" object with peepdf and store the results.

		The content is saved to a temporary file, parsed with PDFParser (force
		mode on, loose mode and manual analysis off) and the outcome is recorded
		on self.objects[0]: a success flag, SHA1, suspicious components,
		metadata, JavaScript code and number of streams, plus start/stop
		timestamps in milliseconds. When the parser does not return 0 the string
		fields are set to "Error" and the stream count to 0. The temporary file
		is removed even if the analysis raises.

		Returns a list of warnings (always empty). The current task is available
		at self.currentTask.

		Raises ObjectStoreException when there is no task object and
		ParamException when the task object has no "content" set.
		'''
		logging.debug(self.__class__)
		logging.debug(self.currentTask)
		logging.debug(self.objects)
		if len(self.objects) == 0:
			raise ObjectStoreException("Task processing didn't find task object.")

		taskObject = self.objects[0]
		if not taskObject.isSet("content"):
			raise ParamException("content is missing.")

		filepath = self.dsAdapter.saveTmp(self.currentTask.job, taskObject.content.getKey())
		try:
			# os.chmod replaces a shell command built from the file path. A
			# failure is only logged, as the shell call's result was ignored too.
			try:
				os.chmod(filepath, 0o755)
			except OSError:
				logging.warning("Could not change permissions of %s", filepath)

			# TODO: receive parameters and pass them to PDFParser for better
			# results. For now this loop is a placeholder with no effect.
			for param in self.currentTask.parameters:
				if param.name == "param":
					value = str(param.value)
					if len(value) > 0:
						pass
					break

			taskObject.addTime("peepdf_time_start", int(time.time() * 1000))

			pdfParser = PDFParser()
			isForceMode = True
			isLooseMode = False
			isManualAnalysis = False
			ret, pdf = pdfParser.parse(filepath, isForceMode, isLooseMode, isManualAnalysis)

			if ret == 0:
				taskObject.addBool("peepdf_executed_successfully", True)
				taskObject.addString("file_SHA1", pdf.getSHA1())
				taskObject.addString("peepdf_suspicious_content", str(pdf.getSuspiciousComponents()))
				taskObject.addString("peepdf_metadata", str(pdf.getMetadata()))
				taskObject.addString("peepdf_javascript", str(pdf.getJavascriptCode()))
				# NOTE(review): a stream count is stored through addTime --
				# confirm whether an integer setter should be used instead.
				taskObject.addTime("peepdf_numStreams", pdf.numStreams)
			else:
				taskObject.addBool("peepdf_executed_successfully", False)
				# These placeholder strings should go once the failure case can
				# be handled from the JSON template file.
				taskObject.addString("file_SHA1", "Error")
				taskObject.addString("peepdf_suspicious_content", "Error")
				taskObject.addString("peepdf_metadata", "Error")
				taskObject.addString("peepdf_javascript", "Error")
				taskObject.addTime("peepdf_numStreams", 0)

			taskObject.addTime("peepdf_time_stop", int(time.time() * 1000))
		finally:
			# Always drop the temporary copy, even when the analysis fails.
			self.dsAdapter.removeTmp(filepath)

		return []

コード例 #11

0

ファイルを表示

ファイル: harness.py プロジェクト: cvandeplas/pdfxray

def snatch_contents(file):
    """Parse a PDF and return a JSON array describing each of its objects.

    Every object found in every element of the parsed document's ``body``
    yields one dictionary holding its id, offset, size, raw value (cleaned
    with ``encoder_cleaner``), MD5 of the raw value, references, errors,
    encryption/JavaScript flags and the suspicious events, actions, elements
    and vulnerabilities exposed by the object.  For stream objects the
    ``stream`` entry is a dictionary with the filter, size, encoded/decoded
    content (cleaned, MD5 and hex forms), decoding errors, JavaScript code
    and entropy blocks; for any other object it is an empty list.

    :param file: path of the PDF file to analyse.
    :return: the list of object dictionaries serialised with ``json.dumps``.
    """
    pdfParser = PDFParser()
    ret, pdf = pdfParser.parse(file, True, False)
    all_objs = []
    for item in pdf.body:
        objs = item.objects
        for index in objs:
            entry = objs[index]
            details = entry.object

            # Non-stream objects keep the empty-list placeholder.
            stream_details = []
            if details.type == "stream":
                # Named stream_filter so the builtin ``filter`` is not shadowed.
                stream_filter = None
                if details.filter is not None:
                    stream_filter = details.filter.rawValue
                encoded_stream = details.encodedStream
                decoded_stream = details.decodedStream
                blocks = analyzer(get_entropy(details.rawStream))
                stream_details = {
                    "filter": stream_filter,
                    "size": details.size,
                    "encoded_stream": encoder_cleaner(encoded_stream),
                    "encoded_hash": hashlib.md5(encoded_stream).hexdigest(),
                    "encoded_hex": ByteToHex(encoded_stream),
                    "decoded_stream": encoder_cleaner(decoded_stream),
                    "decoded_hash": hashlib.md5(decoded_stream).hexdigest(),
                    "decoded_hex": ByteToHex(decoded_stream),
                    "decode_errors": details.decodingError,
                    "js_code": details.JSCode,
                    "entropy": blocks.json_blocks,
                    "flags": [],
                }

            raw_value = details.rawValue
            try:
                raw_md5 = hashlib.md5(raw_value).hexdigest()
            except Exception:
                # Best effort: a raw value md5 cannot digest is reported as
                # "error" instead of aborting the whole dump.
                raw_md5 = "error"

            try:
                suspicious_events = details.suspiciousEvents
                suspicious_actions = details.suspiciousActions
                suspicious_elements = details.suspiciousElements
                vulns = details.vulns
            except Exception:
                # If any of the four attributes cannot be read, all four
                # fall back to empty lists.
                suspicious_events = []
                suspicious_actions = []
                suspicious_elements = []
                vulns = []

            # The record is built and appended for every object, not only
            # when the attribute lookup above fails.
            all_objs.append({
                "id": entry.id,
                "offset": entry.offset,
                "size": entry.size,
                "stream": stream_details,
                "encrypted": details.encrypted,
                "contains_js": details.containsJScode,
                "raw": encoder_cleaner(raw_value),
                "raw_hash": raw_md5,
                "references": details.references,
                "errors": details.errors,
                "suspicious_events": suspicious_events,
                "suspicious_actions": suspicious_actions,
                "suspicious_elements": suspicious_elements,
                "vulns": vulns,
            })

    return json.dumps(all_objs)

コード例 #12

0

ファイルを表示

ファイル: harness.py プロジェクト: cvandeplas/pdfxray

def snatch_version(file):
    """Summarise every version reported for a PDF as a JSON array.

    For each entry of the ``"Versions"`` list in the parser statistics the
    result holds: the version index, the object ids and object count, the
    basic metadata (author, creator, producer, creation and modification
    dates, cleaned with ``encoder_cleaner``; empty string when absent) and
    four ``"true"``/``"false"`` string flags telling whether the statistics
    carry events, actions, elements or vulnerabilities (i.e. the
    corresponding entry is not ``None``).

    :param file: path of the PDF file to analyse.
    :return: the list of per-version dictionaries serialised with
        ``json.dumps``.
    """
    pdfParser = PDFParser()
    ret, pdf = pdfParser.parse(file, True, False)
    statsDict = pdf.getStats()
    objs = []
    for version, statsVersion in enumerate(statsDict["Versions"]):
        meta = pdf.getBasicMetadata(version)
        # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
        author = encoder_cleaner(meta["author"]) if "author" in meta else ""
        creator = encoder_cleaner(meta["creator"]) if "creator" in meta else ""
        producer = encoder_cleaner(meta["producer"]) if "producer" in meta else ""
        creation_date = encoder_cleaner(meta["creation"]) if "creation" in meta else ""
        modification_date = encoder_cleaner(meta["modification"]) if "modification" in meta else ""

        actions = statsVersion["Actions"]
        events = statsVersion["Events"]
        vulns = statsVersion["Vulns"]
        elements = statsVersion["Elements"]
        # Objects[1] is a string: drop its first and last character
        # (presumably the enclosing brackets -- confirm) and split on commas.
        object_ids = statsVersion["Objects"][1][1:-1].split(",")

        objs.append({
            "version": version,
            "object_ids": object_ids,
            "total_objects": statsVersion["Objects"][0],
            "author": author,
            "creator": creator,
            "producer": producer,
            "creation_date": creation_date,
            "modification_date": modification_date,
            # The flags are strings, not booleans, as in the original output.
            "suspicious_events_present": "false" if events is None else "true",
            "suspicious_actions_present": "false" if actions is None else "true",
            "suspicious_elements_present": "false" if elements is None else "true",
            "vulnerabilities_present": "false" if vulns is None else "true",
        })

    return json.dumps(objs)

コード例 #13

0

ファイルを表示

ファイル: peepdf.py プロジェクト: 0day1day/pdfxray_public

# Validate the positional arguments: exactly one existing PDF path is
# accepted; no argument at all is only allowed in interactive mode.
if len(args) == 1:
    fileName = args[0]
    if not os.path.exists(fileName):
        sys.exit('Error: The file "' + fileName + '" does not exist!!')
elif len(args) > 1 or not (len(args) or options.isInteractive):
    sys.exit(argsParser.print_help())

# A script file, when given, must exist as well.
if options.scriptFile is not None and not os.path.exists(options.scriptFile):
    sys.exit('Error: The script file "' + options.scriptFile + '" does not exist!!')

# Parse the file and start building the textual statistics report.
if fileName is not None:
    if not JS_MODULE:
        stats += 'Warning: Spidermonkey is not installed!!' + newLine
    pdfParser = PDFParser()
    ret, pdf = pdfParser.parse(fileName, options.isForceMode, options.isLooseMode)
    errors = pdf.getErrors()
    # Only decryption errors are surfaced in the report header.
    for error in errors:
        if 'Decryption error' in error:
            stats += error + newLine
    # Separate any warnings/errors from the statistics with a blank line.
    if stats:
        stats += newLine
    statsDict = pdf.getStats()
    stats += ''.join([
        'File: ', statsDict['File'], newLine,
        'MD5: ', statsDict['MD5'], newLine,
        'Size: ', statsDict['Size'], ' bytes', newLine,
        'Version: ', statsDict['Version'], newLine,
        'Binary: ', statsDict['Binary'], newLine,
        'Linearized: ', statsDict['Linearized'], newLine,
        'Encrypted: ', statsDict['Encrypted'], newLine,
    ])

コード例 #14

0

ファイルを表示

#!/usr/bin/python
# -*- coding: utf-8 -*-

import subprocess
from PDFCore import PDFParser


def getpdfhear():
    """Placeholder: takes no arguments, does nothing and returns ``None``."""
    return None


# NOTE(review): this looks like a VirusTotal API key hard-coded in source;
# consider loading it from configuration or the environment instead.
VT_KEY = 'fc90df3f5ac749a94a94cb8bf87e05a681a2eb001aef34b6a0084b8c22c97a64'
if __name__ == '__main__':
    # Earlier experiment, kept for reference only (never executed):
    #   cmd ='python peepdf.py F:\PDFdata\VirusShare_PDF_20170404\cve2010-2883 -i'
    #   subprocess.call(cmd,shell=True)
    #   pdfParser = PDFParser()
    #   ret, pdf = pdfParser.parse('F:\PDFdata\VirusShare_PDF_20170404\cve2010-2883')
    #   ps = pdfParser(pdf,VT_KEY,False).do_tree()
    #   gtr =  pdf.getObject(7)
    #   print gtr
    #   #statsDict = pdf.getStats

    # Raw string: the backslashes are Windows path separators, not escapes.
    sample = r'F:\PDFdata\VirusShare_PDF_20170404\cve2010-2883'
    pdfParser = PDFParser()
    ret, pdf = pdfParser.parse(sample)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(pdf.getTree())
    print(pdf.getObject(12).getValue())

コード例 #15

0

ファイルを表示

def main():
    """Entry point of the peepdf command line tool.

    Parses the command line options and, when a PDF file is given, parses it
    with ``PDFParser`` (optionally looking up its MD5 on VirusTotal).  The
    result is then delivered in exactly one of these ways:

    * ``-x``: XML report written to stdout;
    * ``-j`` (without ``-C``): JSON report written to stdout;
    * ``-s``: the commands of a script file are fed to a ``PDFConsole``;
    * ``-C``: each given command is executed on a ``PDFConsole``;
    * otherwise: a text summary is printed and, with ``-i``, the interactive
      console is started.

    Tracebacks of unexpected exceptions are appended to ``peepdf_errors.txt``
    in the current working directory.  If that file exists when the function
    finishes, the process exits with a message asking to report the errors.

    Raises:
        SystemExit: on invalid arguments, missing input files, Ctrl-C inside
            the interactive console, or when the errors file exists at exit.
    """
    global COLORIZED_OUTPUT
    version = '0.3'
    revision = '8'
    versionHeader = 'Version: peepdf ' + version + ' r' + revision
    author = 'Jose Miguel Esparza'
    email = 'peepdf AT eternal-todo.com'
    url = 'http://peepdf.eternal-todo.com'
    twitter = 'http://twitter.com/EternalTodo'
    peepTwitter = 'http://twitter.com/peepdf'
    newLine = os.linesep
    currentDir = os.getcwd()
    errorsFile = os.path.join(currentDir, 'peepdf_errors.txt')
    peepdfHeader = (versionHeader + newLine * 2 + url + newLine +
                    peepTwitter + newLine + email + newLine * 2 + author +
                    newLine + twitter + newLine)

    def logTraceback():
        # Append the traceback of the exception being handled to the errors
        # file, making sure the file handle is closed afterwards.
        with open(errorsFile, 'a') as errorsLog:
            traceback.print_exc(file=errorsLog)

    argsParser = optparse.OptionParser(
        usage='Usage: peepdf.py [options] PDF_file',
        description=versionHeader)
    argsParser.add_option(
        '-i',
        '--interactive',
        action='store_true',
        dest='isInteractive',
        default=False,
        help='Sets console mode.')
    argsParser.add_option(
        '-s',
        '--load-script',
        action='store',
        type='string',
        dest='scriptFile',
        help='Loads the commands stored in the specified file and execute them.')
    argsParser.add_option(
        '-c',
        '--check-vt',
        action='store_true',
        dest='checkOnVT',
        default=False,
        help='Checks the hash of the PDF file on VirusTotal.')
    argsParser.add_option(
        '-f',
        '--force-mode',
        action='store_true',
        dest='isForceMode',
        default=False,
        help='Sets force parsing mode to ignore errors.')
    argsParser.add_option(
        '-l',
        '--loose-mode',
        action='store_true',
        dest='isLooseMode',
        default=False,
        help='Sets loose parsing mode to catch malformed objects.')
    argsParser.add_option(
        '-m',
        '--manual-analysis',
        action='store_true',
        dest='isManualAnalysis',
        default=False,
        help='Avoids automatic Javascript analysis. Useful with eternal loops like heap spraying.')
    argsParser.add_option(
        '-g',
        '--grinch-mode',
        action='store_true',
        dest='avoidColors',
        default=False,
        help='Avoids colorized output in the interactive console.')
    argsParser.add_option(
        '-v',
        '--version',
        action='store_true',
        dest='version',
        default=False,
        help='Shows program\'s version number.')
    argsParser.add_option(
        '-x',
        '--xml',
        action='store_true',
        dest='xmlOutput',
        default=False,
        help='Shows the document information in XML format.')
    argsParser.add_option(
        '-j',
        '--json',
        action='store_true',
        dest='jsonOutput',
        default=False,
        help='Shows the document information in JSON format.')
    argsParser.add_option(
        '-C',
        '--command',
        action='append',
        type='string',
        dest='commands',
        help='Specifies a command from the interactive console to be executed.')
    (options, args) = argsParser.parse_args()

    stats = ''
    pdf = None
    fileName = None
    statsDict = None
    vtJsonDict = None

    # Colour codes default to "no colour" so that the exception handlers at
    # the bottom can always use them, whatever fails inside the try block.
    warningColor = ''
    errorColor = ''
    alertColor = ''
    staticColor = ''
    resetColor = ''

    try:
        # Use colours only when they are available and not disabled
        if COLORIZED_OUTPUT and not options.avoidColors:
            warningColor = Fore.YELLOW
            errorColor = Fore.RED
            alertColor = Fore.RED
            staticColor = Fore.BLUE
            resetColor = Style.RESET_ALL
        if options.version:
            print(peepdfHeader)

        else:
            if len(args) == 1:
                fileName = args[0]
                if not os.path.exists(fileName):
                    sys.exit('Error: The file "' + fileName +
                             '" does not exist!!')
            elif len(args) > 1 or (len(args) == 0
                                   and not options.isInteractive):
                sys.exit(argsParser.print_help())

            if options.scriptFile is not None:
                if not os.path.exists(options.scriptFile):
                    sys.exit('Error: The script file "' + options.scriptFile +
                             '" does not exist!!')

            if fileName is not None:
                pdfParser = PDFParser()
                ret, pdf = pdfParser.parse(fileName, options.isForceMode,
                                           options.isLooseMode,
                                           options.isManualAnalysis)
                if options.checkOnVT:
                    # Checks the MD5 on VirusTotal
                    md5Hash = pdf.getMD5()
                    ret = vtcheck(md5Hash, VT_KEY)
                    if ret[0] == -1:
                        pdf.addError(ret[1])
                    else:
                        vtJsonDict = ret[1]
                        if 'response_code' in vtJsonDict:
                            if vtJsonDict['response_code'] == 1:
                                if 'positives' in vtJsonDict and 'total' in vtJsonDict:
                                    pdf.setDetectionRate([
                                        vtJsonDict['positives'],
                                        vtJsonDict['total']
                                    ])
                                else:
                                    pdf.addError(
                                        'Missing elements in the response from VirusTotal!!')
                                if 'permalink' in vtJsonDict:
                                    pdf.setDetectionReport(
                                        vtJsonDict['permalink'])
                            else:
                                pdf.setDetectionRate(None)
                        else:
                            pdf.addError('Bad response from VirusTotal!!')
                statsDict = pdf.getStats()

            if options.xmlOutput:
                try:
                    xml = getPeepXML(statsDict, version, revision)
                    sys.stdout.write(xml)
                except:
                    errorMessage = '*** Error: Exception while generating the XML file!!'
                    logTraceback()
                    raise Exception('PeepException', 'Send me an email ;)')
            elif options.jsonOutput and not options.commands:
                try:
                    jsonReport = getPeepJSON(statsDict, version, revision)
                    sys.stdout.write(jsonReport)
                except:
                    errorMessage = '*** Error: Exception while generating the JSON report!!'
                    logTraceback()
                    raise Exception('PeepException', 'Send me an email ;)')
            else:
                if COLORIZED_OUTPUT and not options.avoidColors:
                    try:
                        init()
                    except:
                        COLORIZED_OUTPUT = False
                if options.scriptFile is not None:
                    from PDFConsole import PDFConsole

                    scriptFileObject = open(options.scriptFile, 'rb')
                    try:
                        console = PDFConsole(pdf,
                                             VT_KEY,
                                             options.avoidColors,
                                             stdin=scriptFileObject)
                        try:
                            console.cmdloop()
                        except:
                            errorMessage = '*** Error: Exception not handled using the batch mode!!'
                            logTraceback()
                            raise Exception('PeepException',
                                            'Send me an email ;)')
                    finally:
                        # Release the script file on success as well as on
                        # failure.
                        scriptFileObject.close()
                elif options.commands is not None:
                    from PDFConsole import PDFConsole

                    console = PDFConsole(pdf, VT_KEY, options.avoidColors)
                    try:
                        for command in options.commands:
                            console.onecmd(command)
                    except:
                        errorMessage = '*** Error: Exception not handled using the batch commands!!'
                        logTraceback()
                        raise Exception('PeepException', 'Send me an email ;)')
                else:
                    if statsDict is not None:
                        if COLORIZED_OUTPUT and not options.avoidColors:
                            beforeStaticLabel = staticColor
                        else:
                            beforeStaticLabel = ''

                        # Warnings about optional modules that are missing
                        if not JS_MODULE:
                            warningMessage = 'Warning: PyV8 is not installed!!'
                            stats += warningColor + warningMessage + resetColor + newLine
                        if not EMU_MODULE:
                            warningMessage = 'Warning: pylibemu is not installed!!'
                            stats += warningColor + warningMessage + resetColor + newLine
                        if not PIL_MODULE:
                            warningMessage = 'Warning: Python Imaging Library (PIL) is not installed!!'
                            stats += warningColor + warningMessage + resetColor + newLine
                        # Only decryption errors are shown in the summary
                        errors = statsDict['Errors']
                        for error in errors:
                            if 'Decryption error' in error:
                                stats += errorColor + error + resetColor + newLine
                        if stats != '':
                            stats += newLine
                        statsDict = pdf.getStats()

                        # General information about the file
                        stats += (beforeStaticLabel + 'File: ' + resetColor +
                                  statsDict['File'] + newLine)
                        stats += (beforeStaticLabel + 'MD5: ' + resetColor +
                                  statsDict['MD5'] + newLine)
                        stats += (beforeStaticLabel + 'SHA1: ' + resetColor +
                                  statsDict['SHA1'] + newLine)
                        stats += (beforeStaticLabel + 'SHA256: ' + resetColor +
                                  statsDict['SHA256'] + newLine)
                        stats += (beforeStaticLabel + 'Size: ' + resetColor +
                                  statsDict['Size'] + ' bytes' + newLine)
                        if options.checkOnVT:
                            if statsDict['Detection'] != []:
                                detectionReportInfo = ''
                                if statsDict['Detection'] is not None:
                                    detectionColor = ''
                                    if COLORIZED_OUTPUT and not options.avoidColors:
                                        # The level is the number of positives
                                        # measured in thirds of the total.
                                        # With fewer than three engines a third
                                        # is zero, so no colour is chosen
                                        # instead of dividing by zero.
                                        oneThird = statsDict['Detection'][1] // 3
                                        if oneThird:
                                            detectionLevel = statsDict['Detection'][0] // oneThird
                                        else:
                                            detectionLevel = None
                                        if detectionLevel == 0:
                                            detectionColor = alertColor
                                        elif detectionLevel == 1:
                                            detectionColor = warningColor
                                    detectionRate = '%s%d%s/%d' % (
                                        detectionColor,
                                        statsDict['Detection'][0], resetColor,
                                        statsDict['Detection'][1])
                                    if statsDict['Detection report'] != '':
                                        detectionReportInfo = (
                                            beforeStaticLabel + 'Detection report: ' + resetColor +
                                            statsDict['Detection report'] + newLine)
                                else:
                                    detectionRate = 'File not found on VirusTotal'
                                stats += beforeStaticLabel + 'Detection: ' + resetColor + detectionRate + newLine
                                stats += detectionReportInfo
                        stats += (beforeStaticLabel + 'Version: ' + resetColor +
                                  statsDict['Version'] + newLine)
                        stats += (beforeStaticLabel + 'Binary: ' + resetColor +
                                  statsDict['Binary'] + newLine)
                        stats += (beforeStaticLabel + 'Linearized: ' + resetColor +
                                  statsDict['Linearized'] + newLine)
                        stats += (beforeStaticLabel + 'Encrypted: ' + resetColor +
                                  statsDict['Encrypted'])
                        if statsDict['Encryption Algorithms'] != []:
                            stats += ' (' + ', '.join(
                                algorithmInfo[0] + ' ' + str(algorithmInfo[1]) + ' bits'
                                for algorithmInfo in statsDict['Encryption Algorithms']) + ')'
                        stats += newLine
                        stats += (beforeStaticLabel + 'Updates: ' + resetColor +
                                  statsDict['Updates'] + newLine)
                        stats += (beforeStaticLabel + 'Objects: ' + resetColor +
                                  statsDict['Objects'] + newLine)
                        stats += (beforeStaticLabel + 'Streams: ' + resetColor +
                                  statsDict['Streams'] + newLine)
                        stats += (beforeStaticLabel + 'URIs: ' + resetColor +
                                  statsDict['URIs'] + newLine)
                        stats += (beforeStaticLabel + 'Comments: ' + resetColor +
                                  statsDict['Comments'] + newLine)
                        stats += (beforeStaticLabel + 'Errors: ' + resetColor +
                                  str(len(statsDict['Errors'])) + newLine * 2)

                        # One section per version (update) of the document
                        for versionId, statsVersion in enumerate(statsDict['Versions']):
                            stats += (beforeStaticLabel + 'Version ' + resetColor +
                                      str(versionId) + ':' + newLine)
                            if statsVersion['Catalog'] is not None:
                                stats += (beforeStaticLabel + '\tCatalog: ' + resetColor +
                                          statsVersion['Catalog'] + newLine)
                            else:
                                stats += beforeStaticLabel + '\tCatalog: ' + resetColor + 'No' + newLine
                            if statsVersion['Info'] is not None:
                                stats += (beforeStaticLabel + '\tInfo: ' + resetColor +
                                          statsVersion['Info'] + newLine)
                            else:
                                stats += beforeStaticLabel + '\tInfo: ' + resetColor + 'No' + newLine
                            stats += (beforeStaticLabel + '\tObjects (' +
                                      statsVersion['Objects'][0] + '): ' + resetColor +
                                      str(statsVersion['Objects'][1]) + newLine)
                            if statsVersion['Compressed Objects'] is not None:
                                stats += (beforeStaticLabel + '\tCompressed objects (' +
                                          statsVersion['Compressed Objects'][0] + '): ' + resetColor +
                                          str(statsVersion['Compressed Objects'][1]) + newLine)
                            if statsVersion['Errors'] is not None:
                                stats += (beforeStaticLabel + '\t\tErrors (' +
                                          statsVersion['Errors'][0] + '): ' + resetColor +
                                          str(statsVersion['Errors'][1]) + newLine)
                            stats += (beforeStaticLabel + '\tStreams (' +
                                      statsVersion['Streams'][0] + '): ' + resetColor +
                                      str(statsVersion['Streams'][1]))
                            if statsVersion['Xref Streams'] is not None:
                                stats += (newLine + beforeStaticLabel + '\t\tXref streams (' +
                                          statsVersion['Xref Streams'][0] + '): ' + resetColor +
                                          str(statsVersion['Xref Streams'][1]))
                            if statsVersion['Object Streams'] is not None:
                                stats += (newLine + beforeStaticLabel + '\t\tObject streams (' +
                                          statsVersion['Object Streams'][0] + '): ' + resetColor +
                                          str(statsVersion['Object Streams'][1]))
                            if int(statsVersion['Streams'][0]) > 0:
                                stats += (newLine + beforeStaticLabel + '\t\tEncoded (' +
                                          statsVersion['Encoded'][0] + '): ' + resetColor +
                                          str(statsVersion['Encoded'][1]))
                                if statsVersion['Decoding Errors'] is not None:
                                    stats += (newLine + beforeStaticLabel + '\t\tDecoding errors (' +
                                              statsVersion['Decoding Errors'][0] + '): ' + resetColor +
                                              str(statsVersion['Decoding Errors'][1]))
                            if statsVersion['URIs'] is not None:
                                stats += (newLine + beforeStaticLabel + '\tObjects with URIs (' +
                                          statsVersion['URIs'][0] + '): ' + resetColor +
                                          str(statsVersion['URIs'][1]))
                            # Suspicious information is labelled with the
                            # warning colour instead of the static one
                            if COLORIZED_OUTPUT and not options.avoidColors:
                                beforeStaticLabel = warningColor
                            if statsVersion['Objects with JS code'] is not None:
                                stats += (newLine + beforeStaticLabel + '\tObjects with JS code (' +
                                          statsVersion['Objects with JS code'][0] + '): ' + resetColor +
                                          str(statsVersion['Objects with JS code'][1]))
                            actions = statsVersion['Actions']
                            events = statsVersion['Events']
                            vulns = statsVersion['Vulns']
                            elements = statsVersion['Elements']
                            if (events is not None or actions is not None or
                                    vulns is not None or elements is not None):
                                stats += newLine + beforeStaticLabel + '\tSuspicious elements:' + resetColor + newLine
                                if events is not None:
                                    for event in events:
                                        stats += ('\t\t' + beforeStaticLabel + event +
                                                  ' (%d): ' % len(events[event]) +
                                                  resetColor + str(events[event]) + newLine)
                                if actions is not None:
                                    for action in actions:
                                        stats += ('\t\t' + beforeStaticLabel + action +
                                                  ' (%d): ' % len(actions[action]) +
                                                  resetColor + str(actions[action]) + newLine)
                                if vulns is not None:
                                    for vuln in vulns:
                                        if vuln in vulnsDict:
                                            vulnName = vulnsDict[vuln][0]
                                            vulnCVEList = vulnsDict[vuln][1]
                                            # join() keeps the opening
                                            # parenthesis even when the CVE
                                            # list is empty
                                            stats += ('\t\t' + beforeStaticLabel + vulnName +
                                                      ' (' + ','.join(vulnCVEList) +
                                                      ') (%d): ' % len(vulns[vuln]) +
                                                      resetColor + str(vulns[vuln]) + newLine)
                                        else:
                                            stats += ('\t\t' + beforeStaticLabel + vuln +
                                                      ' (%d): ' % len(vulns[vuln]) +
                                                      resetColor + str(vulns[vuln]) + newLine)
                                if elements is not None:
                                    for element in elements:
                                        if element in vulnsDict:
                                            vulnName = vulnsDict[element][0]
                                            vulnCVEList = vulnsDict[element][1]
                                            stats += ('\t\t' + beforeStaticLabel + vulnName +
                                                      ' (' + ','.join(vulnCVEList) + '): ' +
                                                      resetColor + str(elements[element]) + newLine)
                                        else:
                                            stats += ('\t\t' + beforeStaticLabel + element + ': ' +
                                                      resetColor + str(elements[element]) + newLine)
                            if COLORIZED_OUTPUT and not options.avoidColors:
                                beforeStaticLabel = staticColor
                            urls = statsVersion['URLs']
                            if urls is not None:
                                stats += newLine + beforeStaticLabel + '\tFound URLs:' + resetColor + newLine
                                for foundUrl in urls:
                                    stats += '\t\t' + foundUrl + newLine
                            stats += newLine * 2
                    if fileName is not None:
                        print(stats)
                    if options.isInteractive:
                        from PDFConsole import PDFConsole

                        console = PDFConsole(pdf, VT_KEY, options.avoidColors)
                        # Keep the console alive after an unexpected error
                        # until it reports that it is leaving
                        while not console.leaving:
                            try:
                                console.cmdloop()
                            except KeyboardInterrupt:
                                sys.exit()
                            except:
                                errorMessage = '*** Error: Exception not handled using the interactive console!! Please, report it to the author!!'
                                print(errorColor + errorMessage + resetColor + newLine)
                                logTraceback()
    except Exception as e:
        # A 'PeepException' was raised on purpose above: its message is
        # already in errorMessage and its traceback is already logged.
        if len(e.args) == 2:
            excName = e.args[0]
        else:
            excName = None
        if excName != 'PeepException':
            errorMessage = '*** Error: Exception not handled!!'
            logTraceback()
        print(errorColor + errorMessage + resetColor + newLine)
    finally:
        if os.path.exists(errorsFile):
            message = newLine + 'Please, don\'t forget to report the errors found:' + newLine * 2
            message += '\t- Sending the file "%s" to the author (%s)%s' % (
                errorsFile, email, newLine)
            message += '\t- And/Or creating an issue on the project webpage (https://github.com/jesparza/peepdf/issues)' + newLine
            message = errorColor + message + resetColor
            sys.exit(message)