Ejemplo n.º 1
0
def DumpStream(path, objid):
    """Print the stream data of object ``objid`` from the PDF at ``path``.

    Args:
        path: filesystem path of the PDF document.
        objid: identifier of the object whose stream should be printed.

    Returns:
        0 when the object was found and is a stream, 1 when ``path`` is not
        a file, the object is missing or not a stream, or any exception is
        raised, and 2 when the parser yields no document.
    """
    if not os.path.isfile(path):
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print('{0} not a file!'.format(path))
        return 1

    try:
        pdfParser = PDFParser()
        _, pdf = pdfParser.parse(path, True)
        if not pdf:
            return 2

        # get object
        obj = pdf.getObject(objid, None)
        if not obj:
            print('{0} stream not found!'.format(objid))
            return 1

        if obj.getType() != 'stream':
            print('{0} is not a stream!'.format(objid))
            return 1

        value = obj.getStream()
        # A result of -1 is treated as "nothing to print"; the call still
        # reports success.
        if value != -1:
            print(value)

    except Exception as ex:
        print(str(ex))
        return 1

    return 0
Ejemplo n.º 2
0
    def handle_pdf(self, url, content):
        """Parse PDF ``content`` retrieved from ``url`` and log the analysis.

        Args:
            url: URL the content was retrieved from.
            content: raw bytes of the document.

        Returns:
            None when no sample could be built or the sample type is not
            'PDF', False when the parser raises, True otherwise.
        """
        sample = log.ThugLogging.build_sample(content, url)
        if sample is None or sample['type'] not in ('PDF', ):
            return

        # mkstemp() hands back an already-open OS-level descriptor. Wrap it
        # rather than reopening the path, otherwise the descriptor is never
        # closed and leaks on every call.
        fd, rfile = tempfile.mkstemp()

        try:
            with os.fdopen(fd, 'wb') as handle:
                handle.write(content)

            pdfparser = PDFParser()

            try:
                _, pdf = pdfparser.parse(rfile, forceMode=True, looseMode=True)
            except:  # pylint:disable=bare-except
                # Deliberately broad: any parser failure means "not analysed".
                return False

            statsDict = pdf.getStats()
            analysis = self.getPeepXML(statsDict, url)

            log_dir = os.path.join(log.ThugLogging.baseDir, "analysis", "pdf")
            log.ThugLogging.log_peepdf(log_dir, sample, analysis)

            self.swf_mastah(pdf, statsDict, url)
            return True
        finally:
            # Remove the temporary file on every exit path, including
            # exceptions raised after a successful parse.
            os.remove(rfile)
Ejemplo n.º 3
0
def DumpStream(path, objid):
    """Write the stream contents of PDF object ``objid`` to standard output.

    Args:
        path: location of the PDF file on disk.
        objid: id of the object expected to be a stream.

    Returns:
        0 on success; 1 if ``path`` is not a regular file, the object cannot
        be found, the object is not a stream, or an exception occurs; 2 if
        parsing produces no document.
    """
    if not os.path.isfile(path):
        # print() with one argument is valid on both Python 2 and Python 3.
        print('{0} not a file!'.format(path))
        return 1

    try:
        pdfParser = PDFParser()
        _, pdf = pdfParser.parse(path, True)

        if not pdf:
            return 2

        # get object
        obj = pdf.getObject(objid, None)
        if not obj:
            print('{0} stream not found!'.format(objid))
            return 1

        if obj.getType() != 'stream':
            print('{0} is not a stream!'.format(objid))
            return 1

        value = obj.getStream()
        # -1 is skipped rather than printed; the function still returns 0.
        if value != -1:
            print(value)

    except Exception as ex:
        print(str(ex))
        return 1

    return 0
Ejemplo n.º 4
0
    def handle_pdf(self, url, content):
        """Parse PDF ``content`` and write its XML analysis report to disk.

        The content is stored under the logging base directory using its MD5
        digest as file name, parsed, and the report is written to
        ``<baseDir>/analysis/pdf/<MD5>.xml``. The stored sample is removed
        before returning.

        Args:
            url: URL the content was retrieved from.
            content: raw bytes of the document.

        Returns:
            False when the parser raises, True once the report is written.

        Raises:
            OSError: if the report directory cannot be created.
        """
        md5sum = hashlib.md5(content).hexdigest()

        rfile = os.path.join(log.ThugLogging.baseDir, md5sum)
        with open(rfile, 'wb') as fd:
            fd.write(content)

        try:
            pdfparser = PDFParser()

            try:
                _, pdf = pdfparser.parse(rfile, forceMode=True, looseMode=True)
            except:  # pylint:disable=bare-except
                # Deliberately broad: any parser failure means "not analysed".
                return False

            statsDict = pdf.getStats()
            analysis = self.getPeepXML(statsDict, url)

            pdflogdir = os.path.join(log.ThugLogging.baseDir, "analysis", "pdf")

            try:
                os.makedirs(pdflogdir)
            except OSError:
                # An existing directory is fine; anything else would make the
                # report write below fail, so surface the real cause here.
                if not os.path.isdir(pdflogdir):
                    raise

            report = os.path.join(pdflogdir, "%s.xml" % (statsDict["MD5"], ))
            with open(report, 'wb') as fd:
                fd.write(analysis)

            return True
        finally:
            # Remove the stored sample on every exit path, including
            # exceptions raised after a successful parse.
            os.remove(rfile)
Ejemplo n.º 5
0
    def handle_pdf(self, url, content):
        """Analyse PDF ``content`` and save the resulting XML report.

        The bytes are written to ``<baseDir>/<md5 of content>``, parsed, and
        the report produced by ``getPeepXML`` is saved as
        ``<baseDir>/analysis/pdf/<MD5>.xml``. The intermediate sample file is
        deleted before returning.

        Args:
            url: URL the content was retrieved from.
            content: raw bytes of the document.

        Returns:
            False when the parser raises, True once the report is written.

        Raises:
            OSError: if the report directory cannot be created.
        """
        md5sum = hashlib.md5(content).hexdigest()

        rfile = os.path.join(log.ThugLogging.baseDir, md5sum)
        with open(rfile, 'wb') as fd:
            fd.write(content)

        try:
            pdfparser = PDFParser()

            try:
                _, pdf = pdfparser.parse(rfile, forceMode=True, looseMode=True)
            except:  # pylint:disable=bare-except
                # Kept broad on purpose: every parser failure maps to False.
                return False

            statsDict = pdf.getStats()
            analysis = self.getPeepXML(statsDict, url)

            pdflogdir = os.path.join(log.ThugLogging.baseDir, "analysis", "pdf")

            try:
                os.makedirs(pdflogdir)
            except OSError:
                # Tolerate "already exists" only; re-raise real failures
                # instead of hiding them behind the later open() error.
                if not os.path.isdir(pdflogdir):
                    raise

            report = os.path.join(pdflogdir, "%s.xml" % (statsDict["MD5"], ))
            with open(report, 'wb') as fd:
                fd.write(analysis)

            return True
        finally:
            # Clean up the sample even when a later step raises.
            os.remove(rfile)
Ejemplo n.º 6
0
    def handle_pdf(self, url, content):
        """Parse PDF ``content``, store its XML analysis and scan it further.

        The content is stored through ``log.ThugLogging.store_content`` under
        its MD5 digest, parsed, and the XML analysis is stored as
        ``<MD5>.xml`` in ``<baseDir>/analysis/pdf``. The parsed document is
        then handed to ``self.swf_mastah``. The stored sample is removed
        before returning.

        Args:
            url: URL the content was retrieved from.
            content: raw bytes of the document.

        Returns:
            False when the parser raises, True otherwise.
        """
        md5sum = hashlib.md5(content).hexdigest()

        rfile = log.ThugLogging.store_content(log.ThugLogging.baseDir, md5sum, content)

        try:
            pdfparser = PDFParser()

            try:
                _, pdf = pdfparser.parse(rfile, forceMode=True, looseMode=True)
            except:  # pylint:disable=bare-except
                # Deliberately broad: any parser failure means "not analysed".
                return False

            statsDict = pdf.getStats()
            analysis = self.getPeepXML(statsDict, url)

            pdflogdir = os.path.join(log.ThugLogging.baseDir, "analysis", "pdf")

            try:
                os.makedirs(pdflogdir)
            except OSError:
                # Best effort (typically the directory already exists); only
                # filesystem errors are ignored, not every exception.
                pass

            log.ThugLogging.store_content(pdflogdir, "%s.xml" % (statsDict["MD5"], ), analysis)
            self.swf_mastah(pdf, statsDict)
            return True
        finally:
            # Remove the stored sample on every exit path, including
            # exceptions raised after a successful parse.
            os.remove(rfile)
Ejemplo n.º 7
0
    def handle_pdf(self, url, content):
        """Analyse PDF ``content`` fetched from ``url`` and log the report.

        Args:
            url: URL the content was retrieved from.
            content: raw bytes of the document.

        Returns:
            None when ``build_sample`` yields nothing or a non-'PDF' type,
            False when the parser raises, True otherwise.
        """
        sample = log.ThugLogging.build_sample(content, url)
        if sample is None or sample['type'] not in ('PDF', ):
            return

        # The descriptor returned by mkstemp() is already open; reuse it via
        # os.fdopen() so it gets closed instead of being shadowed and leaked.
        fd, rfile = tempfile.mkstemp()

        try:
            with os.fdopen(fd, 'wb') as handle:
                handle.write(content)

            pdfparser = PDFParser()

            try:
                _, pdf = pdfparser.parse(rfile, forceMode=True, looseMode=True)
            except:  # pylint:disable=bare-except
                # Kept broad on purpose: every parser failure maps to False.
                return False

            statsDict = pdf.getStats()
            analysis = self.getPeepXML(statsDict, url)

            log_dir = os.path.join(log.ThugLogging.baseDir, "analysis", "pdf")
            log.ThugLogging.log_peepdf(log_dir, sample, analysis)

            self.swf_mastah(pdf, statsDict, url)
            return True
        finally:
            # Always delete the temporary file, even if a later step raises.
            os.remove(rfile)
Ejemplo n.º 8
0
    def handle_pdf(self, url, content):
        """Analyse PDF ``content``, persist the XML report and scan further.

        The sample is stored via ``log.ThugLogging.store_content`` using the
        MD5 digest of the content as name. After parsing, the XML analysis is
        stored as ``<MD5>.xml`` under ``<baseDir>/analysis/pdf`` and the
        parsed document is passed to ``self.swf_mastah``. The stored sample
        is deleted before returning.

        Args:
            url: URL the content was retrieved from.
            content: raw bytes of the document.

        Returns:
            False when the parser raises, True otherwise.
        """
        md5sum = hashlib.md5(content).hexdigest()

        rfile = log.ThugLogging.store_content(log.ThugLogging.baseDir, md5sum,
                                              content)

        try:
            pdfparser = PDFParser()

            try:
                _, pdf = pdfparser.parse(rfile, forceMode=True, looseMode=True)
            except:  # pylint:disable=bare-except
                # Kept broad on purpose: every parser failure maps to False.
                return False

            statsDict = pdf.getStats()
            analysis = self.getPeepXML(statsDict, url)

            pdflogdir = os.path.join(log.ThugLogging.baseDir, "analysis", "pdf")

            try:
                os.makedirs(pdflogdir)
            except OSError:
                # Best effort (typically the directory already exists); only
                # filesystem errors are ignored, not every exception.
                pass

            log.ThugLogging.store_content(pdflogdir,
                                          "%s.xml" % (statsDict["MD5"], ),
                                          analysis)
            self.swf_mastah(pdf, statsDict)
            return True
        finally:
            # Clean up the sample even when a later step raises.
            os.remove(rfile)
Ejemplo n.º 9
0
def fakeFile_check(filePath):
    """Return the document parsed from ``filePath``, or None on any failure.

    The parser import happens inside the guarded section, so a failing
    import is reported as None just like a parsing error.
    """
    try:
        from peepdf.PDFCore import PDFParser
        _status, document = PDFParser().parse(filePath)
    except Exception:
        return None
    return document
Ejemplo n.º 10
0
        def get_streams():
            """Collect the stream objects of the currently opened PDF document.

            Returns:
                A list with one entry per stream object:
                ``[object id, offset, size, first 100 chars of get_type(...)]``,
                extended with the dump path when ``arg_open`` or ``arg_dump``
                is set (in which case the decoded stream is written to disk).
            """
            # This function is brutally ripped from Brandon Dixon's swf_mastah.py.

            # Initialize peepdf parser.
            parser = PDFParser()
            # Parse currently opened PDF document.
            _, pdf = parser.parse(__sessions__.current.file.path, True, False)

            results = []

            # Walk the document bodies directly. The previous loop counted
            # range(len(stats["Version"])); that key appears to hold the
            # version string rather than the per-version list, so the count
            # did not match the number of bodies.
            for body in pdf.body:
                objects = body.objects

                for index in objects:
                    entry = objects[index]
                    details = entry.object

                    if details.type != "stream":
                        continue

                    decoded_stream = details.decodedStream

                    result = [entry.id, entry.offset, entry.size, get_type(decoded_stream)[:100]]

                    # If the stream needs to be dumped or opened, we do it
                    # and expand the results with the path to the stream dump.
                    if arg_open or arg_dump:
                        # If instructed to dump, we already have a base folder.
                        if arg_dump:
                            folder = arg_dump
                        # Otherwise we just generate a temporary one.
                        else:
                            folder = tempfile.mkdtemp()

                        # Dump stream to this path.
                        # TODO: sometimes there appear to be multiple streams
                        # with the same object ID. Is that even possible?
                        # It will cause conflicts.
                        dump_path = "{0}/{1}_{2}_stream.bin".format(folder, __sessions__.current.file.md5, entry.id)

                        with open(dump_path, "wb") as handle:
                            handle.write(decoded_stream.strip())

                        # Add dump path to the stream attributes.
                        result.append(dump_path)

                    # Update list of streams.
                    results.append(result)

            return results
Ejemplo n.º 11
0
def get_data(f_list):
    """Copy every entry of ``f_list`` that parses as a PDF into the classify folder.

    Each name is resolved against the module-level ``file_root``; entries the
    parser accepts are copied to ``<cwd>/<file_classify><name>``. Parsing and
    copy failures are skipped silently (best effort).

    Args:
        f_list: iterable of names relative to ``file_root``.

    Returns:
        The destination folder prefix (``os.getcwd() + '/' + file_classify``)
        if at least one entry parsed, otherwise None. Previously this case
        raised because the local was never bound.
    """
    newfile = None
    for name in f_list:
        source = file_root + name
        pdfParser = PDFParser()
        try:
            _, pdf = pdfParser.parse(source)
            newfile = os.getcwd() + '/' + file_classify
            destination = newfile + name
            # copytree() only accepts directories, so it always failed for the
            # regular files parsed above and nothing was copied. Use a plain
            # file copy for files and keep copytree() for directories.
            if os.path.isdir(source):
                shutil.copytree(source, destination)
            else:
                shutil.copy2(source, destination)
        except Exception:
            continue

    return newfile
Ejemplo n.º 12
0
def get_data(f_list):
    """Delete every entry of ``f_list`` that the PDF parser fails on.

    Each name is resolved against the module-level ``file_root``. When
    parsing raises, the file is removed and its name is printed.

    Args:
        f_list: iterable of names relative to ``file_root``.

    Returns:
        The last name iterated, or None when ``f_list`` is empty
        (previously an unbound-variable error).
    """
    last = None
    for last in f_list:
        target = file_root + last
        pdfParser = PDFParser()

        try:
            _, pdf = pdfParser.parse(target)
        except Exception:
            os.remove(target)
            # Single-argument print() works on both Python 2 and Python 3.
            print(last)
            continue

    return last
Ejemplo n.º 13
0
 def parse_pdf(self, pdf):
     """Parse ``pdf`` and report whether parsing succeeded.

     Returns a ``(ok, payload)`` pair: on success ``ok`` is True and
     ``payload`` is the parsed document; on any exception ``ok`` is False
     and ``payload`` is the formatted traceback followed by the exception's
     repr, joined by a newline.
     """
     try:
         _, parsed = PDFParser().parse(pdf, forceMode=True, manualAnalysis=True)
     except Exception as exc:
         failure = '\n'.join([traceback.format_exc(), repr(exc)])
         return False, failure
     return True, parsed
Ejemplo n.º 14
0
def ProcessFile(path):
    """Parse the PDF at ``path`` and print a JSON description of its objects.

    The printed JSON object has the keys ``valid``, ``data`` (with
    ``streams`` and ``errors`` when a document was parsed) and ``info``
    (the document statistics serialised as a JSON string). On failure a
    JSON object with ``valid`` false and an ``error`` message is printed.

    Args:
        path: filesystem path of the PDF document.

    Returns:
        0 when the report was printed, 1 when an exception occurred, and
        2 when ``path`` is not a file.
    """
    if not os.path.isfile(path):
        # Single-argument print() works on both Python 2 and Python 3.
        print('{0} not a file!'.format(path))
        return 2

    try:
        data = {}
        data['valid'] = True
        pdfdata = {}
        pdfParser = PDFParser()
        _, pdf = pdfParser.parse(path, True)

        if not pdf:
            data['valid'] = False
        else:
            errors = []
            streams = []

            # general info
            statsDict = pdf.getStats()
            try:
                data['info'] = json.dumps(statsDict, indent=4, sort_keys=False)
            except Exception as e:
                # Store the message, not the exception object: the object is
                # not JSON serialisable and would make the final dump fail.
                data['info'] = str(e)

            # enumerate errors
            if hasattr(pdf, 'errors'):
                errors.extend(pdf.errors)

            # enumerate streams (reuses the statistics fetched above)
            for versionId, statsVersion in enumerate(statsDict['Versions']):
                for objid in statsVersion['Objects'][1]:
                    obj = pdf.getObject(objid, versionId)
                    if not obj:
                        continue

                    stream = {}
                    stream['id'] = objid
                    stream['type'] = obj.getType()
                    stream['attributes'] = {}
                    stream['has_js'] = obj.containsJScode

                    if hasattr(obj, 'elements'):
                        for key in obj.elements:
                            element = obj.elements[key]
                            stream['attributes'][key] = convert_to_printable(
                                element.value)

                    if obj.getType() == 'stream':
                        stream['data_len'] = obj.size
                        value = obj.getStream()
                        # -1 is treated as "no stream data available".
                        if value != -1:
                            stream['data'] = convert_to_printable(value)

                    streams.append(stream)

            pdfdata['streams'] = streams
            pdfdata['errors'] = errors

        data['data'] = pdfdata
        encoded = json.dumps(data)
        print(encoded)
    except Exception as ex:
        data = {}
        data['valid'] = False
        data['error'] = str(ex)
        print(json.dumps(data))
        return 1

    return 0
Ejemplo n.º 15
0
def main():
    """Command-line entry point.

    Parses the command-line options, optionally parses the given PDF file
    (and looks its MD5 up through ``vtcheck`` when requested), then either
    writes an XML or JSON report to stdout, runs console commands from a
    script file or from ``-C`` options, or prints a text summary and
    optionally starts the interactive console.

    Unhandled errors have their traceback appended to ``errorsFile``; if that
    file exists when the function finishes, the process exits with a message
    asking the user to report it.
    """
    global COLORIZED_OUTPUT

    argsParser = optparse.OptionParser(usage='Usage: peepdf.py [options] PDF_file', description=versionHeader)
    argsParser.add_option('-i', '--interactive', action='store_true', dest='isInteractive', default=False, help='Sets console mode.')
    argsParser.add_option('-s', '--load-script', action='store', type='string', dest='scriptFile', help='Loads the commands stored in the specified file and execute them.')
    argsParser.add_option('-c', '--check-vt', action='store_true', dest='checkOnVT', default=False, help='Checks the hash of the PDF file on VirusTotal.')
    argsParser.add_option('-f', '--force-mode', action='store_true', dest='isForceMode', default=False, help='Sets force parsing mode to ignore errors.')
    argsParser.add_option('-l', '--loose-mode', action='store_true', dest='isLooseMode', default=False, help='Sets loose parsing mode to catch malformed objects.')
    argsParser.add_option('-m', '--manual-analysis', action='store_true', dest='isManualAnalysis', default=False, help='Avoids automatic Javascript analysis. Useful with eternal loops like heap spraying.')
    argsParser.add_option('-g', '--grinch-mode', action='store_true', dest='avoidColors', default=False, help='Avoids colorized output in the interactive console.')
    argsParser.add_option('-v', '--version', action='store_true', dest='version', default=False, help='Shows program\'s version number.')
    argsParser.add_option('-x', '--xml', action='store_true', dest='xmlOutput', default=False, help='Shows the document information in XML format.')
    argsParser.add_option('-j', '--json', action='store_true', dest='jsonOutput', default=False, help='Shows the document information in JSON format.')
    argsParser.add_option('-C', '--command', action='append', type='string', dest='commands', help='Specifies a command from the interactive console to be executed.')
    (options, args) = argsParser.parse_args()

    stats = ""
    pdf = None
    fileName = None
    statsDict = None
    vtJsonDict = None

    try:
        # Avoid colors in the output
        if not COLORIZED_OUTPUT or options.avoidColors:
            warningColor = ''
            errorColor = ''
            alertColor = ''
            staticColor = ''
            resetColor = ''
        else:
            warningColor = Fore.YELLOW
            errorColor = Fore.RED
            alertColor = Fore.RED
            staticColor = Fore.BLUE
            resetColor = Style.RESET_ALL

        if options.version:
            print(peepdfHeader)
        else:
            # Exactly one positional argument (the PDF file) is accepted; no
            # argument at all is only allowed together with -i.
            if len(args) == 1:
                fileName = args[0]
                if not os.path.exists(fileName):
                    sys.exit('Error: The file "' + fileName + '" does not exist!!')
            elif len(args) > 1 or (len(args) == 0 and not options.isInteractive):
                # NOTE(review): print_help() presumably returns None, so this
                # would exit with status 0 after printing usage - confirm.
                sys.exit(argsParser.print_help())

            if options.scriptFile is not None:
                if not os.path.exists(options.scriptFile):
                    sys.exit('Error: The script file "' + options.scriptFile + '" does not exist!!')

            # Parse the document only when a file was given; otherwise pdf and
            # statsDict keep their initial value of None.
            if fileName is not None:
                pdfParser = PDFParser()
                ret, pdf = pdfParser.parse(fileName, options.isForceMode, options.isLooseMode, options.isManualAnalysis)
                if options.checkOnVT:
                    # Checks the MD5 on VirusTotal
                    md5Hash = pdf.getMD5()
                    ret = vtcheck(md5Hash, VT_KEY)
                    # ret[0] == -1 signals a failed lookup; ret[1] is then
                    # recorded as an error on the document.
                    if ret[0] == -1:
                        pdf.addError(ret[1])
                    else:
                        vtJsonDict = ret[1]
                        if "response_code" in vtJsonDict:
                            if vtJsonDict['response_code'] == 1:
                                if "positives" in vtJsonDict and "total" in vtJsonDict:
                                    pdf.setDetectionRate([vtJsonDict['positives'], vtJsonDict['total']])
                                else:
                                    pdf.addError('Missing elements in the response from VirusTotal!!')
                                if "permalink" in vtJsonDict:
                                    pdf.setDetectionReport(vtJsonDict['permalink'])
                            else:
                                pdf.setDetectionRate(None)
                        else:
                            pdf.addError('Bad response from VirusTotal!!')
                statsDict = pdf.getStats()

            # Output mode: XML first, then JSON (only when no -C commands were
            # given), otherwise script / commands / text summary + console.
            if options.xmlOutput:
                try:
                    xml = getPeepXML(statsDict, _version, revision)
                    sys.stdout.write(xml)
                except:
                    errorMessage = '*** Error: Exception while generating the XML file!!'
                    # NOTE(review): the file object opened for the traceback is
                    # never closed explicitly (same pattern is used below).
                    traceback.print_exc(file=open(errorsFile, 'a'))
                    raise Exception('PeepException', 'Send me an email ;)')
            elif options.jsonOutput and not options.commands:
                try:
                    jsonReport = getPeepJSON(statsDict, _version, revision)
                    sys.stdout.write(jsonReport)
                except:
                    errorMessage = '*** Error: Exception while generating the JSON report!!'
                    traceback.print_exc(file=open(errorsFile, 'a'))
                    raise Exception('PeepException', 'Send me an email ;)')
            else:
                if COLORIZED_OUTPUT and not options.avoidColors:
                    try:
                        init()
                    except:
                        # Fall back to plain output if colour setup fails.
                        COLORIZED_OUTPUT = False
                if options.scriptFile is not None:
                    from peepdf.PDFConsole import PDFConsole

                    # Batch mode: the console reads its commands from the script file.
                    scriptFileObject = open(options.scriptFile, 'rb')
                    console = PDFConsole(pdf, VT_KEY, options.avoidColors, stdin=scriptFileObject)
                    try:
                        console.cmdloop()
                    except:
                        errorMessage = '*** Error: Exception not handled using the batch mode!!'
                        scriptFileObject.close()
                        traceback.print_exc(file=open(errorsFile, 'a'))
                        raise Exception('PeepException', 'Send me an email ;)')
                elif options.commands is not None:
                    from .PDFConsole import PDFConsole

                    # Run each -C command through the console, in the order given.
                    console = PDFConsole(pdf, VT_KEY, options.avoidColors)
                    try:
                        for command in options.commands:
                            console.onecmd(command)
                    except:
                        errorMessage = '*** Error: Exception not handled using the batch commands!!'
                        traceback.print_exc(file=open(errorsFile, 'a'))
                        raise Exception('PeepException', 'Send me an email ;)')
                else:
                    # Default mode: build the text summary in `stats` (only when
                    # a document was parsed), print it, then optionally start
                    # the interactive console.
                    if statsDict is not None:
                        if COLORIZED_OUTPUT and not options.avoidColors:
                            beforeStaticLabel = staticColor
                        else:
                            beforeStaticLabel = ''

                        if not JS_MODULE:
                            warningMessage = 'Warning: PyV8 is not installed!!'
                            stats += warningColor + warningMessage + resetColor + newLine
                        if not EMU_MODULE:
                            warningMessage = 'Warning: pylibemu is not installed!!'
                            stats += warningColor + warningMessage + resetColor + newLine
                        if not PIL_MODULE:
                            warningMessage = 'Warning: Python Imaging Library (PIL) is not installed!!'
                            stats += warningColor + warningMessage + resetColor + newLine
                        # Only decryption errors are shown up front; the total
                        # error count is reported further down.
                        errors = statsDict['Errors']
                        for error in errors:
                            if error.find('Decryption error') != -1:
                                stats += errorColor + error + resetColor + newLine
                        if stats != '':
                            stats += newLine
                        # The statistics are fetched again before rendering.
                        statsDict = pdf.getStats()

                        stats += beforeStaticLabel + 'File: ' + resetColor + statsDict['File'] + newLine
                        stats += beforeStaticLabel + 'MD5: ' + resetColor + statsDict['MD5'] + newLine
                        stats += beforeStaticLabel + 'SHA1: ' + resetColor + statsDict['SHA1'] + newLine
                        stats += beforeStaticLabel + 'SHA256: ' + resetColor + statsDict['SHA256'] + newLine
                        stats += beforeStaticLabel + 'Size: ' + resetColor + statsDict['Size'] + ' bytes' + newLine
                        if options.checkOnVT:
                            # An empty list means no detection data at all;
                            # None means the lookup found no matching file.
                            if statsDict['Detection'] != []:
                                detectionReportInfo = ''
                                if statsDict['Detection'] is not None:
                                    detectionColor = ''
                                    if COLORIZED_OUTPUT and not options.avoidColors:
                                        # NOTE(review): with integer division a
                                        # total below 3 makes the divisor 0;
                                        # with true division the result is a
                                        # float and `== 1` rarely holds -
                                        # confirm the intended semantics.
                                        detectionLevel = statsDict['Detection'][0] / (statsDict['Detection'][1] / 3)
                                        if detectionLevel == 0:
                                            detectionColor = alertColor
                                        elif detectionLevel == 1:
                                            detectionColor = warningColor
                                    detectionRate = '%s%d%s/%d' % (
                                        detectionColor, statsDict['Detection'][0], resetColor, statsDict['Detection'][1])
                                    if statsDict['Detection report'] != '':
                                        detectionReportInfo = (
                                            beforeStaticLabel + 'Detection report: ' + resetColor +
                                            statsDict['Detection report'] + newLine
                                        )
                                else:
                                    detectionRate = 'File not found on VirusTotal'
                                stats += beforeStaticLabel + 'Detection: ' + resetColor + detectionRate + newLine
                                stats += detectionReportInfo
                        stats += beforeStaticLabel + 'Version: ' + resetColor + statsDict['Version'] + newLine
                        stats += beforeStaticLabel + 'Binary: ' + resetColor + statsDict['Binary'] + newLine
                        stats += beforeStaticLabel + 'Linearized: ' + resetColor + statsDict['Linearized'] + newLine
                        stats += beforeStaticLabel + 'Encrypted: ' + resetColor + statsDict['Encrypted']
                        if statsDict['Encryption Algorithms'] != []:
                            stats += ' ('
                            for algorithmInfo in statsDict['Encryption Algorithms']:
                                stats += algorithmInfo[0] + ' ' + str(algorithmInfo[1]) + ' bits, '
                            # Drop the trailing ', ' before closing the parenthesis.
                            stats = stats[:-2] + ')'
                        stats += newLine
                        stats += beforeStaticLabel + 'Updates: ' + resetColor + statsDict['Updates'] + newLine
                        stats += beforeStaticLabel + 'Objects: ' + resetColor + statsDict['Objects'] + newLine
                        stats += beforeStaticLabel + 'Streams: ' + resetColor + statsDict['Streams'] + newLine
                        stats += beforeStaticLabel + 'URIs: ' + resetColor + statsDict['URIs'] + newLine
                        stats += beforeStaticLabel + 'Comments: ' + resetColor + statsDict['Comments'] + newLine
                        stats += beforeStaticLabel + 'Errors: ' + resetColor + str(len(statsDict['Errors'])) + newLine * 2
                        # One section per entry of statsDict['Versions']. The
                        # per-version entries are indexed as pairs: [0] is
                        # concatenated as a count string, [1] is printed via str().
                        for version in range(len(statsDict['Versions'])):
                            statsVersion = statsDict['Versions'][version]
                            stats += beforeStaticLabel + 'Version ' + resetColor + str(version) + ':' + newLine
                            if statsVersion['Catalog'] is not None:
                                stats += beforeStaticLabel + '\tCatalog: ' + resetColor + statsVersion['Catalog'] + newLine
                            else:
                                stats += beforeStaticLabel + '\tCatalog: ' + resetColor + 'No' + newLine
                            if statsVersion['Info'] is not None:
                                stats += beforeStaticLabel + '\tInfo: ' + resetColor + statsVersion['Info'] + newLine
                            else:
                                stats += beforeStaticLabel + '\tInfo: ' + resetColor + 'No' + newLine
                            stats += beforeStaticLabel + '\tObjects (' + statsVersion['Objects'][
                                0] + '): ' + resetColor + str(statsVersion['Objects'][1]) + newLine
                            if statsVersion['Compressed Objects'] is not None:
                                stats += beforeStaticLabel + '\tCompressed objects (' + statsVersion['Compressed Objects'][
                                    0] + '): ' + resetColor + str(statsVersion['Compressed Objects'][1]) + newLine
                            if statsVersion['Errors'] is not None:
                                stats += beforeStaticLabel + '\t\tErrors (' + statsVersion['Errors'][
                                    0] + '): ' + resetColor + str(statsVersion['Errors'][1]) + newLine
                            stats += beforeStaticLabel + '\tStreams (' + statsVersion['Streams'][
                                0] + '): ' + resetColor + str(statsVersion['Streams'][1])
                            if statsVersion['Xref Streams'] is not None:
                                stats += newLine + beforeStaticLabel + '\t\tXref streams (' + statsVersion['Xref Streams'][
                                    0] + '): ' + resetColor + str(statsVersion['Xref Streams'][1])
                            if statsVersion['Object Streams'] is not None:
                                stats += (
                                    newLine + beforeStaticLabel + '\t\tObject streams (' +
                                    statsVersion['Object Streams'][0] + '): ' + resetColor +
                                    str(statsVersion['Object Streams'][1])
                                )
                            if int(statsVersion['Streams'][0]) > 0:
                                stats += (
                                    newLine + beforeStaticLabel + '\t\tEncoded (' + statsVersion['Encoded'][0] +
                                    '): ' + resetColor + str(statsVersion['Encoded'][1])
                                )
                                if statsVersion['Decoding Errors'] is not None:
                                    stats += (
                                        newLine + beforeStaticLabel + '\t\tDecoding errors (' +
                                        statsVersion['Decoding Errors'][0] + '): ' + resetColor +
                                        str(statsVersion['Decoding Errors'][1])
                                    )
                            if statsVersion['URIs'] is not None:
                                stats += (
                                    newLine + beforeStaticLabel + '\tObjects with URIs (' +
                                    statsVersion['URIs'][0] + '): ' + resetColor +
                                    str(statsVersion['URIs'][1])
                                )
                            # JS and suspicious-element labels use the warning
                            # colour; the static colour is restored afterwards.
                            if COLORIZED_OUTPUT and not options.avoidColors:
                                beforeStaticLabel = warningColor
                            if statsVersion['Objects with JS code'] is not None:
                                stats += (
                                    newLine + beforeStaticLabel + '\tObjects with JS code (' +
                                    statsVersion['Objects with JS code'][0] + '): ' + resetColor +
                                    str(statsVersion['Objects with JS code'][1])
                                )
                            actions = statsVersion['Actions']
                            events = statsVersion['Events']
                            vulns = statsVersion['Vulns']
                            elements = statsVersion['Elements']
                            if events is not None or actions is not None or vulns is not None or elements is not None:
                                stats += newLine + beforeStaticLabel + '\tSuspicious elements:' + resetColor + newLine
                                if events is not None:
                                    for event in events:
                                        stats += (
                                            '\t\t' + beforeStaticLabel + event + ' (%d): ' % len(events[event]) +
                                            resetColor + str(events[event]) + newLine
                                        )
                                if actions is not None:
                                    for action in actions:
                                        stats += (
                                            '\t\t' + beforeStaticLabel + action + ' (%d): ' % len(actions[action]) +
                                            resetColor + str(actions[action]) + newLine
                                        )
                                if vulns is not None:
                                    for vuln in vulns:
                                        # Entries known to vulnsDict are shown with
                                        # their name ([0]) and CVE list ([1]).
                                        if vuln in vulnsDict:
                                            vulnName = vulnsDict[vuln][0]
                                            vulnCVEList = vulnsDict[vuln][1]
                                            stats += '\t\t' + beforeStaticLabel + vulnName + ' ('
                                            for vulnCVE in vulnCVEList:
                                                stats += vulnCVE + ','
                                            # stats[:-1] drops the trailing comma.
                                            stats = stats[:-1] + ') (%d): ' % len(vulns[vuln]) + resetColor + str(vulns[vuln]) + newLine
                                        else:
                                            stats += (
                                                '\t\t' + beforeStaticLabel + vuln + ' (%d): ' % len(vulns[vuln]) +
                                                resetColor + str(vulns[vuln]) + newLine
                                            )
                                if elements is not None:
                                    for element in elements:
                                        if element in vulnsDict:
                                            vulnName = vulnsDict[element][0]
                                            vulnCVEList = vulnsDict[element][1]
                                            stats += '\t\t' + beforeStaticLabel + vulnName + ' ('
                                            for vulnCVE in vulnCVEList:
                                                stats += vulnCVE + ','
                                            stats = stats[:-1] + '): ' + resetColor + str(elements[element]) + newLine
                                        else:
                                            stats += '\t\t' + beforeStaticLabel + element + ': ' + resetColor + str(
                                                elements[element]) + newLine
                            if COLORIZED_OUTPUT and not options.avoidColors:
                                beforeStaticLabel = staticColor
                            urls = statsVersion['URLs']
                            if urls is not None:
                                stats += newLine + beforeStaticLabel + '\tFound URLs:' + resetColor + newLine
                                for url in urls:
                                    stats += '\t\t' + url + newLine
                            stats += newLine * 2
                    if fileName is not None:
                        print(stats)
                    if options.isInteractive:
                        from peepdf.PDFConsole import PDFConsole

                        console = PDFConsole(pdf, VT_KEY, options.avoidColors)
                        # Restart the command loop after an unexpected error
                        # until the console reports that it is leaving.
                        while not console.leaving:
                            try:
                                console.cmdloop()
                            except KeyboardInterrupt as e:
                                sys.exit()
                            except:
                                errorMessage = '*** Error: Exception not handled using the interactive console!! Please, report it to the author!!'
                                print(errorColor + errorMessage + resetColor + newLine)
                                traceback.print_exc(file=open(errorsFile, 'a'))
    except Exception as e:
        # Exceptions raised above as ('PeepException', reason) already set
        # errorMessage and logged their traceback; anything else gets a
        # generic message and its traceback appended to errorsFile here.
        if len(e.args) == 2:
            excName, excReason = e.args
        else:
            excName = None
        if excName is None or excName != 'PeepException':
            errorMessage = '*** Error: Exception not handled!!'
            traceback.print_exc(file=open(errorsFile, 'a'))
        print(errorColor + errorMessage + resetColor + newLine)
    finally:
        # Runs on every exit path: if an errors file exists, exit with a
        # message asking the user to report it.
        if os.path.exists(errorsFile):
            message = newLine + 'Please, don\'t forget to report errors if found:' + newLine * 2
            message += '\t- Sending the file "%s" to the author (mailto:[email protected])%s' % (
                errorsFile, newLine)
            message += '\t- And/Or creating an issue on the project webpage (https://github.com/jesparza/peepdf/issues)' + newLine
            message = errorColor + message + resetColor
            sys.exit(message)
Ejemplo n.º 16
0
def peepdf_parse(filepath: str, pdfresult: Dict[str, Any]) -> Dict[str, Any]:
    """Add peepdf findings (JS streams, URLs, basic metadata) to ``pdfresult``.

    The file is parsed with ``forceMode=True``, ``looseMode=True`` and
    ``manualAnalysis=False``. Stream objects are handed to ``analyseJS`` (only
    when ``HAVE_V8PY`` is set) and link annotations contribute their ``/URI``
    value, prefixed with whatever ``_set_base_uri`` returned.

    Args:
        filepath: Path of the PDF file passed to ``PDFParser.parse``.
        pdfresult: Result dictionary, updated in place. ``pdfresult["Info"]``
            must already exist when creator/producer/author metadata is found.

    Returns:
        The same ``pdfresult`` object; returned untouched when ``HAVE_PEEPDF``
        is false.
    """
    if not HAVE_PEEPDF:
        return pdfresult

    log.debug("About to parse with PDFParser")
    _, pdf = PDFParser().parse(
        filepath, forceMode=True, looseMode=True, manualAnalysis=False)

    js_urls = set()
    annot_uris = set()
    js_streams = []
    metadata = {}

    base_uri = _set_base_uri(pdf)

    for version, body in enumerate(pdf.body):
        # The last body version that yields non-empty metadata wins.
        version_meta = pdf.getBasicMetadata(version)
        if version_meta:
            metadata = version_meta

        entries = body.objects
        for key in entries:
            entry = entries[key]
            record = {
                "Object ID": entry.id,
                "Offset": entry.offset,
                "Size": entry.size,
            }
            details = entry.object

            if details.type == "stream":
                decoded = details.decodedStream
                if not HAVE_V8PY:
                    continue
                try:
                    jslist, _unescaped, found_urls, js_errors, _ctx = analyseJS(
                        decoded.strip())
                    script = jslist[0]
                except Exception as exc:
                    log.error(exc, exc_info=True)
                    continue
                if len(js_errors) > 0 or script is None:
                    continue

                js_urls.update(found_urls)

                # "JSONify" the script text: every character above 0x7f is
                # replaced by a literal "\x<hex of its encoded bytes>" escape,
                # everything else (new lines included) is kept verbatim so the
                # code can still be beautified for display.
                record["Data"] = "".join(
                    f"\\x{ch.encode().hex()}" if ord(ch) > 127 else ch
                    for ch in script)
                js_streams.append(record)

            elif details.type == "dictionary" and details.hasElement("/A"):
                # Only /Annot dictionaries with /Subtype /Link are of interest.
                subtype = details.getElementByName("/Subtype")
                kind = details.getElementByName("/Type")
                if not (subtype and kind):
                    continue
                subtype = _get_obj_val(pdf, version, subtype)
                kind = _get_obj_val(pdf, version, kind)
                if subtype.getValue() != "/Link":
                    continue
                if kind.getValue() != "/Annot":
                    continue

                action = _get_obj_val(pdf, version,
                                      details.getElementByName("/A"))
                if action.type == "dictionary" and action.hasElement("/URI"):
                    uri = _get_obj_val(pdf, version,
                                       action.getElementByName("/URI"))
                    annot_uris.add(base_uri + uri.getValue())

        # Assigned once per body version; the key stays absent when the
        # document has no body at all.
        pdfresult["JSStreams"] = js_streams

    for meta_key, info_key in (("creator", "Creator"),
                               ("producer", "Producer"),
                               ("author", "Author")):
        if meta_key in metadata:
            pdfresult["Info"][info_key] = convert_to_printable(
                _clean_string(metadata[meta_key]))

    if js_urls:
        pdfresult["JS_URLs"] = list(js_urls)
    if annot_uris:
        pdfresult["Annot_URLs"] = list(annot_uris)

    return pdfresult
Ejemplo n.º 17
0
def ProcessFile(path):
    """Parse a PDF with peepdf and print a JSON report on stdout.

    The printed document has the keys ``valid`` and ``data`` (with ``streams``
    and ``errors`` when the parser returned a document) plus ``info``, the
    pretty-printed statistics dictionary. When anything raises, a reduced
    document ``{"valid": false, "error": "<message>"}`` is printed instead.

    Args:
        path: Path of the PDF file to analyse.

    Returns:
        0 on success, 1 when an exception was caught, 2 when ``path`` is not
        a regular file.
    """
    if not os.path.isfile(path):
        # Single-argument parenthesised print behaves the same on Python 2 and 3.
        print('{0} not a file!'.format(path))
        return 2

    try:
        data = {'valid': True}
        pdfdata = {}
        pdfParser = PDFParser()
        _, pdf = pdfParser.parse(path, True)

        if not pdf:
            data['valid'] = False
        else:
            errors = []
            streams = []

            # general info
            statsDict = pdf.getStats()
            try:
                data['info'] = json.dumps(statsDict, indent=4, sort_keys=False)
            except Exception as e:
                # Keep the message, not the exception object: an exception
                # instance is not JSON serialisable and would make the final
                # json.dumps(data) below fail, discarding the whole report.
                data['info'] = str(e)

            # enumerate errors
            if hasattr(pdf, 'errors'):
                errors.extend(pdf.errors)

            # enumerate objects of every version (statsDict is reused; nothing
            # above modifies the parsed document)
            for versionId, statsVersion in enumerate(statsDict['Versions']):
                for objid in statsVersion['Objects'][1]:
                    obj = pdf.getObject(objid, versionId)
                    if not obj:
                        continue

                    objType = obj.getType()
                    stream = {
                        'id': objid,
                        'type': objType,
                        'attributes': {},
                        'has_js': obj.containsJScode,
                    }

                    if hasattr(obj, 'elements'):
                        for key in obj.elements:
                            element = obj.elements[key]
                            stream['attributes'][key] = convert_to_printable(element.value)

                    if objType == 'stream':
                        stream['data_len'] = obj.size
                        value = obj.getStream()
                        # -1 is treated as "no stream data available"
                        if value != -1:
                            stream['data'] = convert_to_printable(value)

                    streams.append(stream)

            pdfdata['streams'] = streams
            pdfdata['errors'] = errors

        data['data'] = pdfdata
        print(json.dumps(data))
    except Exception as ex:
        print(json.dumps({'valid': False, 'error': str(ex)}))
        return 1

    return 0
Ejemplo n.º 18
0
def _logTraceback(logPath):
    """Append the traceback of the exception being handled to ``logPath``."""
    # The context manager closes the log file right away instead of leaving
    # an anonymous handle open until garbage collection.
    with open(logPath, 'a') as logFile:
        traceback.print_exc(file=logFile)


def main():
    """Command line entry point of peepdf.

    Parses the command line options, optionally parses the given PDF file
    (and checks its MD5 on VirusTotal), and then does one of the following:

    * ``-x`` / ``-j``: writes the XML / JSON report to stdout,
    * ``-s``: runs the console commands stored in a script file,
    * ``-C``: runs the console commands given on the command line,
    * otherwise: prints the textual statistics and, with ``-i``, starts the
      interactive console.

    Unexpected exceptions are appended to the file named by ``errorsFile``.
    On the way out, if that file exists, the program exits with a message
    asking the user to report it.
    """
    global COLORIZED_OUTPUT

    argsParser = optparse.OptionParser(
        usage='Usage: peepdf.py [options] PDF_file', description=versionHeader)
    argsParser.add_option('-i',
                          '--interactive',
                          action='store_true',
                          dest='isInteractive',
                          default=False,
                          help='Sets console mode.')
    argsParser.add_option(
        '-s',
        '--load-script',
        action='store',
        type='string',
        dest='scriptFile',
        help='Loads the commands stored in the specified file and execute them.'
    )
    argsParser.add_option(
        '-c',
        '--check-vt',
        action='store_true',
        dest='checkOnVT',
        default=False,
        help='Checks the hash of the PDF file on VirusTotal.')
    argsParser.add_option('-f',
                          '--force-mode',
                          action='store_true',
                          dest='isForceMode',
                          default=False,
                          help='Sets force parsing mode to ignore errors.')
    argsParser.add_option(
        '-l',
        '--loose-mode',
        action='store_true',
        dest='isLooseMode',
        default=False,
        help='Sets loose parsing mode to catch malformed objects.')
    argsParser.add_option(
        '-m',
        '--manual-analysis',
        action='store_true',
        dest='isManualAnalysis',
        default=False,
        help=
        'Avoids automatic Javascript analysis. Useful with eternal loops like heap spraying.'
    )
    argsParser.add_option(
        '-g',
        '--grinch-mode',
        action='store_true',
        dest='avoidColors',
        default=False,
        help='Avoids colorized output in the interactive console.')
    argsParser.add_option('-v',
                          '--version',
                          action='store_true',
                          dest='version',
                          default=False,
                          help='Shows program\'s version number.')
    argsParser.add_option('-x',
                          '--xml',
                          action='store_true',
                          dest='xmlOutput',
                          default=False,
                          help='Shows the document information in XML format.')
    argsParser.add_option(
        '-j',
        '--json',
        action='store_true',
        dest='jsonOutput',
        default=False,
        help='Shows the document information in JSON format.')
    argsParser.add_option(
        '-C',
        '--command',
        action='append',
        type='string',
        dest='commands',
        help='Specifies a command from the interactive console to be executed.'
    )
    (options, args) = argsParser.parse_args()

    stats = ""
    pdf = None
    fileName = None
    statsDict = None
    vtJsonDict = None

    try:
        # Avoid colors in the output
        if not COLORIZED_OUTPUT or options.avoidColors:
            warningColor = ''
            errorColor = ''
            alertColor = ''
            staticColor = ''
            resetColor = ''
        else:
            warningColor = Fore.YELLOW
            errorColor = Fore.RED
            alertColor = Fore.RED
            staticColor = Fore.BLUE
            resetColor = Style.RESET_ALL

        if options.version:
            print(peepdfHeader)
        else:
            # Exactly one file argument is expected, unless the interactive
            # console is requested (it can start without a file).
            # sys.exit() raises SystemExit, which the "except Exception"
            # handler at the bottom does not catch.
            if len(args) == 1:
                fileName = args[0]
                if not os.path.exists(fileName):
                    sys.exit('Error: The file "' + fileName +
                             '" does not exist!!')
            elif len(args) > 1 or (len(args) == 0
                                   and not options.isInteractive):
                sys.exit(argsParser.print_help())

            if options.scriptFile is not None:
                if not os.path.exists(options.scriptFile):
                    sys.exit('Error: The script file "' + options.scriptFile +
                             '" does not exist!!')

            if fileName is not None:
                pdfParser = PDFParser()
                ret, pdf = pdfParser.parse(fileName, options.isForceMode,
                                           options.isLooseMode,
                                           options.isManualAnalysis)
                if options.checkOnVT:
                    # Checks the MD5 on VirusTotal
                    md5Hash = pdf.getMD5()
                    ret = vtcheck(md5Hash, VT_KEY)
                    if ret[0] == -1:
                        pdf.addError(ret[1])
                    else:
                        vtJsonDict = ret[1]
                        if "response_code" in vtJsonDict:
                            if vtJsonDict['response_code'] == 1:
                                if "positives" in vtJsonDict and "total" in vtJsonDict:
                                    pdf.setDetectionRate([
                                        vtJsonDict['positives'],
                                        vtJsonDict['total']
                                    ])
                                else:
                                    pdf.addError(
                                        'Missing elements in the response from VirusTotal!!'
                                    )
                                if "permalink" in vtJsonDict:
                                    pdf.setDetectionReport(
                                        vtJsonDict['permalink'])
                            else:
                                pdf.setDetectionRate(None)
                        else:
                            pdf.addError('Bad response from VirusTotal!!')
                statsDict = pdf.getStats()

            # For the handled failures below, errorMessage is set before the
            # ('PeepException', ...) exception is raised so that the outer
            # handler can print it.
            if options.xmlOutput:
                try:
                    xml = getPeepXML(statsDict, _version, revision)
                    sys.stdout.write(xml)
                except:
                    errorMessage = '*** Error: Exception while generating the XML file!!'
                    _logTraceback(errorsFile)
                    raise Exception('PeepException', 'Send me an email ;)')
            elif options.jsonOutput and not options.commands:
                try:
                    jsonReport = getPeepJSON(statsDict, _version, revision)
                    sys.stdout.write(jsonReport)
                except:
                    errorMessage = '*** Error: Exception while generating the JSON report!!'
                    _logTraceback(errorsFile)
                    raise Exception('PeepException', 'Send me an email ;)')
            else:
                if COLORIZED_OUTPUT and not options.avoidColors:
                    try:
                        init()
                    except:
                        COLORIZED_OUTPUT = False
                if options.scriptFile is not None:
                    from peepdf.PDFConsole import PDFConsole

                    # The script file is closed on every path, including the
                    # successful one.
                    with open(options.scriptFile, 'rb') as scriptFileObject:
                        console = PDFConsole(pdf,
                                             VT_KEY,
                                             options.avoidColors,
                                             stdin=scriptFileObject)
                        try:
                            console.cmdloop()
                        except:
                            errorMessage = '*** Error: Exception not handled using the batch mode!!'
                            _logTraceback(errorsFile)
                            raise Exception('PeepException',
                                            'Send me an email ;)')
                elif options.commands is not None:
                    # Same package-qualified import as the other two console
                    # branches of this function.
                    from peepdf.PDFConsole import PDFConsole

                    console = PDFConsole(pdf, VT_KEY, options.avoidColors)
                    try:
                        for command in options.commands:
                            console.onecmd(command)
                    except:
                        errorMessage = '*** Error: Exception not handled using the batch commands!!'
                        _logTraceback(errorsFile)
                        raise Exception('PeepException', 'Send me an email ;)')
                else:
                    if statsDict is not None:
                        if COLORIZED_OUTPUT and not options.avoidColors:
                            beforeStaticLabel = staticColor
                        else:
                            beforeStaticLabel = ''

                        if not JS_MODULE:
                            warningMessage = 'Warning: PyV8 is not installed!!'
                            stats += warningColor + warningMessage + resetColor + newLine
                        if not EMU_MODULE:
                            warningMessage = 'Warning: pylibemu is not installed!!'
                            stats += warningColor + warningMessage + resetColor + newLine
                        if not PIL_MODULE:
                            warningMessage = 'Warning: Python Imaging Library (PIL) is not installed!!'
                            stats += warningColor + warningMessage + resetColor + newLine
                        # Only decryption errors are shown in the header.
                        errors = statsDict['Errors']
                        for error in errors:
                            if error.find('Decryption error') != -1:
                                stats += errorColor + error + resetColor + newLine
                        if stats != '':
                            stats += newLine
                        statsDict = pdf.getStats()

                        stats += beforeStaticLabel + 'File: ' + resetColor + statsDict[
                            'File'] + newLine
                        stats += beforeStaticLabel + 'MD5: ' + resetColor + statsDict[
                            'MD5'] + newLine
                        stats += beforeStaticLabel + 'SHA1: ' + resetColor + statsDict[
                            'SHA1'] + newLine
                        stats += beforeStaticLabel + 'SHA256: ' + resetColor + statsDict[
                            'SHA256'] + newLine
                        stats += beforeStaticLabel + 'Size: ' + resetColor + statsDict[
                            'Size'] + ' bytes' + newLine
                        if options.checkOnVT:
                            if statsDict['Detection'] != []:
                                detectionReportInfo = ''
                                if statsDict['Detection'] is not None:
                                    detectionColor = ''
                                    if COLORIZED_OUTPUT and not options.avoidColors:
                                        # NOTE(review): with integer operands
                                        # this divides by zero when the total
                                        # is below 3 -- confirm the totals
                                        # reported by VirusTotal.
                                        detectionLevel = statsDict[
                                            'Detection'][0] / (
                                                statsDict['Detection'][1] / 3)
                                        if detectionLevel == 0:
                                            detectionColor = alertColor
                                        elif detectionLevel == 1:
                                            detectionColor = warningColor
                                    detectionRate = '%s%d%s/%d' % (
                                        detectionColor,
                                        statsDict['Detection'][0], resetColor,
                                        statsDict['Detection'][1])
                                    if statsDict['Detection report'] != '':
                                        detectionReportInfo = (
                                            beforeStaticLabel +
                                            'Detection report: ' + resetColor +
                                            statsDict['Detection report'] +
                                            newLine)
                                else:
                                    detectionRate = 'File not found on VirusTotal'
                                stats += beforeStaticLabel + 'Detection: ' + resetColor + detectionRate + newLine
                                stats += detectionReportInfo
                        stats += beforeStaticLabel + 'Version: ' + resetColor + statsDict[
                            'Version'] + newLine
                        stats += beforeStaticLabel + 'Binary: ' + resetColor + statsDict[
                            'Binary'] + newLine
                        stats += beforeStaticLabel + 'Linearized: ' + resetColor + statsDict[
                            'Linearized'] + newLine
                        stats += beforeStaticLabel + 'Encrypted: ' + resetColor + statsDict[
                            'Encrypted']
                        if statsDict['Encryption Algorithms'] != []:
                            stats += ' ('
                            for algorithmInfo in statsDict[
                                    'Encryption Algorithms']:
                                stats += algorithmInfo[0] + ' ' + str(
                                    algorithmInfo[1]) + ' bits, '
                            # Drop the trailing ', ' before closing the list.
                            stats = stats[:-2] + ')'
                        stats += newLine
                        stats += beforeStaticLabel + 'Updates: ' + resetColor + statsDict[
                            'Updates'] + newLine
                        stats += beforeStaticLabel + 'Objects: ' + resetColor + statsDict[
                            'Objects'] + newLine
                        stats += beforeStaticLabel + 'Streams: ' + resetColor + statsDict[
                            'Streams'] + newLine
                        stats += beforeStaticLabel + 'URIs: ' + resetColor + statsDict[
                            'URIs'] + newLine
                        stats += beforeStaticLabel + 'Comments: ' + resetColor + statsDict[
                            'Comments'] + newLine
                        stats += beforeStaticLabel + 'Errors: ' + resetColor + str(
                            len(statsDict['Errors'])) + newLine * 2
                        # One section per document version.
                        for version in range(len(statsDict['Versions'])):
                            statsVersion = statsDict['Versions'][version]
                            stats += beforeStaticLabel + 'Version ' + resetColor + str(
                                version) + ':' + newLine
                            if statsVersion['Catalog'] is not None:
                                stats += beforeStaticLabel + '\tCatalog: ' + resetColor + statsVersion[
                                    'Catalog'] + newLine
                            else:
                                stats += beforeStaticLabel + '\tCatalog: ' + resetColor + 'No' + newLine
                            if statsVersion['Info'] is not None:
                                stats += beforeStaticLabel + '\tInfo: ' + resetColor + statsVersion[
                                    'Info'] + newLine
                            else:
                                stats += beforeStaticLabel + '\tInfo: ' + resetColor + 'No' + newLine
                            stats += beforeStaticLabel + '\tObjects (' + statsVersion[
                                'Objects'][0] + '): ' + resetColor + str(
                                    statsVersion['Objects'][1]) + newLine
                            if statsVersion['Compressed Objects'] is not None:
                                stats += beforeStaticLabel + '\tCompressed objects (' + statsVersion[
                                    'Compressed Objects'][
                                        0] + '): ' + resetColor + str(
                                            statsVersion['Compressed Objects']
                                            [1]) + newLine
                            if statsVersion['Errors'] is not None:
                                stats += beforeStaticLabel + '\t\tErrors (' + statsVersion[
                                    'Errors'][0] + '): ' + resetColor + str(
                                        statsVersion['Errors'][1]) + newLine
                            stats += beforeStaticLabel + '\tStreams (' + statsVersion[
                                'Streams'][0] + '): ' + resetColor + str(
                                    statsVersion['Streams'][1])
                            if statsVersion['Xref Streams'] is not None:
                                stats += newLine + beforeStaticLabel + '\t\tXref streams (' + statsVersion[
                                    'Xref Streams'][
                                        0] + '): ' + resetColor + str(
                                            statsVersion['Xref Streams'][1])
                            if statsVersion['Object Streams'] is not None:
                                stats += (
                                    newLine + beforeStaticLabel +
                                    '\t\tObject streams (' +
                                    statsVersion['Object Streams'][0] + '): ' +
                                    resetColor +
                                    str(statsVersion['Object Streams'][1]))
                            if int(statsVersion['Streams'][0]) > 0:
                                stats += (newLine + beforeStaticLabel +
                                          '\t\tEncoded (' +
                                          statsVersion['Encoded'][0] + '): ' +
                                          resetColor +
                                          str(statsVersion['Encoded'][1]))
                                if statsVersion['Decoding Errors'] is not None:
                                    stats += (
                                        newLine + beforeStaticLabel +
                                        '\t\tDecoding errors (' +
                                        statsVersion['Decoding Errors'][0] +
                                        '): ' + resetColor +
                                        str(statsVersion['Decoding Errors'][1])
                                    )
                            if statsVersion['URIs'] is not None:
                                stats += (newLine + beforeStaticLabel +
                                          '\tObjects with URIs (' +
                                          statsVersion['URIs'][0] + '): ' +
                                          resetColor +
                                          str(statsVersion['URIs'][1]))
                            # JS code and suspicious elements are labelled
                            # with the warning color.
                            if COLORIZED_OUTPUT and not options.avoidColors:
                                beforeStaticLabel = warningColor
                            if statsVersion[
                                    'Objects with JS code'] is not None:
                                stats += (
                                    newLine + beforeStaticLabel +
                                    '\tObjects with JS code (' +
                                    statsVersion['Objects with JS code'][0] +
                                    '): ' + resetColor +
                                    str(statsVersion['Objects with JS code']
                                        [1]))
                            actions = statsVersion['Actions']
                            events = statsVersion['Events']
                            vulns = statsVersion['Vulns']
                            elements = statsVersion['Elements']
                            if events is not None or actions is not None or vulns is not None or elements is not None:
                                stats += newLine + beforeStaticLabel + '\tSuspicious elements:' + resetColor + newLine
                                if events is not None:
                                    for event in events:
                                        stats += (
                                            '\t\t' + beforeStaticLabel +
                                            event +
                                            ' (%d): ' % len(events[event]) +
                                            resetColor + str(events[event]) +
                                            newLine)
                                if actions is not None:
                                    for action in actions:
                                        stats += (
                                            '\t\t' + beforeStaticLabel +
                                            action +
                                            ' (%d): ' % len(actions[action]) +
                                            resetColor + str(actions[action]) +
                                            newLine)
                                if vulns is not None:
                                    for vuln in vulns:
                                        if vuln in vulnsDict:
                                            vulnName = vulnsDict[vuln][0]
                                            vulnCVEList = vulnsDict[vuln][1]
                                            stats += '\t\t' + beforeStaticLabel + vulnName + ' ('
                                            for vulnCVE in vulnCVEList:
                                                stats += vulnCVE + ','
                                            # Replace the trailing ',' with
                                            # the closing parenthesis.
                                            stats = stats[:-1] + ') (%d): ' % len(
                                                vulns[vuln]) + resetColor + str(
                                                    vulns[vuln]) + newLine
                                        else:
                                            stats += (
                                                '\t\t' + beforeStaticLabel +
                                                vuln +
                                                ' (%d): ' % len(vulns[vuln]) +
                                                resetColor + str(vulns[vuln]) +
                                                newLine)
                                if elements is not None:
                                    for element in elements:
                                        if element in vulnsDict:
                                            vulnName = vulnsDict[element][0]
                                            vulnCVEList = vulnsDict[element][1]
                                            stats += '\t\t' + beforeStaticLabel + vulnName + ' ('
                                            for vulnCVE in vulnCVEList:
                                                stats += vulnCVE + ','
                                            stats = stats[:
                                                          -1] + '): ' + resetColor + str(
                                                              elements[element]
                                                          ) + newLine
                                        else:
                                            stats += '\t\t' + beforeStaticLabel + element + ': ' + resetColor + str(
                                                elements[element]) + newLine
                            # Back to the regular label color for the rest.
                            if COLORIZED_OUTPUT and not options.avoidColors:
                                beforeStaticLabel = staticColor
                            urls = statsVersion['URLs']
                            if urls is not None:
                                stats += newLine + beforeStaticLabel + '\tFound URLs:' + resetColor + newLine
                                for url in urls:
                                    stats += '\t\t' + url + newLine
                            stats += newLine * 2
                    if fileName is not None:
                        print(stats)
                    if options.isInteractive:
                        from peepdf.PDFConsole import PDFConsole

                        console = PDFConsole(pdf, VT_KEY, options.avoidColors)
                        # Errors inside the console are logged and the loop is
                        # re-entered until the console reports it is leaving.
                        while not console.leaving:
                            try:
                                console.cmdloop()
                            except KeyboardInterrupt:
                                sys.exit()
                            except:
                                errorMessage = '*** Error: Exception not handled using the interactive console!! Please, report it to the author!!'
                                print(errorColor + errorMessage + resetColor + newLine)
                                _logTraceback(errorsFile)
    except Exception as e:
        # Handled failures arrive as Exception('PeepException', reason) with
        # errorMessage already set; anything else is logged here.
        if len(e.args) == 2:
            excName, excReason = e.args
        else:
            excName = None
        if excName is None or excName != 'PeepException':
            errorMessage = '*** Error: Exception not handled!!'
            _logTraceback(errorsFile)
        print(errorColor + errorMessage + resetColor + newLine)
    finally:
        # Runs on every exit path: the only condition is that the errors file
        # exists on disk.
        if os.path.exists(errorsFile):
            message = newLine + 'Please, don\'t forget to report errors if found:' + newLine * 2
            message += '\t- Sending the file "%s" to the author (mailto:[email protected])%s' % (
                errorsFile, newLine)
            message += '\t- And/Or creating an issue on the project webpage (https://github.com/jesparza/peepdf/issues)' + newLine
            message = errorColor + message + resetColor
            sys.exit(message)
Ejemplo n.º 19
0
Archivo: pdf.py Proyecto: 4g3n7/viper
        def get_streams():
            """Collect the stream objects of the currently opened PDF.

            The file of the current session is parsed with peepdf and every
            indirect object whose type is 'stream' produces one row:

                [counter, object id, offset, size, get_type(decoded stream)[:100]]

            ``counter`` starts at 1 and only advances for streams. When
            ``arg_open`` or ``arg_dump`` (names defined outside this function)
            is truthy, the whitespace-stripped decoded stream is also written
            to '<folder>/<md5>_<counter>_pdf_stream.bin' and that path is
            appended to the row.

            Returns:
                list: one row per stream, in the order they are encountered.
                If the dump folder cannot be created, or exists but is not a
                directory, an error is logged through ``self.log`` and the
                rows gathered so far are returned.
            """
            # This function is brutally ripped from Brandon Dixon's swf_mastah.py.

            # Parse the currently opened PDF document. Only the parsed document
            # is used; the status value returned alongside it is ignored.
            # NOTE(review): the positional True/False are presumably peepdf's
            # forceMode/looseMode flags -- confirm against PDFParser.parse.
            _, pdf = PDFParser().parse(__sessions__.current.file.path, True, False)

            # The destination folder does not depend on the stream being
            # processed, so it is chosen once: the folder given by the user
            # when dumping, the system temporary folder otherwise.
            dumping = arg_open or arg_dump
            folder = None
            if dumping:
                folder = arg_dump if arg_dump else tempfile.gettempdir()

            results = []
            object_counter = 1

            for body in pdf.body:
                objects = body.objects

                for index in objects:
                    entry = objects[index]
                    details = entry.object

                    # Only stream objects are reported.
                    if details.type != 'stream':
                        continue

                    decoded_stream = details.decodedStream

                    result = [
                        object_counter,
                        entry.id,
                        entry.offset,
                        entry.size,
                        get_type(decoded_stream)[:100]
                    ]

                    # If the stream needs to be dumped or opened, we do it
                    # and expand the results with the path to the stream dump.
                    if dumping:
                        # Confirm the dump path. This is checked lazily, right
                        # before writing, so nothing is created on disk for a
                        # document that has no streams.
                        if not os.path.exists(folder):
                            try:
                                os.makedirs(folder)
                            except Exception as e:
                                self.log('error', "Unable to create directory at {0}: {1}".format(folder, e))
                                return results
                        elif not os.path.isdir(folder):
                            self.log('error', "You need to specify a folder not a file")
                            return results

                        # Dump stream to this path.
                        # TODO: sometimes there appear to be multiple streams
                        # with the same object ID. Is that even possible?
                        # NOTE: the file name is keyed on the running counter,
                        # not on the object ID, so such streams still get
                        # distinct dump files.
                        dump_path = '{0}/{1}_{2}_pdf_stream.bin'.format(folder, __sessions__.current.file.md5, object_counter)

                        with open(dump_path, 'wb') as handle:
                            handle.write(decoded_stream.strip())

                        # Add dump path to the stream attributes.
                        result.append(dump_path)

                    # Update list of streams.
                    results.append(result)
                    object_counter += 1

            return results
Ejemplo n.º 20
0
        def get_streams():
            """Return one result row per stream object in the current PDF.

            The PDF of the current session is parsed with peepdf. For each
            indirect object whose type is 'stream' a row is built:

                [counter, object id, offset, size, get_type(decoded stream)[:100]]

            The counter starts at 1 and is incremented only for streams. If
            ``arg_open`` or ``arg_dump`` (both defined outside this function)
            is truthy, the decoded stream, stripped of surrounding whitespace,
            is written to '<folder>/<md5>_<counter>_pdf_stream.bin' and the
            path is appended to the row.

            Returns:
                list: the rows in encounter order. When the dump folder cannot
                be created, or the given path exists but is not a directory,
                the error is logged via ``self.log`` and the rows collected up
                to that point are returned.
            """
            # This function is brutally ripped from Brandon Dixon's swf_mastah.py.

            # Parse currently opened PDF document; the status value that comes
            # back with the document is not used.
            # NOTE(review): True/False look like peepdf's forceMode/looseMode
            # arguments -- verify against PDFParser.parse.
            _, pdf = PDFParser().parse(__sessions__.current.file.path, True,
                                       False)

            # Where dumps go is the same for every stream, so decide it once:
            # the user-supplied folder if dumping, else the temporary folder.
            must_dump = arg_open or arg_dump
            folder = None
            if must_dump:
                folder = arg_dump if arg_dump else tempfile.gettempdir()

            results = []
            object_counter = 1

            for body in pdf.body:
                objects = body.objects

                for index in objects:
                    indirect = objects[index]
                    details = indirect.object

                    # Skip everything that is not a stream.
                    if details.type != 'stream':
                        continue

                    decoded_stream = details.decodedStream

                    result = [
                        object_counter, indirect.id, indirect.offset,
                        indirect.size,
                        get_type(decoded_stream)[:100]
                    ]

                    # If the stream needs to be dumped or opened, we do it
                    # and expand the results with the path to the stream dump.
                    if must_dump:
                        # Confirm the dump path just before writing, so a PDF
                        # without streams never causes a folder to be created.
                        if not os.path.exists(folder):
                            try:
                                os.makedirs(folder)
                            except Exception as e:
                                self.log(
                                    'error',
                                    "Unable to create directory at {0}: {1}"
                                    .format(folder, e))
                                return results
                        elif not os.path.isdir(folder):
                            self.log(
                                'error',
                                "You need to specify a folder not a file"
                            )
                            return results

                        # Dump stream to this path.
                        # TODO: sometimes there appear to be multiple streams
                        # with the same object ID. Is that even possible?
                        # NOTE: dump files are named after the running counter
                        # rather than the object ID, so they do not collide.
                        dump_path = '{0}/{1}_{2}_pdf_stream.bin'.format(
                            folder, __sessions__.current.file.md5,
                            object_counter)

                        with open(dump_path, 'wb') as handle:
                            handle.write(decoded_stream.strip())

                        # Add dump path to the stream attributes.
                        result.append(dump_path)

                    # Update list of streams.
                    results.append(result)
                    object_counter += 1

            return results