def handle_pdf(self, url, content):
    """Analyze *content* as a PDF document and log the peepdf analysis.

    The content is written to a temporary file, parsed with ``PDFParser``
    (force and loose modes enabled), converted to an analysis through
    ``self.getPeepXML`` and handed to ``log.ThugLogging.log_peepdf``. The
    parsed document is then passed to ``self.swf_mastah``.

    Args:
        url: URL the content was fetched from; forwarded to the sample
            builder, ``getPeepXML`` and ``swf_mastah``.
        content: Raw bytes of the document.

    Returns:
        ``None`` when the built sample is missing or is not of type
        ``'PDF'``, ``False`` when parsing raises, ``True`` otherwise.
    """
    sample = log.ThugLogging.build_sample(content, url)
    if sample is None or sample['type'] not in ('PDF', ):
        return None

    fd, rfile = tempfile.mkstemp()
    try:
        # mkstemp() hands back an already-open OS-level descriptor; wrap it
        # so it is closed with the file object instead of being leaked.
        with os.fdopen(fd, 'wb') as tmp:
            tmp.write(content)

        pdfparser = PDFParser()
        try:
            _, pdf = pdfparser.parse(rfile, forceMode=True, looseMode=True)
        except Exception:
            # Any parser failure means "not analyzable"; KeyboardInterrupt
            # and SystemExit are deliberately allowed to propagate.
            return False

        statsDict = pdf.getStats()
        analysis = self.getPeepXML(statsDict, url)

        log_dir = os.path.join(log.ThugLogging.baseDir, "analysis", "pdf")
        log.ThugLogging.log_peepdf(log_dir, sample, analysis)

        self.swf_mastah(pdf, statsDict, url)
        return True
    finally:
        # Remove the temporary file on every exit path, including errors
        # raised by the analysis/logging steps above.
        os.remove(rfile)
def handle_pdf(self, url, content):
    """Run the peepdf-based analysis on *content* if it is a PDF sample.

    Steps: build a sample via ``log.ThugLogging.build_sample``, dump the
    content to a temporary file, parse it with ``PDFParser`` in force/loose
    mode, build the analysis with ``self.getPeepXML``, log it through
    ``log.ThugLogging.log_peepdf`` and finally call ``self.swf_mastah`` on
    the parsed document.

    Args:
        url: Origin URL of the content; passed through to the helpers.
        content: Raw bytes of the document.

    Returns:
        ``None`` if no sample could be built or its type is not ``'PDF'``,
        ``False`` if the parser raised, ``True`` on completion.
    """
    sample = log.ThugLogging.build_sample(content, url)
    if sample is None or sample['type'] not in ('PDF', ):
        return None

    fd, rfile = tempfile.mkstemp()
    try:
        # Reuse the descriptor opened by mkstemp() rather than opening the
        # path a second time, which would leave the descriptor unclosed.
        with os.fdopen(fd, 'wb') as tmp:
            tmp.write(content)

        pdfparser = PDFParser()
        try:
            # First element of the result is unused here.
            _, pdf = pdfparser.parse(rfile, forceMode=True, looseMode=True)
        except Exception:
            # Parser errors are treated as "could not analyze"; interpreter
            # exit signals (KeyboardInterrupt/SystemExit) still propagate.
            return False

        statsDict = pdf.getStats()
        analysis = self.getPeepXML(statsDict, url)

        log_dir = os.path.join(log.ThugLogging.baseDir, "analysis", "pdf")
        log.ThugLogging.log_peepdf(log_dir, sample, analysis)

        self.swf_mastah(pdf, statsDict, url)
        return True
    finally:
        # Guarantees the temporary file is deleted even when a later step
        # raises, not only on the parse-failure and success paths.
        os.remove(rfile)
def handle_pdf(self, url, content):
    """Parse *content* as a PDF and store its peepdf XML analysis.

    The content is stored under ``log.ThugLogging.baseDir`` using its MD5
    hex digest as the name, parsed with ``PDFParser`` (force and loose
    modes), and the analysis produced by ``self.getPeepXML`` is stored as
    ``<MD5>.xml`` (MD5 taken from the parser statistics) under
    ``<baseDir>/analysis/pdf``. The parsed document is then passed to
    ``self.swf_mastah`` and the stored copy of the content is removed.

    Args:
        url: Origin URL of the content; forwarded to ``getPeepXML``.
        content: Raw bytes of the document.

    Returns:
        ``False`` when parsing raises, ``True`` otherwise.
    """
    md5sum = hashlib.md5(content).hexdigest()
    rfile = log.ThugLogging.store_content(log.ThugLogging.baseDir, md5sum, content)

    try:
        pdfparser = PDFParser()
        try:
            _, pdf = pdfparser.parse(rfile, forceMode=True, looseMode=True)
        except Exception:
            # Parser failure means the document cannot be analyzed;
            # KeyboardInterrupt/SystemExit are allowed to propagate.
            return False

        statsDict = pdf.getStats()
        analysis = self.getPeepXML(statsDict, url)

        pdflogdir = os.path.join(log.ThugLogging.baseDir, "analysis", "pdf")
        try:
            os.makedirs(pdflogdir)
        except OSError:
            # Best effort: the directory normally exists already. Only
            # filesystem errors are ignored, not arbitrary exceptions.
            pass

        log.ThugLogging.store_content(pdflogdir, "%s.xml" % (statsDict["MD5"], ), analysis)
        self.swf_mastah(pdf, statsDict)
        return True
    finally:
        # Remove the stored copy on every exit path, including failures in
        # the analysis and logging steps.
        os.remove(rfile)