def test_save_stats_text(self):
    """Check that _save_stats reports a text PDF's document info.

    The file tests/data/pdftest_text.pdf is attached to the mock message
    and every part that yields a payload is handed to _save_stats. The
    mocked _update_details is then expected to have received the author,
    creator, producer and title that PyPDF2 reads from the same bytes.
    """
    # NOTE(review): this file holds another definition with the same name;
    # if both live in one class the later one replaces this one - confirm.

    # Replace the update hooks with mocks so their calls can be inspected.
    patched_targets = (
        "pad.plugins.pdf_info.PDFInfoPlugin._update_details",
        "pad.plugins.pdf_info.PDFInfoPlugin._update_image_counts",
        "pad.plugins.pdf_info.PDFInfoPlugin._update_pixel_coverage",
    )
    for target in patched_targets:
        patch(target).start()

    datastore = BytesIO()
    with open("tests/data/pdftest_text.pdf", "rb") as source:
        datastore.write(source.read())
    if not datastore.getvalue():
        # An empty fixture leaves nothing to verify.
        return

    pdf_id = md5(datastore.getvalue()).hexdigest()
    info = PyPDF2.PdfFileReader(datastore).getDocumentInfo()
    expected_calls = [
        call(self.mock_msg, pdf_id, field, getattr(info, field))
        for field in ("author", "creator", "producer", "title")
    ]

    attachments = {1: {"data": datastore, "name": "pdftest.pdf"}}
    self.mock_msg.msg = new_email(attachments)
    for part in self.mock_msg.msg.walk():
        payload = part.get_payload(decode=True)
        # Parts without a payload of their own are skipped.
        if payload is not None:
            self.plugin._save_stats(self.mock_msg, payload)

    self.plugin._update_details.assert_has_calls(expected_calls)
def test_save_stats_text(self):
    """Verify the _update_details calls made by _save_stats for a text PDF.

    Loads tests/data/pdftest_text.pdf into memory, wraps it in an email on
    the mock message, feeds each part's payload to _save_stats and expects
    _update_details to have been called with the author, creator, producer
    and title taken from the PDF's document info.
    """
    # NOTE(review): an earlier definition with this same name exists in this
    # file; if both are in one class this one overrides it - confirm.

    # Mock out the plugin's update hooks before exercising _save_stats.
    for hook in (
        "pad.plugins.pdf_info.PDFInfoPlugin._update_details",
        "pad.plugins.pdf_info.PDFInfoPlugin._update_image_counts",
        "pad.plugins.pdf_info.PDFInfoPlugin._update_pixel_coverage",
    ):
        patch(hook).start()

    datastore = BytesIO()
    with open("tests/data/pdftest_text.pdf", "rb") as fixture:
        datastore.write(fixture.read())
    if not datastore.getvalue():
        # Nothing was read from the fixture, so there is nothing to check.
        return

    digest = md5(datastore.getvalue()).hexdigest()
    reader = PyPDF2.PdfFileReader(datastore)
    doc_info = reader.getDocumentInfo()
    wanted = []
    for key in ("author", "creator", "producer", "title"):
        wanted.append(call(self.mock_msg, digest, key, getattr(doc_info, key)))

    self.mock_msg.msg = new_email(
        {1: {"data": datastore, "name": "pdftest.pdf"}})
    for part in self.mock_msg.msg.walk():
        body = part.get_payload(decode=True)
        if body is None:
            # This part has no payload to pass on.
            continue
        self.plugin._save_stats(self.mock_msg, body)

    self.plugin._update_details.assert_has_calls(wanted)
def test_extract_metadata(self):
    """Check the helper calls extract_metadata makes for PDF attachments.

    Four PDFs are built (the last one carrying an image attachment) and
    placed in an email on the mock message. Every part is passed to
    extract_metadata, after which the mocked _add_name, _update_counts and
    _save_stats are expected to contain one call per PDF, in order.
    """
    # NOTE(review): this file also contains a test_extract_metadata that
    # patches the same methods under "pad.plugins.pdf_info"; confirm which
    # package path is current and whether both definitions should remain.
    for target in (
        "oa.plugins.pdf_info.PDFInfoPlugin._add_name",
        "oa.plugins.pdf_info.PDFInfoPlugin._update_counts",
        "oa.plugins.pdf_info.PDFInfoPlugin._save_stats",
    ):
        patch(target).start()

    expected_names = []
    expected_counts = []
    expected_stats = []
    attachments = {}
    allpdfinfo = (
        {"details": {"/Author": "Author1", "/Creator": "unittest",
                     "/Created": "2016-02-11", "/Title": "pdftest"}},
        {"details": {"/Author": "Author2", "/Creator": "unittest",
                     "/Created": "2016-02-11", "/Title": "pdftest2"}},
        {"details": {"/Author": "Author3", "/Creator": "unittest",
                     "/Created": "2016-02-11", "/Title": "pdftest3"}},
        {"details": {"/Author": "Author4", "/Creator": "unittest",
                     "/Created": "2016-02-11", "/Title": "pdftest4"},
         "images": ("image1", (100, 100))},
    )

    for index, spec in enumerate(allpdfinfo):
        name = "%d.pdf" % index
        if "images" in spec:
            # Build a PDF carrying the image as an attachment.
            image_name, image_size = spec["images"]
            builder = PDFWithAttachments(details=spec["details"], name=name)
            image = new_image_string(image_size)
            builder.addAttachment(image_name, image)
            pdf_object = {"data": builder.as_file(), "name": name}
        else:
            pdf_object = new_pdf(details=spec["details"], name=name)
        attachments[index] = pdf_object

        expected_names.append(call(self.mock_msg, name))
        expected_counts.append(call(self.mock_msg, incr=1))
        expected_stats.append(
            call(self.mock_msg, pdf_object["data"].read()))

    self.mock_msg.msg = new_email(attachments)
    for part in self.mock_msg.msg.walk():
        payload = part.get_payload(decode=True)
        self.plugin.extract_metadata(self.mock_msg, payload, None, part)

    self.plugin._add_name.assert_has_calls(expected_names)
    self.plugin._update_counts.assert_has_calls(expected_counts)
    self.plugin._save_stats.assert_has_calls(expected_stats)
def test_extract_metadata(self):
    """Verify extract_metadata's calls to the mocked plugin helpers.

    Builds four PDFs from a table of document details (the last also gets
    an image attachment), mails them on the mock message and runs
    extract_metadata over every part. The mocked _add_name, _update_counts
    and _save_stats must then hold the expected per-PDF calls in order.
    """
    # NOTE(review): an earlier test_extract_metadata in this file patches
    # the same methods under "oa.plugins.pdf_info"; if both are in one class
    # this definition overrides it - confirm which one is intended.
    patch("pad.plugins.pdf_info.PDFInfoPlugin._add_name").start()
    patch("pad.plugins.pdf_info.PDFInfoPlugin._update_counts").start()
    patch("pad.plugins.pdf_info.PDFInfoPlugin._save_stats").start()

    name_calls, count_calls, stats_calls = [], [], []
    pdfs = {}
    allpdfinfo = (
        {"details": {"/Author": "Author1", "/Creator": "unittest",
                     "/Created": "2016-02-11", "/Title": "pdftest"}},
        {"details": {"/Author": "Author2", "/Creator": "unittest",
                     "/Created": "2016-02-11", "/Title": "pdftest2"}},
        {"details": {"/Author": "Author3", "/Creator": "unittest",
                     "/Created": "2016-02-11", "/Title": "pdftest3"}},
        {"details": {"/Author": "Author4", "/Creator": "unittest",
                     "/Created": "2016-02-11", "/Title": "pdftest4"},
         "images": ("image1", (100, 100))},
    )

    for position, entry in enumerate(allpdfinfo):
        name = "%d.pdf" % position
        details = entry["details"]
        if "images" in entry:
            # Entries with an image are built via PDFWithAttachments.
            with_image = PDFWithAttachments(details=details, name=name)
            picture = new_image_string(entry["images"][1])
            with_image.addAttachment(entry["images"][0], picture)
            pdf_object = {"data": with_image.as_file(), "name": name}
        else:
            pdf_object = new_pdf(details=details, name=name)
        pdfs[position] = pdf_object

        name_calls.append(call(self.mock_msg, name))
        count_calls.append(call(self.mock_msg, incr=1))
        # The expected _save_stats argument is the PDF's file content.
        content = pdf_object["data"].read()
        stats_calls.append(call(self.mock_msg, content))

    self.mock_msg.msg = new_email(pdfs)
    for part in self.mock_msg.msg.walk():
        self.plugin.extract_metadata(
            self.mock_msg, part.get_payload(decode=True), None, part)

    self.plugin._add_name.assert_has_calls(name_calls)
    self.plugin._update_counts.assert_has_calls(count_calls)
    self.plugin._save_stats.assert_has_calls(stats_calls)