Exemple #1
0
    def test_save_stats_text(self):
        """Check that _save_stats reports the PDF document-info fields.

        The plugin's three ``_update_*`` methods are patched, the text-only
        fixture PDF is attached to a new e-mail and every decoded part is
        passed to ``_save_stats``.  The patched ``_update_details`` must then
        have received one call per field: author, creator, producer, title.
        """
        # Replace the update hooks with mocks; the patchers are not stopped
        # inside this method.
        for target in (
                "pad.plugins.pdf_info.PDFInfoPlugin._update_details",
                "pad.plugins.pdf_info.PDFInfoPlugin._update_image_counts",
                "pad.plugins.pdf_info.PDFInfoPlugin._update_pixel_coverage"):
            patch(target).start()

        with open("tests/data/pdftest_text.pdf", "rb") as pdf_file:
            raw_pdf = pdf_file.read()
        if not raw_pdf:
            # An empty fixture leaves nothing to verify.
            return
        datastore = BytesIO()
        datastore.write(raw_pdf)

        # The plugin is expected to identify the PDF by the md5 of its bytes.
        pdf_id = md5(raw_pdf).hexdigest()
        reader = PyPDF2.PdfFileReader(datastore)
        info = reader.getDocumentInfo()
        expected_calls = [
            call(self.mock_msg, pdf_id, field, getattr(info, field))
            for field in ("author", "creator", "producer", "title")
        ]

        attachments = {1: {"data": datastore, "name": "pdftest.pdf"}}
        self.mock_msg.msg = new_email(attachments)

        for part in self.mock_msg.msg.walk():
            payload = part.get_payload(decode=True)
            # Parts without a decodable payload are not passed on.
            if payload is not None:
                self.plugin._save_stats(self.mock_msg, payload)

        self.plugin._update_details.assert_has_calls(expected_calls)
Exemple #2
0
    def test_save_stats_text(self):
        """Verify the detail updates issued by _save_stats for a text PDF.

        With ``_update_details``, ``_update_image_counts`` and
        ``_update_pixel_coverage`` mocked out, the fixture PDF is wrapped in
        an e-mail and each decoded payload is fed to ``_save_stats``.  The
        mocked ``_update_details`` is then checked for the author, creator,
        producer and title calls, in that order.
        """
        patched_methods = (
            "pad.plugins.pdf_info.PDFInfoPlugin._update_details",
            "pad.plugins.pdf_info.PDFInfoPlugin._update_image_counts",
            "pad.plugins.pdf_info.PDFInfoPlugin._update_pixel_coverage",
        )
        for method_path in patched_methods:
            # Started but not stopped within this method.
            patch(method_path).start()

        with open("tests/data/pdftest_text.pdf", "rb") as pdf_file:
            content = pdf_file.read()
        if not content:
            # Nothing was read from the fixture, so there is nothing to check.
            return
        datastore = BytesIO()
        datastore.write(content)

        pdf_id = md5(content).hexdigest()
        pdf_object = PyPDF2.PdfFileReader(datastore)
        info = pdf_object.getDocumentInfo()

        def detail_call(field):
            """Build the expected _update_details call for one info field."""
            return call(self.mock_msg, pdf_id, field, getattr(info, field))

        update_details_calls = [
            detail_call("author"),
            detail_call("creator"),
            detail_call("producer"),
            detail_call("title"),
        ]

        self.mock_msg.msg = new_email(
            {1: {"data": datastore, "name": "pdftest.pdf"}})

        for part in self.mock_msg.msg.walk():
            payload = part.get_payload(decode=True)
            if payload is not None:
                self.plugin._save_stats(self.mock_msg, payload)

        self.plugin._update_details.assert_has_calls(update_details_calls)
Exemple #3
0
    def test_extract_metadata(self):
        """Check the helper calls made by extract_metadata for each PDF.

        Four PDFs are generated (the last one carrying an attached image) and
        bundled into one e-mail.  ``_add_name``, ``_update_counts`` and
        ``_save_stats`` are mocked; after ``extract_metadata`` has been run on
        every part of the message, each mock must have been called once per
        PDF with the file name, ``incr=1`` and the PDF bytes respectively.
        """
        for target in ("oa.plugins.pdf_info.PDFInfoPlugin._add_name",
                       "oa.plugins.pdf_info.PDFInfoPlugin._update_counts",
                       "oa.plugins.pdf_info.PDFInfoPlugin._save_stats"):
            # Started but not stopped within this method.
            patch(target).start()

        def details(author, title):
            """Return the document-info dictionary for one generated PDF."""
            return {"/Author": author, "/Creator": "unittest",
                    "/Created": "2016-02-11", "/Title": title}

        allpdfinfo = (
            {"details": details("Author1", "pdftest")},
            {"details": details("Author2", "pdftest2")},
            {"details": details("Author3", "pdftest3")},
            {"details": details("Author4", "pdftest4"),
             "images": ("image1", (100, 100))},
        )

        pdfs = {}
        add_name_calls, update_counts_calls, save_stats_calls = [], [], []
        for i, pdf_info in enumerate(allpdfinfo):
            name = "%d.pdf" % i
            if "images" in pdf_info:
                # Build a PDF that embeds the described image as attachment.
                image_name, image_size = pdf_info["images"]
                pdfc = PDFWithAttachments(
                    details=pdf_info["details"], name=name)
                image = new_image_string(image_size)
                pdfc.addAttachment(image_name, image)
                pdf_object = {"data": pdfc.as_file(), "name": name}
            else:
                pdf_object = new_pdf(details=pdf_info["details"], name=name)
            pdfs[i] = pdf_object

            add_name_calls.append(call(self.mock_msg, name))
            update_counts_calls.append(call(self.mock_msg, incr=1))
            # The stream is read here, before the e-mail is assembled.
            pdf_bytes = pdf_object["data"].read()
            save_stats_calls.append(call(self.mock_msg, pdf_bytes))

        self.mock_msg.msg = new_email(pdfs)

        # Every part is passed on, whatever its decoded payload is.
        for part in self.mock_msg.msg.walk():
            self.plugin.extract_metadata(
                self.mock_msg, part.get_payload(decode=True), None, part)

        self.plugin._add_name.assert_has_calls(add_name_calls)
        self.plugin._update_counts.assert_has_calls(update_counts_calls)
        self.plugin._save_stats.assert_has_calls(save_stats_calls)
Exemple #4
0
    def test_extract_metadata(self):
        """Verify that extract_metadata records name, count and stats.

        ``_add_name``, ``_update_counts`` and ``_save_stats`` are mocked.
        Four PDFs (one of them with an attached image) are put into a single
        e-mail and ``extract_metadata`` is invoked on each part.  The mocks
        are then checked for one call per PDF: the file name, ``incr=1`` and
        the bytes read from the PDF data stream.
        """
        patch("pad.plugins.pdf_info.PDFInfoPlugin._add_name").start()
        patch("pad.plugins.pdf_info.PDFInfoPlugin._update_counts").start()
        patch("pad.plugins.pdf_info.PDFInfoPlugin._save_stats").start()

        def build_pdf(name, pdf_info):
            """Create the e-mail attachment entry described by *pdf_info*."""
            if "images" not in pdf_info:
                return new_pdf(details=pdf_info["details"], name=name)
            # A description with images gets the image embedded as attachment.
            pdfc = PDFWithAttachments(details=pdf_info["details"], name=name)
            image = new_image_string(pdf_info["images"][1])
            pdfc.addAttachment(pdf_info["images"][0], image)
            return {"data": pdfc.as_file(), "name": name}

        allpdfinfo = (
            {
                "details": {"/Author": "Author1", "/Creator": "unittest",
                            "/Created": "2016-02-11", "/Title": "pdftest"},
            },
            {
                "details": {"/Author": "Author2", "/Creator": "unittest",
                            "/Created": "2016-02-11", "/Title": "pdftest2"},
            },
            {
                "details": {"/Author": "Author3", "/Creator": "unittest",
                            "/Created": "2016-02-11", "/Title": "pdftest3"},
            },
            {
                "details": {"/Author": "Author4", "/Creator": "unittest",
                            "/Created": "2016-02-11", "/Title": "pdftest4"},
                "images": ("image1", (100, 100)),
            },
        )

        pdfs = {}
        add_name_calls = []
        update_counts_calls = []
        save_stats_calls = []
        for index, pdf_info in enumerate(allpdfinfo):
            name = "%d.pdf" % index
            pdf_object = build_pdf(name, pdf_info)
            pdfs[index] = pdf_object
            add_name_calls.append(call(self.mock_msg, name))
            update_counts_calls.append(call(self.mock_msg, incr=1))
            # Read the stream right away, before the e-mail is built from it.
            expected_payload = pdf_object["data"].read()
            save_stats_calls.append(call(self.mock_msg, expected_payload))

        self.mock_msg.msg = new_email(pdfs)

        # No filtering on the payload: every part is handed to the plugin.
        for part in self.mock_msg.msg.walk():
            decoded = part.get_payload(decode=True)
            self.plugin.extract_metadata(self.mock_msg, decoded, None, part)

        self.plugin._add_name.assert_has_calls(add_name_calls)
        self.plugin._update_counts.assert_has_calls(update_counts_calls)
        self.plugin._save_stats.assert_has_calls(save_stats_calls)