Exemple #1
0
class TestSlate(unittest.TestCase):
    """Tests for text extraction through the ``PDF`` wrapper.

    The fixtures ``example.pdf`` and ``protected.pdf`` are opened relative to
    the current working directory.
    """

    def setUp(self):
        """Build a ``PDF`` object for each fixture file."""
        with open('example.pdf', 'rb') as f:
            self.doc = PDF(f)
        # The second positional argument is presumably the document
        # password -- confirm against the PDF constructor.
        with open('protected.pdf', 'rb') as f:
            self.passwd = PDF(f, 'a')

    def test_basic(self):
        """The first item of the document is the expected text plus a form feed."""
        self.assertEqual(self.doc[0], 'This is a test.\x0c')

    def test_metadata_extraction(self):
        """The document exposes truthy metadata."""
        self.assertTrue(self.doc.metadata)

    def test_text_method(self):
        """``text()`` contains the expected sentence."""
        self.assertIn("This is a test", self.doc.text())

    def test_text_method_unclean(self):
        """``text(clean=0)`` still contains the form-feed character."""
        self.assertIn('\x0c', self.doc.text(clean=0))

    def test_password(self):
        """The protected document yields the expected text."""
        self.assertEqual(self.passwd[0], "Chamber of secrets.\x0c")
Exemple #2
0
class TestSlate(unittest.TestCase):
    """Tests for text extraction through the ``PDF`` wrapper.

    The fixtures ``example.pdf`` and ``protected.pdf`` are opened relative to
    the current working directory.
    """

    def setUp(self):
        """Build a ``PDF`` object for each fixture file."""
        with open('example.pdf', 'rb') as f:
            self.doc = PDF(f)
        # The second positional argument is presumably the document
        # password -- confirm against the PDF constructor.
        with open('protected.pdf', 'rb') as f:
            self.passwd = PDF(f, 'a')

    def test_basic(self):
        """The first item is the expected text, two newlines and a form feed."""
        self.assertEqual(self.doc[0], 'This is a test.\n\n\x0c')

    def test_no_text_carry_over(self):
        """The second item holds only a form feed (no text from the first)."""
        self.assertEqual(self.doc[1], '\x0c')

    def test_metadata_extraction(self):
        """The document exposes truthy metadata."""
        self.assertTrue(self.doc.metadata)

    def test_text_method(self):
        """``text()`` contains the expected sentence."""
        self.assertIn("This is a test", self.doc.text())

    def test_text_method_unclean(self):
        """``text(clean=0)`` still contains the form-feed character."""
        self.assertIn('\x0c', self.doc.text(clean=0))

    def test_password(self):
        """The protected document yields the expected text."""
        self.assertEqual(self.passwd[0], "Chamber of secrets.\n\n\x0c")
Exemple #3
0
    def read(self):
        """Return the text content of the attached file.

        Only PDF files are handled for now. ``unicode()`` (an empty string)
        is returned when ``self.file.path`` does not exist, when
        ``self.type()`` is not ``'pdf'``, or when building the ``PDF`` object
        raises ``PDF.PDFSyntaxError``.
        """
        # Nothing to extract: missing file or a type we do not handle.
        if not os.path.exists(self.file.path) or self.type() != 'pdf':
            return unicode()

        try:
            parsed = PDF(self.file.file)
        except PDF.PDFSyntaxError:
            return unicode()
        else:
            # Text extraction stays outside the guarded section, so errors
            # raised by text() propagate to the caller.
            return parsed.text()
Exemple #4
0
    def read(self):
        """Return the text content of the attached file.

        Only PDF files are handled for now. ``unicode()`` (an empty string)
        is returned when:

        * ``settings.USE_S3_STORAGE`` is falsy and ``self.file.path`` does
          not exist on the local filesystem,
        * ``self.type()`` is not ``'pdf'``, or
        * building the ``PDF`` object raises an ``Exception``.
        """
        # The local-path check is skipped when S3 storage is enabled
        # (presumably there is no local path in that case -- confirm).
        if not settings.USE_S3_STORAGE and not os.path.exists(self.file.path):
            return unicode()

        if self.type() != 'pdf':
            return unicode()

        try:
            doc = PDF(self.file.file)
        except Exception:
            # Best effort: any parsing failure yields empty text.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            return unicode()

        return doc.text()
Exemple #5
0
    def read(self):
        """Return the text content of the attached file.

        Only PDF files are handled for now. ``unicode()`` (an empty string)
        is returned when:

        * ``settings.USE_S3_STORAGE`` is falsy and ``self.file.path`` does
          not exist on the local filesystem,
        * ``settings.INDEX_FILE_CONTENT`` is falsy,
        * ``self.type()`` is not ``'pdf'``, or
        * building the ``PDF`` object raises an ``Exception``.
        """
        # The local-path check is skipped when S3 storage is enabled
        # (presumably there is no local path in that case -- confirm).
        if not settings.USE_S3_STORAGE and not os.path.exists(self.file.path):
            return unicode()

        # Content extraction is opt-in via settings; only PDFs are supported.
        if not settings.INDEX_FILE_CONTENT or self.type() != 'pdf':
            return unicode()

        try:
            doc = PDF(self.file.file)
        except Exception:
            # Best effort: any parsing failure yields empty text.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            return unicode()

        return doc.text()