예제 #1
0
 def convert(self, data, idatastream, filename=None, **kwargs):
     """Convert `data` to PDF and store the result in `idatastream`.

     A missing `filename` defaults to ``unknown.odt``; any other
     name that does not end in ``.odt`` (compared
     case-insensitively) gets that suffix appended.

     The cache key returned by the conversion is written to the
     metadata of `idatastream` under ``cache_key_pdf``, the PDF
     becomes its data. Returns `idatastream`.
     """
     cache_location = self.cache_dir or None
     known_key = self.get_cache_key('cache_key_pdf', idatastream)
     if not filename:
         filename = 'unknown.odt'
     elif not filename.lower().endswith('.odt'):
         filename = filename + '.odt'
     doc = Document(filename, data, cache_dir=cache_location)
     result, new_key = doc.convertToPDF(cache_key=known_key)
     idatastream.getMetadata()['cache_key_pdf'] = new_key
     idatastream.setData(result)
     return idatastream
예제 #2
0
 def test_convert_to_pdf_cached_wo_cache_key(self):
     # We can get a cached doc also without passing a cache key
     # (but it is expensive).
     self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
     pdf1, cache_key1 = self.doc.convertToPDF()  # store doc in cache
     # Modify the cached result to distinguish it from a freshly
     # converted doc.
     from ulif.openoffice.cachemanager import CacheManager
     cm = CacheManager(self.workdir)
     cached_path = cm.get_cached_file(cache_key1)
     # Close the file explicitly so the fake content is flushed to
     # disk before the cache is read again below.
     with open(cached_path, 'wb') as fd:
         fd.write('My Fake Result')
     # Now re-get the document. We should get the cached copy.
     self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
     pdf2, cache_key2 = self.doc.convertToPDF()
     self.assertEqual(pdf2, 'My Fake Result')
     self.assertEqual(cache_key2, cache_key1)
예제 #3
0
 def test_convert_to_pdf_w_cache_key(self):
     # Passing the key of an earlier PDF conversion gives back the
     # same PDF together with the same key
     self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
     first_pdf, first_key = self.doc.convertToPDF()  # fills the cache
     second_pdf, second_key = self.doc.convertToPDF(cache_key=first_key)
     assert first_pdf == second_pdf
     assert first_key == second_key
예제 #4
0
 def test_convert_to_pdf(self):
     # Converting a document to PDF yields data starting with a
     # PDF header
     self.doc = Document('mytestdoc.doc', self.doc_simple1)
     result, key = self.doc.convertToPDF()
     self.assertEqual(result[:6], '%PDF-1')
     # the document was created without cache_dir: no cache key
     assert key is None
예제 #5
0
 def test_convert_w_cache_key(self):
     # Passing the key of an earlier HTML conversion gives back the
     # same HTML together with the same key
     self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
     first_html, first_key = self.doc.convert()  # fills the cache
     second_html, second_key = self.doc.convert(cache_key=first_key)
     assert first_html == second_html
     assert first_key == second_key
예제 #6
0
 def test_convert_w_cache_dir(self):
     # With a cache dir set, an HTML conversion also returns a
     # cache key
     self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
     markup, key = self.doc.convert()
     assert 'A simple document.' in markup
     assert '</p>' in markup
     self.assertEqual(key, 'cc8c3b702ca3865608732f612691978b_1_1')
예제 #7
0
 def test_subobjects_no_files(self):
     # Looking for subobjects in the (still empty) workdir gives
     # the workdir path with a trailing slash and no filenames.
     self.doc = Document('mytestdoc.doc', self.doc_simple1)
     found_path, found_names = self.doc.subObjects(self.workdir)
     self.assertEqual(found_path, self.workdir + '/')
     self.assertEqual(found_names, [])
예제 #8
0
 def test_del_removes_tmp_dir(self):
     # The file a `Document` works on is gone once the document
     # has been deleted
     self.doc = Document('mytestdoc.doc', self.doc_simple1)
     doc_file = self.doc.fullname
     assert os.path.isfile(doc_file)
     del self.doc
     assert not os.path.isfile(doc_file)
예제 #9
0
 def test_convert(self):
     # Converting a document to HTML yields markup containing the
     # document text
     self.doc = Document('mytestdoc.doc', self.doc_simple1)
     markup, key = self.doc.convert()
     assert 'A simple document.' in markup
     assert '</p>' in markup
     # the document was created without cache_dir: no cache key
     assert key is None
예제 #10
0
 def test_subobjects_usual_image_files(self):
     # Usual image files and css files are found by subObjects().
     self.doc = Document('mytestdoc.doc', self.doc_simple1)
     for name in ['fake.gif', 'fake.jpg', 'fake.png', 'styles.css']:
         # Create an empty file; the `with` block closes the handle
         # right away instead of leaving that to garbage collection.
         with open(os.path.join(self.workdir, name), 'w') as fd:
             fd.write('')
     path, filenames = self.doc.subObjects(self.workdir)
     assert sorted(filenames) == [
         'fake.gif', 'fake.jpg', 'fake.png', 'styles.css']
예제 #11
0
 def convert(self, data, idatastream, filename=None, mimetype=None,
             **kwargs):
     """Convert `data` to PDF and store the result in `idatastream`.

     The filename suffix to fall back on is ``.docx`` if `mimetype`
     equals ``self.inputs[1]`` and ``.doc`` otherwise. A missing
     `filename` becomes ``unknown`` plus that suffix; any other
     name ending in neither ``.doc`` nor ``.docx`` (compared
     case-insensitively) gets the suffix appended.

     The cache key returned by the conversion is written to the
     metadata of `idatastream` under ``cache_key_pdf``, the PDF
     becomes its data. Returns `idatastream`.
     """
     cache_location = self.cache_dir or None
     known_key = self.get_cache_key('cache_key_pdf', idatastream)
     if mimetype is not None and mimetype == self.inputs[1]:
         suffix = '.docx'
     else:
         suffix = '.doc'
     if not filename:
         filename = 'unknown' + suffix
     elif not filename.lower().endswith(('.doc', '.docx')):
         filename = filename + suffix
     doc = Document(filename, data, cache_dir=cache_location)
     result, new_key = doc.convertToPDF(cache_key=known_key)
     idatastream.getMetadata()['cache_key_pdf'] = new_key
     idatastream.setData(result)
     return idatastream
예제 #12
0
 def convert(self, data, idatastream, filename='unknown', **kwargs):
     """Convert `data` to HTML and store the result in `idatastream`.

     The HTML becomes the data of `idatastream`, the cache key
     returned by the conversion is written to its metadata under
     ``cache_key_html``. Files found in the document's temporary
     directory and its ``Pictures`` subdirectory are handed to
     ``fixImages()`` together with a dict, which is afterwards set
     as the subobjects of `idatastream`.

     Returns `idatastream`.
     """
     if not filename:
         # NOTE(review): only reached for an explicitly passed falsy
         # name; the default 'unknown' is truthy and used as is.
         filename = 'unknown.doc'
     cache_location = self.cache_dir or None
     known_key = self.get_cache_key('cache_key_html', idatastream)
     doc = Document(filename, data, cache_dir=cache_location)
     markup, new_key = doc.convert(cache_key=known_key)
     candidates = (doc.tmpdir, os.path.join(doc.tmpdir, 'Pictures'))
     collected = {}
     for candidate in candidates:
         if not os.path.exists(candidate):
             continue
         base, names = doc.subObjects(candidate)
         if names:
             doc.fixImages(base, names, collected)
     idatastream.getMetadata()['cache_key_html'] = new_key
     idatastream.setData(markup)
     idatastream.setSubObjects(collected)
     return idatastream
예제 #13
0
    def convert(self, data, idatastream, filename=None, **kwargs):
        """Convert `data` to HTML and store the result in `idatastream`.

        A missing `filename` defaults to ``unknown.odt``.

        The HTML becomes the data of `idatastream`, the cache key
        returned by the conversion is written to its metadata under
        ``cache_key_html``. Files found in the document's temporary
        directory and its ``Pictures`` subdirectory are handed to
        ``fixImages()`` together with a dict, which is afterwards set
        as the subobjects of `idatastream`.

        Returns `idatastream`.
        """
        if not filename:
            filename = "unknown.odt"
        cache_location = self.cache_dir or None
        known_key = self.get_cache_key("cache_key_html", idatastream)
        doc = Document(filename, data, cache_dir=cache_location)
        markup, new_key = doc.convert(cache_key=known_key)
        candidates = (doc.tmpdir, os.path.join(doc.tmpdir, "Pictures"))
        collected = {}
        for candidate in candidates:
            if not os.path.exists(candidate):
                continue
            base, names = doc.subObjects(candidate)
            if names:
                doc.fixImages(base, names, collected)
        idatastream.getMetadata()["cache_key_html"] = new_key
        idatastream.setData(markup)
        idatastream.setSubObjects(collected)
        return idatastream
예제 #14
0
class DocumentTests(unittest.TestCase):
    """Tests for `Document`: attributes, subobject lookup and
    conversion to HTML and PDF, each with and without a cache dir.
    """

    def setUp(self):
        # Each test gets a fresh working dir, which also serves as
        # cache dir in the caching tests.
        self.workdir = tempfile.mkdtemp()
        input_dir = os.path.join(os.path.dirname(__file__), 'input')
        self.doc_simple1_path = os.path.join(input_dir, 'simpledoc1.doc')
        # Read the sample document and close the file right away.
        with open(self.doc_simple1_path, 'rb') as fd:
            self.doc_simple1 = fd.read()
        self.idata = datastream('mytestdoc.doc')
        self.idata.setData(self.doc_simple1)
        self.doc = None   # to be set by tests

    def tearDown(self):
        shutil.rmtree(self.workdir)

    def test_attribs(self):
        # Documents have some attributes, notably a tmpdir and a fullname
        self.doc = Document('mytestdoc.doc', self.doc_simple1)
        assert self.doc.tmpdir is not None
        assert self.doc.fullname[-14:] == '/mytestdoc.doc'
        assert self.doc.cache_dir is None

    def test_del_removes_tmp_dir(self):
        # Deleted `Document`s do not leave their working file behind
        self.doc = Document('mytestdoc.doc', self.doc_simple1)
        path = self.doc.fullname
        assert os.path.isfile(path)
        del self.doc
        assert not os.path.isfile(path)

    def test_subobjects_no_files(self):
        # Looking for subobjects in the (still empty) workdir gives
        # the workdir path with a trailing slash and no filenames.
        self.doc = Document('mytestdoc.doc', self.doc_simple1)
        path, filenames = self.doc.subObjects(self.workdir)
        self.assertEqual(path, self.workdir + '/')
        self.assertEqual(filenames, [])

    def test_subobjects_usual_image_files(self):
        # Usual image files and css files are found by subObjects()
        self.doc = Document('mytestdoc.doc', self.doc_simple1)
        for name in ['fake.gif', 'fake.jpg', 'fake.png', 'styles.css']:
            # Create an empty file; the `with` block closes the handle
            # right away instead of leaving that to garbage collection.
            with open(os.path.join(self.workdir, name), 'w') as fd:
                fd.write('')
        path, filenames = self.doc.subObjects(self.workdir)
        assert sorted(filenames) == [
            'fake.gif', 'fake.jpg', 'fake.png', 'styles.css']

    def test_convert(self):
        # We can convert docs to HTML
        self.doc = Document('mytestdoc.doc', self.doc_simple1)
        html, cache_key = self.doc.convert()
        assert 'A simple document.' in html
        assert '</p>' in html
        # no cache_dir, no cache key
        assert cache_key is None

    def test_convert_w_cache_dir(self):
        # With a cache dir, converting also returns a cache key
        self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
        html, cache_key = self.doc.convert()
        assert 'A simple document.' in html
        assert '</p>' in html
        self.assertEqual(cache_key, 'cc8c3b702ca3865608732f612691978b_1_1')

    def test_convert_w_cache_key(self):
        # Cached docs are retrieved
        self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
        html1, cache_key1 = self.doc.convert()  # store doc in cache
        html2, cache_key2 = self.doc.convert(cache_key=cache_key1)
        assert html1 == html2
        assert cache_key1 == cache_key2

    def test_convert_to_pdf(self):
        # We can convert docs to PDF
        self.doc = Document('mytestdoc.doc', self.doc_simple1)
        pdf, cache_key = self.doc.convertToPDF()
        self.assertEqual(pdf[:6], '%PDF-1')
        # no cache_dir, no cache key
        assert cache_key is None

    def test_convert_to_pdf_w_cache_dir(self):
        # With a cache dir, converting to PDF also returns a cache key
        self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
        pdf, cache_key = self.doc.convertToPDF()
        self.assertEqual(pdf[:6], '%PDF-1')
        self.assertEqual(cache_key, 'cc8c3b702ca3865608732f612691978b_1_1')

    def test_convert_to_pdf_w_cache_key(self):
        # Cached docs are retrieved
        self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
        pdf1, cache_key1 = self.doc.convertToPDF()  # store doc in cache
        pdf2, cache_key2 = self.doc.convertToPDF(cache_key=cache_key1)
        assert pdf1 == pdf2
        assert cache_key1 == cache_key2

    def test_convert_to_pdf_cached_wo_cache_key(self):
        # We can get a cached doc also without passing a cache key
        # (but it is expensive).
        self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
        pdf1, cache_key1 = self.doc.convertToPDF()  # store doc in cache
        # Modify the cached result to distinguish it from a freshly
        # converted doc.
        from ulif.openoffice.cachemanager import CacheManager
        cm = CacheManager(self.workdir)
        cached_path = cm.get_cached_file(cache_key1)
        # Close the file explicitly so the fake content is flushed to
        # disk before the cache is read again below.
        with open(cached_path, 'wb') as fd:
            fd.write('My Fake Result')
        # Now re-get the document. We should get the cached copy.
        self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
        pdf2, cache_key2 = self.doc.convertToPDF()
        self.assertEqual(pdf2, 'My Fake Result')
        self.assertEqual(cache_key2, cache_key1)
예제 #15
0
 def test_convert_to_pdf_w_cache_dir(self):
     # With a cache dir set, a PDF conversion also returns a cache
     # key
     self.doc = Document('mytestdoc.doc', self.doc_simple1, self.workdir)
     result, key = self.doc.convertToPDF()
     self.assertEqual(result[:6], '%PDF-1')
     self.assertEqual(key, 'cc8c3b702ca3865608732f612691978b_1_1')
예제 #16
0
 def test_attribs(self):
     # A fresh document has a tmpdir, a fullname ending in the
     # given filename and, unless one was passed in, no cache_dir
     doc = self.doc = Document('mytestdoc.doc', self.doc_simple1)
     assert doc.tmpdir is not None
     assert doc.fullname[-14:] == '/mytestdoc.doc'
     assert doc.cache_dir is None