コード例 #1
0
 def test_cleanup_html_fix_img_links(self):
     """Check the image map ``cleanup_html`` builds for the sample file.

     Reads ``input/image_sample.html`` (located next to this test
     module) as bytes, runs ``cleanup_html`` with ``fix_img_links=True``
     and expects the returned image map to hold four entries.
     """
     html_input_path = os.path.join(
         os.path.dirname(__file__), 'input', 'image_sample.html')
     # Read through a context manager so the handle is closed right away
     # instead of lingering until garbage collection.
     with open(html_input_path, 'rb') as fd:
         html_input = fd.read()
     result, img_map = cleanup_html(
         html_input, 'sample.html', fix_img_links=True)
     assert len(img_map) == 4
コード例 #2
0
ファイル: processor.py プロジェクト: ulif/ulif.openoffice
 def process(self, path, metadata):
     """Run ``cleanup_html`` over the file at `path` and rename its images.

     Files whose extension is not listed in ``self.supported_extensions``
     are passed through unchanged.  Otherwise `path` is handed to
     ``copy_to_secure_location`` (presumably returning the directory
     that holds the copy -- confirm against that helper),
     ``remove_file_dir`` is called on the original `path`, and the
     copied file is rewritten in place with the cleaned-up HTML.

     The cleanup steps are switched by the ``html_cleaner_fix_*``
     entries of ``self.options``.

     :param path: path of the document to process.
     :param metadata: passed through to the caller untouched.
     :returns: tuple ``(path_of_resulting_file, metadata)``.
     """
     ext = os.path.splitext(path)[1]
     if ext not in self.supported_extensions:
         return path, metadata
     basename = os.path.basename(path)
     src_path = os.path.join(copy_to_secure_location(path), basename)
     src_dir = os.path.dirname(src_path)
     remove_file_dir(path)
     # Close the reader before the same file is reopened for writing
     # below; the bare ``codecs.open(...).read()`` leaked the handle.
     with codecs.open(src_path, 'r', 'utf-8') as fd:
         html_input = fd.read()
     new_html, img_name_map = cleanup_html(
         html_input,
         basename,
         fix_head_nums=self.options['html_cleaner_fix_heading_numbers'],
         fix_img_links=self.options['html_cleaner_fix_image_links'],
         fix_sdfields=self.options['html_cleaner_fix_sd_fields'],
     )
     with codecs.open(src_path, 'wb', 'utf-8') as fd:
         fd.write(new_html)
     # Rename images
     self.rename_img_files(src_dir, img_name_map)
     return src_path, metadata
コード例 #3
0
ファイル: processor.py プロジェクト: ulif/ulif.openoffice
 def process(self, path, metadata):
     """Clean the HTML document at `path` and rename referenced images.

     If the extension of `path` is not in ``self.supported_extensions``
     the arguments are returned as-is.  Otherwise the work happens on
     a copy: `path` is passed to ``copy_to_secure_location`` (assumed
     to return the directory containing the copy -- TODO confirm),
     ``remove_file_dir`` is called on the original `path`, and the copy
     is overwritten with the output of ``cleanup_html``.

     Which fixes ``cleanup_html`` applies is taken from the
     ``html_cleaner_fix_*`` keys of ``self.options``.

     :param path: path of the document to process.
     :param metadata: returned to the caller unmodified.
     :returns: tuple ``(path_of_resulting_file, metadata)``.
     """
     ext = os.path.splitext(path)[1]
     if ext not in self.supported_extensions:
         return path, metadata
     basename = os.path.basename(path)
     src_path = os.path.join(
         copy_to_secure_location(path), basename)
     src_dir = os.path.dirname(src_path)
     remove_file_dir(path)
     # Read via a context manager: the file is reopened for writing a
     # few lines further down, so the reader must not stay open.
     with codecs.open(src_path, 'r', 'utf-8') as fd:
         html_input = fd.read()
     new_html, img_name_map = cleanup_html(
         html_input,
         basename,
         fix_head_nums=self.options['html_cleaner_fix_heading_numbers'],
         fix_img_links=self.options['html_cleaner_fix_image_links'],
         fix_sdfields=self.options['html_cleaner_fix_sd_fields'],
         )
     with codecs.open(src_path, 'wb', 'utf-8') as fd:
         fd.write(new_html)
     # Rename images
     self.rename_img_files(src_dir, img_name_map)
     return src_path, metadata
コード例 #4
0
 def test_cleanup_html_dont_fix_sdfields(self):
     """With ``fix_sdfields=False`` the input must come back unchanged."""
     markup = '<p>Blah<sdfield type="PAGE">8</sdfield></p>'
     cleaned, _img_map = cleanup_html(
         markup, 'sample.html', fix_sdfields=False)
     assert markup == cleaned
コード例 #5
0
 def test_cleanup_html_fix_sdfields(self):
     """A default call must turn ``<sdfield>`` into a classed ``<span>``."""
     markup = '<p>Blah<sdfield type="PAGE">8</sdfield></p>'
     wanted = '<p>Blah<span class="sdfield" type="PAGE">8</span></p>'
     cleaned, _img_map = cleanup_html(markup, 'sample.html')
     assert cleaned == wanted
コード例 #6
0
 def test_cleanup_html_fix_head_nums_linebreaks(self):
     """Heading numbers after a line break must still get wrapped.

     The whitespace between the opening tag and the number is expected
     to stay in front of the inserted ``u-o-headnum`` span.
     """
     head_num = '1.1.'
     template = (
         '<body><h1>\n <span class="u-o-headnum">%s</span>'
         'Heading</h1></body>'
     )
     markup = '<body><h1>\n 1.1.Heading</h1></body>'
     cleaned, _img_map = cleanup_html(markup, 'sample.html')
     assert cleaned == template % head_num
コード例 #7
0
 def test_cleanup_html_fix_head_nums_tag_attrs(self):
     """Heading tags carrying attributes must get their numbers wrapped.

     The ``class="foo"`` attribute on the heading is expected to be
     kept as it is.
     """
     head_num = '1.1.'
     template = (
         '<body><h6 class="foo"><span class="u-o-headnum">%s'
         '</span>Heading</h6></body>'
     )
     markup = '<body><h6 class="foo">1.1.Heading</h6></body>'
     cleaned, _img_map = cleanup_html(markup, 'sample.html')
     assert cleaned == template % head_num
コード例 #8
0
 def test_cleanup_html_fix_head_nums_no_nums(self):
     """A heading without a leading number must be left as it is."""
     markup = '<body><h1>Heading</h1></body>'
     wanted = '<body><h1>Heading</h1></body>'
     cleaned, _img_map = cleanup_html(markup, 'sample.html')
     assert cleaned == wanted
コード例 #9
0
 def test_cleanup_html_no_minify_by_default(self):
     """By default ``cleanup_html`` must not minify the markup.

     The line breaks between the nested spans have to survive.
     """
     markup = '<span>\n<span>foo</span>\n</span>'
     cleaned, _img_map = cleanup_html(markup, 'sample.html')
     assert cleaned == markup
コード例 #10
0
 def test_cleanup_html_fix_img_links(self, samples_dir):
     """With ``fix_img_links=True`` the image map must hold four entries.

     The input is ``image_sample.html`` taken from the ``samples_dir``
     fixture.
     """
     sample_file = samples_dir.join("image_sample.html")
     markup = sample_file.read()
     _cleaned, name_map = cleanup_html(
         markup, 'sample.html', fix_img_links=True)
     assert len(name_map) == 4