Ejemplo n.º 1
0
 def test_remove_file_dir_dir(self, tmpdir):
     # Handing in a directory path removes that directory although
     # it contains a file; the parent directory is left in place.
     sample_dir = tmpdir.join("sample_dir")
     sample_dir.mkdir()
     sample_dir.join("sample.txt").write("Hi!")
     remove_file_dir(str(sample_dir))
     assert tmpdir.exists() is True
     assert sample_dir.exists() is False
Ejemplo n.º 2
0
 def test_remove_file_dir_file(self, tmpdir):
     # Handing in a file path removes the file and also the
     # directory containing it; the parent of that directory stays.
     sample_dir = tmpdir.join("sample_dir")
     sample_dir.mkdir()
     sample_file = sample_dir.join("sample.txt")
     sample_file.write("Hi!")
     remove_file_dir(str(sample_file))
     assert tmpdir.exists() is True
     assert sample_dir.exists() is False
Ejemplo n.º 3
0
 def _handle_error(self, proc, input, output, metadata):
     """Record an error description and remove `input` and `output`.

     An ``error-descr`` already present in `metadata` is kept;
     otherwise a generic message naming ``proc.prefix`` is stored.
     The (modified) `metadata` is returned.
     """
     fallback = 'problem while processing %s' % proc.prefix
     metadata['error-descr'] = metadata.get('error-descr', fallback)
     for leftover in (input, output):
         remove_file_dir(leftover)
     return metadata
Ejemplo n.º 4
0
 def test_remove_file_dir_dir(self):
     # Handing in a directory removes it although it contains a
     # file; the working directory around it is left in place.
     sample_path = os.path.join(self.workdir, 'sampledir')
     sample_file = os.path.join(sample_path, 'sample.txt')
     os.mkdir(sample_path)
     # Close the handle before the removal is attempted and write
     # bytes, as the file is opened in binary mode.
     with open(sample_file, 'wb') as fd:
         fd.write(b'Hi!')
     remove_file_dir(sample_path)
     assert os.path.exists(self.workdir) is True
     assert os.path.exists(sample_path) is False
Ejemplo n.º 5
0
 def test_remove_file_dir_file(self):
     # When we remove a file, the containing dir is removed as well;
     # the working directory around it is left in place.
     sample_path = os.path.join(self.workdir, 'sampledir')
     sample_file = os.path.join(sample_path, 'sample.txt')
     os.mkdir(sample_path)
     # Close the handle before the removal is attempted and write
     # bytes, as the file is opened in binary mode.
     with open(sample_file, 'wb') as fd:
         fd.write(b'Hi!')
     remove_file_dir(sample_file)
     assert os.path.exists(self.workdir) is True
     assert os.path.exists(sample_path) is False
Ejemplo n.º 6
0
    def process(self, input=None, metadata={'error': False}):
        """Feed `input` through every processor of the pipeline.

        Which processors are run, and in which order, depends on the
        ``procord`` option passed in. A value like ``oocp,oocp`` runs
        the ``oocp`` processor (the :class:`OOConvProcessor`,
        registered under ``oocp`` in `setup.py`) two times.

        Each processor is created with all options and its `process`
        method is called with the current `input` (normally a
        filepath) and the `metadata` dict. What it returns becomes
        input and metadata for the next processor.

        As soon as a processor sets the ``error`` entry of `metadata`
        to ``True``, the whole run is aborted: ``None`` is returned as
        output, along with the `metadata`, which may contain hints
        about the reasons.

        If no processor reports an error, the output of the last
        processor is returned along with the last `metadata`.

        .. note:: after each processing step the (then old) input is
                  removed, unless the processor returned its input
                  unchanged.
        """
        # Shallow copy: keys set on this dict do not show up in the
        # dict passed in (or in the shared default value).
        metadata = metadata.copy()
        current = input

        for proc_class in self._build_pipeline():
            worker = proc_class(self.all_options)
            result, metadata = worker.process(current, metadata)
            if metadata['error'] is True:
                return None, self._handle_error(
                    proc_class, current, result, metadata)
            if current != result:
                remove_file_dir(current)
            current = result
        return current, metadata
Ejemplo n.º 7
0
    def process(self, input=None, metadata={'error': False}):
        """Run the configured processors one after another.

        The set and order of processors is taken from the ``procord``
        option. If it is set to a value like ``oocp,oocp``, the
        ``oocp`` processor (the :class:`OOConvProcessor`, registered
        under ``oocp`` in `setup.py`) is called two times.

        Every processor gets the `metadata` dict and an `input`
        (normally a filepath) via its `process` method; its return
        values are handed to the next processor in line.

        A processor signals a problem by setting the ``error`` entry
        of `metadata` to ``True``. In that case processing stops and
        ``None`` is returned as output together with the `metadata`,
        which may carry hints about what went wrong.

        Otherwise the output of the last processor and the last
        `metadata` are returned.

        .. note:: the (then old) input is removed after each step in
                  which the processor produced a different output.
        """
        # Operate on a shallow copy so keys set here stay out of the
        # dict handed in (and out of the shared default value).
        metadata = metadata.copy()
        stages = self._build_pipeline()
        source = input

        for stage in stages:
            product, metadata = stage(self.all_options).process(
                source, metadata)
            failed = metadata['error'] is True
            if failed:
                metadata = self._handle_error(
                    stage, source, product, metadata)
                return None, metadata
            if source != product:
                remove_file_dir(source)
            source = product
        return source, metadata
Ejemplo n.º 8
0
 def process(self, path, metadata):
     """Run `cleanup_html` over the document at `path`.

     Paths whose filename extension is not listed in
     ``supported_extensions`` are returned untouched. Otherwise the
     document is copied via `copy_to_secure_location`, the original
     `path` is handed to `remove_file_dir`, and the copy is
     rewritten in place with the cleaned-up HTML. Image files are
     then renamed according to the name map `cleanup_html` returned.

     Returns a tuple (``result_path``, ``metadata``); `metadata` is
     passed through unchanged.
     """
     ext = os.path.splitext(path)[1]
     if ext not in self.supported_extensions:
         return path, metadata
     basename = os.path.basename(path)
     # presumably `copy_to_secure_location` returns the directory the
     # document was copied to -- verify against its definition
     src_path = os.path.join(copy_to_secure_location(path), basename)
     src_dir = os.path.dirname(src_path)
     remove_file_dir(path)
     # Read through a context manager so the handle is closed before
     # the same file is reopened for writing below.
     with codecs.open(src_path, 'r', 'utf-8') as fd:
         html = fd.read()
     new_html, img_name_map = cleanup_html(
         html,
         basename,
         fix_head_nums=self.options['html_cleaner_fix_heading_numbers'],
         fix_img_links=self.options['html_cleaner_fix_image_links'],
         fix_sdfields=self.options['html_cleaner_fix_sd_fields'],
     )
     with codecs.open(src_path, 'wb', 'utf-8') as fd:
         fd.write(new_html)
     # Rename images
     self.rename_img_files(src_dir, img_name_map)
     return src_path, metadata
Ejemplo n.º 9
0
 def process(self, path, metadata):
     """Clean up the HTML document at `path` using `cleanup_html`.

     If the filename extension of `path` is not one of the
     ``supported_extensions``, `path` and `metadata` are returned
     as-is. Otherwise the document is copied via
     `copy_to_secure_location`, `remove_file_dir` is called on the
     original `path`, the copy is overwritten with the cleaned-up
     HTML, and image files are renamed following the name map
     returned by `cleanup_html`.

     Returns a tuple (``result_path``, ``metadata``); `metadata` is
     not modified here.
     """
     ext = os.path.splitext(path)[1]
     if ext not in self.supported_extensions:
         return path, metadata
     basename = os.path.basename(path)
     # presumably `copy_to_secure_location` returns the directory the
     # document was copied to -- verify against its definition
     src_path = os.path.join(
         copy_to_secure_location(path), basename)
     src_dir = os.path.dirname(src_path)
     remove_file_dir(path)
     # Use a context manager for reading, so no open handle is left
     # behind when the file gets rewritten below.
     with codecs.open(src_path, 'r', 'utf-8') as fd:
         html = fd.read()
     new_html, img_name_map = cleanup_html(
         html,
         basename,
         fix_head_nums=self.options['html_cleaner_fix_heading_numbers'],
         fix_img_links=self.options['html_cleaner_fix_image_links'],
         fix_sdfields=self.options['html_cleaner_fix_sd_fields'],
         )
     with codecs.open(src_path, 'wb', 'utf-8') as fd:
         fd.write(new_html)
     # Rename images
     self.rename_img_files(src_dir, img_name_map)
     return src_path, metadata
Ejemplo n.º 10
0
    def process(self, path, metadata):
        """Run the external ``tidy`` program over the HTML at `path`.

        Paths whose filename extension is not listed in
        ``supported_extensions`` are returned untouched. Otherwise the
        document is copied via `copy_to_secure_location`, the original
        `path` is handed to `remove_file_dir`, the copy is passed
        through `rename_sdfield_tags` and finally modified in place by
        ``tidy``.

        Returns a tuple (``result_path``, ``metadata``); `metadata` is
        passed through unchanged.
        """
        # Imported locally to keep this method self-contained.
        import subprocess

        ext = os.path.splitext(path)[1]
        if ext not in self.supported_extensions:
            return path, metadata
        basename = os.path.basename(path)
        # presumably `copy_to_secure_location` returns the directory
        # the document was copied to -- verify against its definition
        src_path = os.path.join(copy_to_secure_location(path), basename)
        src_dir = os.path.dirname(src_path)
        remove_file_dir(path)

        # Let rename_sdfield_tags() handle <SDFIELD> tags, if any.
        # The handle is closed before the file is reopened for writing.
        with open(src_path, 'rb') as fd:
            raw_html = fd.read().decode('utf-8')
        cleaned_html = rename_sdfield_tags(raw_html)
        with open(src_path, 'wb') as fd:
            fd.write(cleaned_html.encode('utf-8'))

        error_file = os.path.join(src_dir, 'tidy-errors')
        # Pass an argument list instead of a shell string: file names
        # containing blanks or shell metacharacters are then handed to
        # tidy verbatim. The exit status is ignored, as before.
        subprocess.call([
            'tidy', '-asxhtml', '-clean', '-indent', '-modify', '-utf8',
            '-f', error_file, src_path])
        os.unlink(error_file)
        return src_path, metadata
Ejemplo n.º 11
0
    def process(self, path, metadata):
        """Tidy up the HTML document at `path` with the ``tidy`` tool.

        If the filename extension of `path` is not one of the
        ``supported_extensions``, `path` and `metadata` are returned
        as-is. Otherwise the document is copied via
        `copy_to_secure_location`, `remove_file_dir` is called on the
        original `path`, the copy is run through `rename_sdfield_tags`
        and then modified in place by the external ``tidy`` program.

        Returns a tuple (``result_path``, ``metadata``); `metadata` is
        not modified here.
        """
        # Imported locally to keep this method self-contained.
        import subprocess

        ext = os.path.splitext(path)[1]
        if ext not in self.supported_extensions:
            return path, metadata
        basename = os.path.basename(path)
        # presumably `copy_to_secure_location` returns the directory
        # the document was copied to -- verify against its definition
        src_path = os.path.join(
            copy_to_secure_location(path), basename)
        src_dir = os.path.dirname(src_path)
        remove_file_dir(path)

        # Let rename_sdfield_tags() handle <SDFIELD> tags, if any.
        # Reading via a context manager closes the handle before the
        # file is rewritten.
        with open(src_path, 'rb') as fd:
            raw_html = fd.read().decode('utf-8')
        cleaned_html = rename_sdfield_tags(raw_html)
        with open(src_path, 'wb') as fd:
            fd.write(cleaned_html.encode('utf-8'))

        error_file = os.path.join(src_dir, 'tidy-errors')
        # An argument list (no shell involved) keeps file names with
        # blanks or shell metacharacters intact. The exit status is
        # ignored, as before.
        tidy_cmd = [
            'tidy', '-asxhtml', '-clean', '-indent', '-modify', '-utf8',
            '-f', error_file, src_path]
        subprocess.call(tidy_cmd)
        os.unlink(error_file)
        return src_path, metadata
Ejemplo n.º 12
0
    def process(self, path, metadata):
        """Move CSS of the HTML document at `path` to a separate file.

        Paths whose filename extension is not listed in
        ``supported_extensions`` are returned untouched. Otherwise the
        document is copied via `copy_to_secure_location` and the
        original `path` is handed to `remove_file_dir`. The copy is
        run through `extract_css`, the CSS found through
        `cleanup_css`. If the resulting CSS is not ``None`` it is
        written to a ``.css`` file next to the document; the document
        itself is overwritten with the HTML `extract_css` returned.

        Returns a tuple (``result_path``, ``metadata``); `metadata` is
        passed through unchanged.
        """
        ext = os.path.splitext(path)[1]
        if ext not in self.supported_extensions:
            return path, metadata
        basename = os.path.basename(path)
        # presumably `copy_to_secure_location` returns the directory
        # the document was copied to -- verify against its definition
        src_path = os.path.join(
            copy_to_secure_location(path), basename)
        remove_file_dir(path)

        # Close the handle right after reading; the same file is
        # reopened for writing further down.
        with open(src_path, 'rb') as fd:
            html = fd.read().decode('utf-8')
        new_html, css = extract_css(
            html, basename,
            prettify_html=self.options['css_cleaner_prettify_html'])
        # The second return value of cleanup_css() is not used here.
        css, _errors = cleanup_css(
            css, minified=self.options['css_cleaner_minified'])

        css_file = os.path.splitext(src_path)[0] + '.css'
        if css is not None:
            with open(css_file, 'wb') as fd:
                fd.write(css.encode('utf-8'))
        with open(src_path, 'wb') as fd:
            fd.write(new_html.encode('utf-8'))

        return src_path, metadata
Ejemplo n.º 13
0
    def process(self, path, metadata):
        """Extract and clean up CSS of the HTML document at `path`.

        If the filename extension of `path` is not one of the
        ``supported_extensions``, `path` and `metadata` are returned
        as-is. Otherwise the document is copied via
        `copy_to_secure_location` and `remove_file_dir` is called on
        the original `path`. `extract_css` splits the copy into HTML
        and CSS, and `cleanup_css` post-processes the CSS. CSS that is
        not ``None`` ends up in a ``.css`` file named after the
        document; the document is overwritten with the new HTML.

        Returns a tuple (``result_path``, ``metadata``); `metadata` is
        not modified here.
        """
        ext = os.path.splitext(path)[1]
        if ext not in self.supported_extensions:
            return path, metadata
        basename = os.path.basename(path)
        # presumably `copy_to_secure_location` returns the directory
        # the document was copied to -- verify against its definition
        src_path = os.path.join(copy_to_secure_location(path), basename)
        remove_file_dir(path)

        # Read via a context manager so no handle stays open while the
        # file is rewritten below.
        with open(src_path, 'rb') as fd:
            html = fd.read().decode('utf-8')
        new_html, css = extract_css(
            html,
            basename,
            prettify_html=self.options['css_cleaner_prettify_html'])
        # The second return value of cleanup_css() is not used here.
        css, _errors = cleanup_css(
            css, minified=self.options['css_cleaner_minified'])

        css_file = os.path.splitext(src_path)[0] + '.css'
        if css is not None:
            with open(css_file, 'wb') as fd:
                fd.write(css.encode('utf-8'))
        with open(src_path, 'wb') as fd:
            fd.write(new_html.encode('utf-8'))

        return src_path, metadata
Ejemplo n.º 14
0
    def process(self, path, metadata):
        """Do PSJ-specific adaptions of generated HTML input.

        `path` gives any (beforehand) generated HTML document. The
        path might be located in a directory with additional files
        (images, etc.) that could also be processed.

        `metadata` is a dictionary of metadata concerning the
        conversion process. It contains at least a key ``error`` with
        a boolean value (should always be `False`, otherwise the
        document conversion failed), and a key ``error-descr`` which
        contains some error message in case of failures.

        The ``error`` and ``error-descr`` should be set when
        unresolvable processing problems occur.

        Returns a tuple (``result_path``, ``metadata``) with
        ``result_path`` containing the path to the modified document
        and ``metadata`` containing the updated ``metadata``
        dictionary passed in.
        """
        ext = os.path.splitext(path)[1]
        if ext not in self.supported_extensions:
            return path, metadata
        basename = os.path.basename(path)
        # presumably `copy_to_secure_location` returns the directory
        # the document was copied to -- verify against its definition
        src_path = os.path.join(
            copy_to_secure_location(path), basename)
        remove_file_dir(path)

        # All files are handled via context managers, so each handle
        # is closed (and its content flushed) before the next step.
        with open(src_path, 'r') as fd:
            raw_html = fd.read()
        html = self.fix_html(raw_html)
        with open(src_path, 'w') as fd:
            fd.write(html.encode('utf-8'))

        src_dir = os.path.dirname(src_path)
        css = self.get_css(src_dir)
        css = self.fix_css(css)
        with open(os.path.join(src_dir, 'psj.css'), 'w') as fd:
            fd.write(css)
        return src_path, metadata
Ejemplo n.º 15
0
 def test_remove_file_dir_none(self):
     # `None` instead of a path is accepted without complaint
     result = remove_file_dir(None)
     assert result is None
Ejemplo n.º 16
0
 def test_remove_file_dir_not_existing(self):
     # a path that does not exist is accepted without complaint
     missing_path = 'not-existing-path'
     assert remove_file_dir(missing_path) is None
Ejemplo n.º 17
0
 def test_remove_file_dir_not_existiing(self):
     # paths that do not exist are accepted; `None` is returned
     result = remove_file_dir('not-existing-path')
     assert result is None
Ejemplo n.º 18
0
 def test_remove_file_dir_non_path(self):
     # objects that are no paths at all are accepted; `None` is returned
     no_path = object()
     assert remove_file_dir(no_path) is None
Ejemplo n.º 19
0
 def test_remove_file_dir_none(self):
     # `None` is accepted as argument; `None` is returned
     outcome = remove_file_dir(None)
     assert outcome is None
Ejemplo n.º 20
0
 def _handle_error(self, proc, input, output, metadata):
     """Store an error description, then remove `input` and `output`.

     If `metadata` already has an ``error-descr`` entry it is kept,
     otherwise a generic message containing ``proc.prefix`` is set.
     Returns the (modified) `metadata`.
     """
     default_descr = 'problem while processing %s' % proc.prefix
     descr = metadata.get('error-descr', default_descr)
     metadata['error-descr'] = descr
     remove_file_dir(input)
     remove_file_dir(output)
     return metadata
Ejemplo n.º 21
0
 def test_remove_file_dir_non_path(self):
     # arbitrary objects that are not file paths are accepted without
     # complaint
     result = remove_file_dir(object())
     assert result is None
Ejemplo n.º 22
0
 def tearDown(self):
     # Hand both locations used by the tests to remove_file_dir(),
     # working dir first.
     for attr_name in ('workdir', 'resultpath'):
         remove_file_dir(getattr(self, attr_name))