Exemple #1
0
    def diff(self, request, *args, **kwargs):
        """Render this version as HTML with changes since the prior version marked.

        The object returned by ``self.get_object()`` is compared against the
        first entry of ``self.get_queryset()`` having a smaller id. When no
        such entry exists, ``None`` is handed to the differ as the old tree.

        Returns a ``Response`` carrying ``content`` (the serialised diffed
        HTML) and ``n_changes`` (the value returned by the differ).
        """
        # The result is cacheable: the underlying data is fixed, only the
        # formatting might change.
        current = self.get_object()

        # most recent version just before this one
        previous = self.get_queryset().filter(id__lt=current.id).first()

        differ = AttributeDiffer()

        old_html = ""
        if previous:
            previous_doc = previous._object_version.object
            previous_doc.document_xml = differ.preprocess_document_diff(
                previous_doc.document_xml)
            old_html = previous_doc.to_html()

        current_doc = current._object_version.object
        current_doc.document_xml = differ.preprocess_document_diff(
            current_doc.document_xml)
        new_html = current_doc.to_html()

        # An empty rendering is treated the same as "no previous version".
        if old_html:
            old_tree = lxml.html.fromstring(old_html)
        else:
            old_tree = None
        new_tree = lxml.html.fromstring(new_html)

        n_changes = differ.diff_document_html(old_tree, new_tree)

        # The new tree is what gets serialised after the differ has run on it.
        payload = {
            'content': lxml.html.tostring(new_tree, encoding='unicode'),
            'n_changes': n_changes,
        }

        # TODO: include other diff'd attributes

        return Response(payload)
Exemple #2
0
    def post(self, request):
        """Diff the submitted document content against a stored document.

        The request body must validate against ``DocumentAPISerializer``
        (with ``document.content`` made mandatory) and should carry
        ``comparison_doc_id``, the id of the stored ``Document`` to compare
        against.

        Returns a ``Response`` with ``content`` (the serialised diffed HTML of
        the submitted document) and ``n_changes``.

        Raises:
            Http404: if ``comparison_doc_id`` is missing, is not a usable id,
                or does not match a stored document.
        """
        serializer = DocumentAPISerializer(data=self.request.data)
        serializer.fields['document'].fields['content'].required = True
        serializer.is_valid(raise_exception=True)

        differ = AttributeDiffer()

        # Build an unsaved document from the submitted data and render it.
        current_document = serializer.fields['document'].update_document(
            Document(), serializer.validated_data['document'])
        current_document.document_xml = differ.preprocess_document_diff(
            current_document.document_xml)
        current_html = current_document.to_html()

        # A missing or malformed id is a client error; answer 404 rather than
        # letting KeyError/ValueError/TypeError surface as a server error.
        comparison_doc_id = request.data.get('comparison_doc_id')
        if comparison_doc_id is None:
            raise Http404()
        try:
            comparison_document = Document.objects.get(id=comparison_doc_id)
        except (Document.DoesNotExist, ValueError, TypeError):
            raise Http404()
        comparison_document.document_xml = differ.preprocess_document_diff(
            comparison_document.document_xml)
        comparison_document_html = comparison_document.to_html()

        current_tree = lxml.html.fromstring(current_html)
        comparison_tree = lxml.html.fromstring(comparison_document_html)
        # The stored document is passed as the "old" side, the submitted one
        # as the "new" side; the submitted tree is what gets serialised.
        n_changes = differ.diff_document_html(comparison_tree, current_tree)

        diff = lxml.html.tostring(current_tree, encoding='utf-8')

        # TODO: include other diff'd attributes

        return Response({
            'content': diff,
            'n_changes': n_changes,
        })
Exemple #3
0
    def post(self, request, document_id):
        """Diff submitted changes to ``self.document`` against its stored copy.

        The request body is validated with ``DocumentDiffSerializer``. When the
        validated data carries an ``element_id``, only that element is
        rendered and diffed; otherwise the whole document is.

        Returns a ``Response`` with ``html_diff`` and ``n_changes``.
        """
        serializer = DocumentDiffSerializer(instance=self.document,
                                            data=self.request.data)
        serializer.is_valid(raise_exception=True)

        differ = AttributeDiffer()

        local_doc = self.document

        # remote_doc is a separate copy loaded from the database and acts as
        # the baseline; local_doc receives the submitted changes.
        remote_doc = Document.objects.get(pk=local_doc.pk)
        serializer.fields['document'].update_document(
            local_doc, serializer.validated_data['document'])

        local_doc.content = differ.preprocess_document_diff(
            local_doc.document_xml).decode('utf-8')
        remote_doc.content = differ.preprocess_document_diff(
            remote_doc.document_xml).decode('utf-8')

        element_id = serializer.validated_data.get('element_id')
        if element_id:
            # handle certain elements that don't have ids
            if element_id in ['preface', 'preamble', 'components']:
                # Safe to interpolate: the value is one of the fixed names.
                xpath = f'//a:{element_id}'
                xpath_vars = {}
            else:
                # Bind the request-supplied id as an XPath variable instead of
                # splicing it into the expression, so quotes or other XPath
                # syntax in the value cannot break or alter the query.
                xpath = '//a:*[@eId=$eid]'
                xpath_vars = {'eid': element_id}

            # Both lookups use the local document's namespace.
            namespaces = {'a': local_doc.doc.namespace}

            # diff just this element
            local_element = local_doc.doc.root.xpath(
                xpath, namespaces=namespaces, **xpath_vars)
            remote_element = remote_doc.doc.root.xpath(
                xpath, namespaces=namespaces, **xpath_vars)

            # An element missing on either side renders as None.
            local_html = local_doc.to_html(
                element=local_element[0]) if len(local_element) else None
            remote_html = remote_doc.to_html(
                element=remote_element[0]) if len(remote_element) else None
        else:
            # diff the whole document
            local_html = local_doc.to_html()
            remote_html = remote_doc.to_html()

        # The local side always needs a tree, so fall back to an empty div;
        # the remote side may legitimately be None.
        local_tree = lxml.html.fromstring(local_html or "<div></div>")
        remote_tree = lxml.html.fromstring(
            remote_html) if remote_html else None
        n_changes, diff = differ.diff_document_html(remote_tree, local_tree)

        # The differ may hand back either a string or a tree; serialise trees.
        if not isinstance(diff, str):
            diff = lxml.html.tostring(diff, encoding='utf-8')

        # TODO: include other diff'd attributes

        return Response({
            'html_diff': diff,
            'n_changes': n_changes,
        })
Exemple #4
0
 def setUp(self):
     """Create the AttributeDiffer instance and store it on ``self.differ``."""
     self.differ = AttributeDiffer()
Exemple #5
0
class AttributeDifferTestCase(TestCase):
    """Checks the markup and list diffs produced by ``AttributeDiffer``."""

    def setUp(self):
        self.differ = AttributeDiffer()

    def _check_html_diff(self, old_html, new_html, expected_html):
        """Diff two HTML snippets and compare the resulting new tree's HTML."""
        old_tree = as_tree(old_html)
        new_tree = as_tree(new_html)
        self.differ.diff_document_html(old_tree, new_tree)

        # The expectation is checked against the *new* tree after diffing.
        self.assertEqual(as_html(new_tree), expected_html)

    def _check_list_diff(self, old_items, new_items, expected_changes):
        """Diff two lists and compare the full result dict."""
        actual = self.differ.diff_lists('test', 'Test', old_items, new_items)
        expected = {
            'attr': 'test',
            'title': 'Test',
            'type': 'list',
            'changes': expected_changes,
        }
        self.assertEqual(expected, actual)

    def test_text_changed(self):
        self._check_html_diff(
            '<p>abc 123</p>',
            '<p>def 456</p>',
            '<p><del>abc</del><ins>def</ins> <del>123</del><ins>456</ins></p>',
        )

    def test_text_partially_changed(self):
        self._check_html_diff(
            '<p>some old text</p>',
            '<p>some new text</p>',
            '<p>some <del>old</del><ins>new</ins> text</p>',
        )

    def test_text_partially_changed_with_elements(self):
        self._check_html_diff(
            '<p>some old text <b>no change</b> text <i>no change</i></p>',
            '<p>some new text <b>no change</b> text <i>no change</i></p>',
            '<p>some <del>old</del><ins>new</ins> text <b>no change</b> text <i>no change</i></p>',
        )

    def test_tail_changed(self):
        self._check_html_diff(
            '<p>something <b>bold</b> 123 xx <i>and</i> same </p>',
            '<p>something <b>bold</b> 456 xx <i>and</i> same </p>',
            '<p>something <b>bold</b> <del>123</del><ins>456</ins> xx <i>and</i> same </p>',
        )

    def test_inline_tag_removed(self):
        self._check_html_diff(
            '<p>Some text <b>bold text</b> and a tail.</p>',
            '<p>Some text bold text and a tail.</p>',
            '<p>Some text <ins>bold text and a tail.</ins><b class="del">bold text</b><del> and a tail.</del></p>',
        )

    def test_inline_tag_added(self):
        self._check_html_diff(
            '<p>Some text bold text and a tail.</p>',
            '<p>Some text <b>bold text</b> and a tail.</p>',
            '<p>Some text <del>bold text and a tail.</del><b class="ins">bold text</b><ins> and a tail.</ins></p>',
        )

    def test_diff_lists_deleted(self):
        self._check_list_diff(
            ['1', '2', '3'],
            ['1', '3'],
            [
                {'html_new': '1', 'html_old': '1'},
                {
                    'html_new': '',
                    'html_old': '<del>2</del>',
                    'new': None,
                    'old': '2',
                },
                {'html_new': '3', 'html_old': '3'},
            ],
        )

    def test_diff_lists_empty(self):
        self._check_list_diff(
            ['1', '2', '3'],
            [],
            [
                {
                    'html_new': '',
                    'html_old': '<del>1</del>',
                    'new': None,
                    'old': '1',
                },
                {
                    'html_new': '',
                    'html_old': '<del>2</del>',
                    'new': None,
                    'old': '2',
                },
                {
                    'html_new': '',
                    'html_old': '<del>3</del>',
                    'new': None,
                    'old': '3',
                },
            ],
        )

    def test_diff_lists_added(self):
        self._check_list_diff(
            ['1', '3'],
            ['1', '2', '3'],
            [
                {'html_new': '1', 'html_old': '1'},
                {
                    'html_new': '<ins>2</ins>',
                    'html_old': '',
                    'new': '2',
                    'old': None,
                },
                {'html_new': '3', 'html_old': '3'},
            ],
        )