def migrate_doc(doc: DocCursor) -> DocCursor:
    """Run every transform over a document, persist it, and return it.

    Each callable in ``transforms`` (defined outside this function) receives
    the cursor produced by the previous one. The resulting tree is then
    renumbered with ``bulk_create=False`` and every node yielded by
    ``walk()`` is saved individually.
    """
    migrated = doc
    for apply_transform in transforms:
        migrated = apply_transform(migrated)

    migrated.nested_set_renumber(bulk_create=False)
    for tree_node in migrated.walk():
        tree_node.save()
    return migrated
def get_footnote_node(self, instance: FootnoteCitation):
    """Serialize the footnote subtree that a citation points at.

    A cursor is positioned on the cited node's identifier within
    ``self.cursor_tree`` and rendered with the same serializer class as the
    ``parent_serializer`` stored in this serializer's context. The nested
    serializer is told it is not the root via ``is_root: False``.
    """
    target_identifier = instance.footnote_node.identifier
    subtree = DocCursor(self.cursor_tree, target_identifier)
    # Reuse whichever class the parent serializer happens to be.
    serializer_cls = type(self.context['parent_serializer'])
    nested_context = {'is_root': False}
    return serializer_cls(subtree, context=nested_context).data
def test_external_links():
    """The "content" field should contain serialized ExternalLinks."""
    policy = mommy.make(Policy)
    para = DocCursor.new_tree('para', text='Go over there!', policy=policy)
    para.nested_set_renumber()
    link_start = len('Go over ')
    link_end = len('Go over there')
    para.externallinks.create(
        start=link_start, end=link_end, href='http://example.com/aaa')

    serialized = doc_cursor.DocCursorSerializer(
        para, context={'policy': policy}).data

    def plain(text):
        """Expected serialization of an un-annotated run of text."""
        return {'content_type': '__text__', 'inlines': [], 'text': text}

    expected_content = [
        plain('Go over '),
        {
            'content_type': 'external_link',
            'inlines': [plain('there')],
            'href': 'http://example.com/aaa',
            'text': 'there',
        },
        plain('!'),
    ]
    assert serialized['content'] == expected_content
def test_content_no_annotations():
    """A node without annotations serializes to one plain-text chunk."""
    doc_node = mommy.make(DocNode, text='Some text here')
    serializer = doc_cursor.DocCursorSerializer(
        DocCursor.load_from_model(doc_node))
    content = serializer.data['content']

    assert len(content) == 1
    only_chunk = content[0]
    assert only_chunk['text'] == 'Some text here'
    assert only_chunk['content_type'] == '__text__'
def test_json_put_works_for_admin_users(admin_client):
    """An admin can GET a document as JSON, edit it, and PUT it back."""
    policy = mommy.make(Policy, workflow_phase='published')
    root = DocCursor.new_tree('root', '0', policy=policy)
    root.add_child('sec', text='blah')
    root.nested_set_renumber()
    doc_url = f"/{policy.pk}"

    # Get the original document...
    original = admin_client.get(doc_url)
    assert original.status_code == 200
    payload = original.json()

    # Modify it a bit...
    first_child = payload['children'][0]
    first_child['title'] = 'boop'
    first_child['content'][0]['text'] = 'hallo'
    put_response = admin_client.put(
        doc_url,
        data=json.dumps(payload),
        content_type='application/json',
    )
    assert put_response.status_code == 204

    # Now fetch it again, and make sure our modification stuck.
    refetched = admin_client.get(doc_url)
    assert refetched.status_code == 200
    updated_child = refetched.json()['children'][0]
    assert updated_child['title'] == 'boop'
    assert updated_child['content'][0]['text'] == 'hallo'
def handle(self, *args, **kwargs):
    """Migrate every depth-0 document, one transaction per document.

    A tqdm progress bar sized to the number of root nodes is advanced by
    one after each document has been processed.
    """
    root_nodes = DocNode.objects.filter(depth=0)
    with tqdm(total=root_nodes.count()) as progress:
        for root_node in root_nodes:
            # Each document is migrated inside its own atomic block.
            with transaction.atomic():
                migrate_doc(DocCursor.load_from_model(root_node))
            progress.update(1)
def test_policy_edit_doc_button(admin_client):
    """The policy change page offers a "Save and edit document" button."""
    policy = mommy.make(Policy, slug='some-policy')
    root = DocCursor.new_tree('root', policy=policy)
    root.nested_set_renumber()

    change_url = f'/admin/reqs/policy/{policy.pk}/change/'
    page = admin_client.get(change_url)

    for expected_fragment in (b'_savethendoc', b'Save and edit document'):
        assert expected_fragment in page.content
def to_representation(self, instance: DocCursor) -> List[PrimitiveDict]:
    """Serialize a cursor's annotations as a list of nested structures.

    The cursor's annotations are nested via ``nest_annotations`` (given the
    length of the cursor's text) and handed to
    ``NestedAnnotationSerializer`` with ``many=True``. The cursor and the
    parent serializer are forwarded through the nested serializer context.
    """
    raw_annotations = instance.annotations()
    nested = nest_annotations(raw_annotations, len(instance.text))
    child_context = {
        'cursor': instance,
        'parent_serializer': self.context['parent_serializer'],
    }
    serializer = NestedAnnotationSerializer(
        nested, context=child_context, many=True)
    return serializer.data
def test_by_pretty_url(client):
    """A document can be fetched by its OMB policy id instead of its pk."""
    policy = mommy.make(
        Policy,
        omb_policy_id='M-Something-18',
        workflow_phase='published',
    )
    root = DocCursor.new_tree('root', '0', policy=policy)
    root.nested_set_renumber()

    response = client.get("/M-Something-18")
    fetched = json.loads(response.content.decode("utf-8"))

    expected = DocCursorSerializer(root, context={'policy': policy}).data
    assert fetched == expected
def test_404s(client):
    """Unknown policies and identifiers 404; existing ones return 200."""
    policy = mommy.make(Policy, workflow_phase='published')
    root = DocCursor.new_tree('root', '0', policy=policy)
    root.add_child('sec')
    root.nested_set_renumber()

    expected_statuses = [
        ("/987654321", 404),
        (f"/{policy.pk}", 200),
        (f"/{policy.pk}/root_0", 200),
        (f"/{policy.pk}/root_1", 404),
        (f"/{policy.pk}/root_0__sec_1", 200),
        (f"/{policy.pk}/root_0__sec_2", 404),
    ]
    for url, status in expected_statuses:
        assert client.get(url).status_code == status
def get_descendant_footnotes(self, instance):
    """Collect the serialized footnotes cited anywhere under this node.

    Only root instances and instances whose ``node_type`` is ``'table'``
    get a value; every other instance yields ``None``. For eligible
    instances, each citation found while walking the cursor contributes one
    serialized footnote subtree, in walk order.
    """
    eligible = instance.is_root or instance.node_type == 'table'
    if not eligible:
        return None

    return [
        self.serialize_doc_cursor(
            DocCursor(instance.cursor.tree,
                      citation.footnote_node.identifier))
        for node in instance.cursor.walk()
        for citation in node.footnotecitations.all()
    ]
def test_nonpublic(client, admin_client):
    """A policy in the 'edit' phase 404s for `client` but not for admins."""
    policy = mommy.make(
        Policy,
        omb_policy_id='M-Something-18',
        workflow_phase='edit',
    )
    root = DocCursor.new_tree('root', '0', policy=policy)
    root.nested_set_renumber()

    anonymous_response = client.get('/M-Something-18')
    assert anonymous_response.status_code == 404

    admin_response = admin_client.get('/M-Something-18')
    assert admin_response.status_code == 200
def test_table_of_contents():
    """The table of contents should be generated from titled elements,
    regardless of position in tree."""
    policy = mommy.make(Policy)
    root = DocCursor.new_tree('root', policy=policy)
    root.add_child('container', title='First title')
    root['container_1'].add_child('para')
    root['container_1']['para_1'].add_child('subpar')
    root['container_1']['para_1']['subpar_1'].add_child('item', title='Sub')
    root.add_child('other', title='Second')
    root.nested_set_renumber()

    def serialize_meta(cursor, is_root):
        """Serialize the Meta wrapper for `cursor` with a fresh parent."""
        meta_obj = meta.Meta(cursor, is_root=is_root, policy=policy)
        serializer = meta.MetaSerializer(
            meta_obj,
            context={'parent_serializer': DocCursorSerializer()},
        )
        return serializer.data

    # The untitled para/subpar levels are skipped; 'Sub' hangs directly
    # off of 'First title'.
    nested_item = {
        'identifier': ('root_1__container_1__para_1__subpar_1'
                       '__item_1'),
        'title': 'Sub',
        'children': [],
    }
    first_entry = {
        'identifier': 'root_1__container_1',
        'title': 'First title',
        'children': [nested_item],
    }
    second_entry = {
        'identifier': 'root_1__other_1',
        'title': 'Second',
        'children': [],
    }
    expected_toc = {
        'identifier': 'root_1',
        'title': '',
        'children': [first_entry, second_entry],
    }

    root_data = serialize_meta(root, True)
    assert root_data['table_of_contents'] == expected_toc

    non_root_data = serialize_meta(root['other_1'], False)
    assert 'table_of_contents' not in non_root_data
def test_ensure_section_has_heading():
    """Sections lacking a heading get a placeholder; others are kept."""
    root = DocCursor.new_tree('root')
    outer = root.add_child('sec')
    outer.add_child('sec')
    with_heading = outer.add_child('sec')
    with_heading.add_child('heading', text='Subheading')
    root.add_child('sec')

    root = migrate_documents.ensure_section_has_heading(root)

    placeholder = '--Missing Heading--'
    first_section = root['sec_1']
    assert first_section['heading_1'].text == placeholder
    assert first_section['sec_1']['heading_1'].text == placeholder
    assert first_section['sec_2']['heading_1'].text == 'Subheading'
    assert root['sec_2']['heading_1'].text == placeholder
def ensure_section_has_heading(doc: DocCursor) -> DocCursor:
    """Give every 'sec' node a leading 'heading' child.

    Sections whose first child is not a heading (or which have no children
    at all) receive a placeholder heading inserted at position 0. The same
    ``doc`` that was passed in is returned.
    """
    def is_section(node):
        return node.node_type == 'sec'

    # Snapshot the matches first so adding children below cannot disturb
    # the iteration.
    for section in list(doc.filter(is_section)):
        kids = list(section.children())
        starts_with_heading = bool(kids) and kids[0].node_type == 'heading'
        if starts_with_heading:
            continue
        section.add_child(
            'heading',
            insert_pos=0,
            text='--Missing Heading--',
            policy_id=doc.policy_id,
        )
    return doc
def test_descendant_footnotes_meta(node_type, is_root):
    """Only the root and "table" nodes should get descendant_footnotes."""
    policy = mommy.make(Policy)
    cursor = DocCursor.new_tree(node_type, policy=policy)
    cursor.nested_set_renumber()

    serializer = meta.MetaSerializer(
        meta.Meta(cursor, is_root, policy),
        context={'parent_serializer': DocCursorSerializer()},
    )
    result = serializer.data

    should_have_field = bool(node_type == 'table' or is_root)
    has_field = 'descendant_footnotes' in result
    assert has_field == should_have_field
def test_descendant_footnotes():
    """We pull out footnotes of all descendants, and only descendants."""
    policy = mommy.make(Policy)
    root = DocCursor.new_tree('root', policy=policy)
    ftnt_a = root.add_child('footnote', 'a')
    root.add_child('para')
    ftnt_b = root['para_1'].add_child('footnote', 'b')
    root.add_child('list')
    for _ in range(3):
        root['list_1'].add_child('para')
    ftnt_c = root['list_1']['para_3'].add_child('footnote', 'c')
    root.nested_set_renumber()

    # para_1 cites footnotes a and b; list_1/para_2 cites footnote c (which
    # lives under para_3).
    first_para = root['para_1']
    first_para.footnotecitations.create(
        start=0, end=1, footnote_node=ftnt_a.model)
    first_para.footnotecitations.create(
        start=1, end=2, footnote_node=ftnt_b.model)
    root['list_1']['para_2'].footnotecitations.create(
        start=0, end=1, footnote_node=ftnt_c.model)

    def footnote_ids(cursor):
        """Identifiers of the descendant footnotes serialized for cursor."""
        serializer = meta.MetaSerializer(
            meta.Meta(cursor, is_root=True, policy=policy),
            context={'parent_serializer': DocCursorSerializer()},
        )
        footnotes = serializer.data['descendant_footnotes']
        return [footnote['identifier'] for footnote in footnotes]

    id_a = 'root_1__footnote_a'
    id_b = 'root_1__para_1__footnote_b'
    id_c = 'root_1__list_1__para_3__footnote_c'

    assert footnote_ids(root) == [id_a, id_b, id_c]
    assert footnote_ids(root['footnote_a']) == []
    assert footnote_ids(root['para_1']) == [id_a, id_b]
    assert footnote_ids(root['list_1']) == [id_c]
    assert footnote_ids(root['list_1']['para_2']) == [id_c]
    # no citations in para 3
    assert footnote_ids(root['list_1']['para_3']) == []
def test_content_outside():
    """Links at both ends of the text leave a plain chunk in the middle."""
    node = mommy.make(DocNode, text='Some text here')
    link_specs = (
        (0, len('Some '), 'http://example.com/aaa'),
        (len('Some text'), len('Some text here'), 'http://example.com/bbb'),
    )
    for start, end, href in link_specs:
        node.externallinks.create(start=start, end=end, href=href)

    cursor = DocCursor.load_from_model(node)
    content = doc_cursor.DocCursorSerializer(cursor).data['content']

    assert len(content) == 3
    texts = [chunk['text'] for chunk in content]
    assert texts == ['Some ', 'text', ' here']
    content_types = [chunk['content_type'] for chunk in content]
    assert content_types == ['external_link', '__text__', 'external_link']
def test_create_document_success(monkeypatch):
    """create_document parses the upload, stores it, and migrates it.

    The parser, the DB writer, and the migration are all replaced with
    Mocks, so only the orchestration in ``forms.create_document`` is
    exercised.
    """
    for patched_name in ('OMBDocument', 'to_db', 'migrate_doc'):
        monkeypatch.setattr(forms, patched_name, Mock())

    uploaded = SimpleUploadedFile('a.pdf', b'aaa')
    policy = mommy.make(Policy, document_source=uploaded)
    doc = DocCursor.new_tree('policy', policy=policy)
    doc.add_child('para', text='Content')
    doc.nested_set_renumber()
    forms.to_db.return_value = doc

    forms.create_document(policy)

    expected_call = call(policy.document_source.file)
    assert forms.OMBDocument.from_file.call_args == expected_call
    assert forms.to_db.called
    assert policy.workflow_phase == 'cleanup'
    assert forms.migrate_doc.called
def get_object(self, prefetch_related=True):
    """Look up the requested document node and return it as a cursor.

    The policy is resolved with ``policy_or_404``; unauthenticated users
    are restricted to published policies. The node is selected by the
    ``identifier`` URL kwarg when one is given, otherwise the depth-0 node
    of the policy is used. When ``prefetch_related`` is true, annotations
    are prefetched and the node's descendants are added to the cursor.
    NOTE(review): with ``prefetch_related=False`` no descendants are loaded
    here -- confirm callers expect a root-only cursor.

    The resolved policy is stored on ``self.policy`` and object permissions
    are checked before the cursor is returned.
    """
    is_anonymous = not self.request.user.is_authenticated
    policy = policy_or_404(self.kwargs['policy_id'], is_anonymous)
    # we'll pass this policy down when we serialize
    self.policy = policy

    lookup = {'policy_id': policy.pk}
    identifier = self.kwargs.get('identifier')
    if identifier:
        lookup['identifier'] = identifier
    else:
        lookup['depth'] = 0

    nodes = DocNode.objects
    nodes = nodes.prefetch_annotations() if prefetch_related else nodes
    root_model = get_object_or_404(nodes, **lookup)

    cursor = DocCursor.load_from_model(root_model, subtree=False)
    if prefetch_related:
        descendants = root_model.descendants().prefetch_annotations()
        cursor.add_models(descendants)

    self.check_object_permissions(self.request, cursor)
    return cursor
def test_correct_data(client):
    """Each document URL returns the serialization of the matching node."""
    policy = mommy.make(Policy, workflow_phase='published')
    root = DocCursor.new_tree('root', '0', policy=policy)
    sec1 = root.add_child('sec')
    root.add_child('sec')
    sec1.add_child('para', 'a')
    root.nested_set_renumber()

    def fetch(url):
        """GET `url` and decode the JSON body."""
        return json.loads(client.get(url).content.decode('utf-8'))

    def serialize(node):
        """Serialize `node` directly, bypassing the HTTP layer."""
        return DocCursorSerializer(node, context={'policy': policy}).data

    cases = [
        (f"/{policy.pk}", root),
        (f"/{policy.pk}/root_0", root),
        (f"/{policy.pk}/root_0__sec_1", root['sec_1']),
        (f"/{policy.pk}/root_0__sec_2", root['sec_2']),
        (f"/{policy.pk}/root_0__sec_1__para_a", root['sec_1']['para_a']),
    ]
    for url, node in cases:
        assert fetch(url) == serialize(node)
def test_policy_redirect_to_editor(admin_client):
    """Saving with '_savethendoc' stores the policy and redirects to the
    document editor."""
    policy = mommy.make(Policy, slug='some-policy')
    root = DocCursor.new_tree('root', policy=policy)
    root.nested_set_renumber()

    form_data = {
        'title': 'Some new policy title',
        'omb_policy_id': '',
        'issuance': policy.issuance.isoformat(),
        'sunset': '',
        'public': 'on',
        'workflow_phase': 'cleanup',
        '_savethendoc': 'Save and edit document',
    }
    change_url = f'/admin/reqs/policy/{policy.pk}/change/'
    response = admin_client.post(change_url, form_data)

    assert response.status_code == 302
    assert response['Location'] == '/admin/document-editor/some-policy'
    policy.refresh_from_db()
    assert policy.title == 'Some new policy title'
def ensure_listitem_in_list(doc: DocCursor) -> DocCursor:
    """We expect all listitems to have a parent list. However, the parser has
    emitted lists followed by bare listitems. We'll place the listitem in the
    list that immediately precedes it.

    The tree is rescanned from the top after every move, because moving a
    node invalidates the iteration in progress. The rescan is a loop rather
    than a recursive call so that documents with many bare listitems cannot
    exhaust the interpreter's recursion limit. Listitems that cannot be
    fixed (no parent, or no preceding sibling list) are logged and left in
    place. The same ``doc`` that was passed in is returned.
    """
    rescan = True
    while rescan:
        rescan = False
        for li in doc.filter(lambda n: n.node_type == 'listitem'):
            parent = li.parent()
            prev_sibling = li.left_sibling()
            if not parent:
                logger.warning('Root of %s is an li.',
                               doc.policy.title_with_number)
                continue
            if parent.node_type == 'list':     # all is well
                continue

            if prev_sibling and prev_sibling.node_type == 'list':
                li.append_to(prev_sibling)
                # The tree changed underneath the iterator; start over.
                rescan = True
                break
            # else: create new list to wrap this one
            logger.warning('Could not fix li in %s',
                           doc.policy.title_with_number)

    return doc  # no changes needed
def test_create_requirements():
    """create_requirements replaces a policy's requirements with one per
    text-bearing node of its document."""
    policy = mommy.make(Policy)
    mommy.make(Requirement, policy=policy, _quantity=5)
    doc = DocCursor.new_tree('policy', policy=policy)
    doc.add_child('para', text='First paragraph')
    doc.add_child('sec')
    section_children = (
        ('heading', 'A section'),
        ('para', 'Second paragraph'),
        ('para', 'Final paragraph'),
    )
    for node_type, text in section_children:
        doc['sec_1'].add_child(node_type, text=text)
    doc.nested_set_renumber()

    def policy_requirement_count():
        return Requirement.objects.filter(policy=policy).count()

    # These will be deleted
    assert policy_requirement_count() == 5

    forms.create_requirements(policy)

    assert policy_requirement_count() == 4
    req_texts = set(Requirement.objects.values_list('req_text', flat=True))
    assert req_texts == {
        'First paragraph',
        'A section',
        'Second paragraph',
        'Final paragraph',
    }
def test_ensure_listitem_in_list():
    """Bare listitems following a list are moved into that list."""
    root = DocCursor.new_tree('root')
    list_el = root.add_child('list')
    list_el.add_child('listitem', '1')
    root.add_child('listitem', '2')
    root.add_child('listitem', '3')

    def identifiers(cursor):
        """Identifiers of every node in the tree, in walk order."""
        return [node.identifier for node in cursor.walk()]

    assert identifiers(root) == [
        'root_1',
        'root_1__list_1',
        'root_1__list_1__listitem_1',
        'root_1__listitem_2',
        'root_1__listitem_3',
    ]

    root = migrate_documents.ensure_listitem_in_list(root)

    assert identifiers(root) == [
        'root_1',
        'root_1__list_1',
        'root_1__list_1__listitem_1',
        'root_1__list_1__listitem_2',
        'root_1__list_1__listitem_3',
    ]
def test_content_middle_annotation():
    """A citation in the middle of the text splits it into three chunks."""
    policy = mommy.make(Policy)
    cursor = DocCursor.new_tree('policy', policy=policy,
                                text='Some text here')
    footnote_cursor = cursor.add_child('child')
    cursor.nested_set_renumber()
    cursor.footnotecitations.create(
        start=len('Some '),
        end=len('Some text'),
        footnote_node=footnote_cursor.model,
    )

    content = doc_cursor.DocCursorSerializer(cursor).data['content']

    assert len(content) == 3
    texts = [chunk['text'] for chunk in content]
    assert texts == ['Some ', 'text', ' here']
    content_types = [chunk['content_type'] for chunk in content]
    assert content_types == ['__text__', 'footnote_citation', '__text__']

    expected_footnote = doc_cursor.DocCursorSerializer(
        footnote_cursor, context={'is_root': False}).data
    assert content[1]['footnote_node'] == expected_footnote
def test_akn_put_works_for_admin_users(admin_client):
    """An admin can GET a document as AKN XML, edit it, and PUT it back."""
    policy = mommy.make(Policy, workflow_phase='published')
    root = DocCursor.new_tree('root', '0', policy=policy)
    root.add_child('sec', text='blah')
    root.nested_set_renumber()
    doc_url = f"/{policy.pk}"

    # Get the original document...
    akn_response = admin_client.get(f"/{policy.pk}?format=akn")
    assert akn_response.status_code == 200
    expected_type = 'application/akn+xml; charset=utf-8'
    assert akn_response['content-type'] == expected_type

    # Modify it a bit...
    edited_xml = akn_response.content.replace(b'blah', b'hallo')
    put_response = admin_client.put(
        doc_url, data=edited_xml, content_type='application/akn+xml')
    assert put_response.status_code == 204

    # Now fetch it again, and make sure our modification stuck.
    refetched = admin_client.get(doc_url)
    assert refetched.status_code == 200
    first_child = refetched.json()['children'][0]
    assert first_child['content'][0]['text'] == 'hallo'
def get_table_of_contents(self, instance):
    """Serialize a table of contents for root instances.

    Non-root instances yield ``None``. For a root, a cursor is loaded from
    the instance's model using a queryset that excludes nodes with an empty
    title, and that cursor is rendered with ``TableOfContentsSerializer``.
    """
    if not instance.is_root:
        return None

    titled_nodes = DocNode.objects.exclude(title='')
    toc_cursor = DocCursor.load_from_model(
        instance.cursor.model, queryset=titled_nodes)
    return TableOfContentsSerializer(toc_cursor).data
def test_end_to_end():
    """Create a tree, then serialize it. Trivially modify the serialized
    value and deserialize it."""
    policy = mommy.make(
        Policy,
        issuance=date(2001, 2, 3),
        omb_policy_id='M-18-18',
        title='Some Title',
        uri='http://example.com/thing.pdf',
    )
    root = DocCursor.new_tree('root', '0', policy=policy, title='Policy A')
    root.add_child('sec', text='Section 1', title='First Section')
    sec2 = root.add_child('sec', title='Section 2')
    pa = sec2.add_child('para', 'a', marker='(a)')
    pa.add_child('para', '1', text='Paragraph (a)(1)', marker='(1)')
    sec2.add_child('para', 'b', marker='b.')
    root.nested_set_renumber()

    result = doc_cursor.DocCursorSerializer(
        root, context={'policy': policy}).data

    # The expected serialization is assembled bottom-up from its pieces.
    expected_policy = {
        'issuance': '2001-02-03',
        'omb_policy_id': 'M-18-18',
        'original_url': 'http://example.com/thing.pdf',
        'title': 'Some Title',
        'title_with_number': 'M-18-18: Some Title',
    }
    expected_toc = {
        'identifier': 'root_0',
        'title': 'Policy A',
        'children': [
            {
                'children': [],
                'identifier': 'root_0__sec_1',
                'title': 'First Section',
            },
            {
                'children': [],
                'identifier': 'root_0__sec_2',
                'title': 'Section 2',
            },
        ],
    }
    expected_sec_1 = {
        'identifier': 'root_0__sec_1',
        'node_type': 'sec',
        'type_emblem': '1',
        'text': 'Section 1',
        'title': 'First Section',
        'marker': '',
        'depth': 1,
        'meta': {},
        'content': [{
            'content_type': '__text__',
            'inlines': [],
            'text': 'Section 1',
        }],
        'children': [],
    }
    expected_para_a_1 = {
        'identifier': 'root_0__sec_2__para_a__para_1',
        'node_type': 'para',
        'type_emblem': '1',
        'text': 'Paragraph (a)(1)',
        'title': '',
        'marker': '(1)',
        'depth': 3,
        'meta': {},
        'content': [{
            'content_type': '__text__',
            'inlines': [],
            'text': 'Paragraph (a)(1)',
        }],
        'children': [],
    }
    expected_para_a = {
        'identifier': 'root_0__sec_2__para_a',
        'node_type': 'para',
        'type_emblem': 'a',
        'text': '',
        'title': '',
        'marker': '(a)',
        'depth': 2,
        'meta': {},
        'content': [],
        'children': [expected_para_a_1],
    }
    expected_para_b = {
        'identifier': 'root_0__sec_2__para_b',
        'node_type': 'para',
        'type_emblem': 'b',
        'text': '',
        'title': '',
        'marker': 'b.',
        'depth': 2,
        'meta': {},
        'content': [],
        'children': [],
    }
    expected_sec_2 = {
        'identifier': 'root_0__sec_2',
        'node_type': 'sec',
        'type_emblem': '2',
        'text': '',
        'title': 'Section 2',
        'marker': '',
        'depth': 1,
        'meta': {},
        'content': [],
        'children': [expected_para_a, expected_para_b],
    }
    expected_root = {
        'identifier': 'root_0',
        'node_type': 'root',
        'type_emblem': '0',
        'text': '',
        'title': 'Policy A',
        'marker': '',
        'depth': 0,
        'content': [],
        'meta': {
            'descendant_footnotes': [],
            # Note this field does not appear on children
            'policy': expected_policy,
            'table_of_contents': expected_toc,
        },
        'children': [expected_sec_1, expected_sec_2],
    }
    assert result == expected_root

    # Round-trip: tweak the title, deserialize, and apply to the tree.
    result['title'] = 'MODIFIED Policy A'
    validated = doc_cursor.DocCursorSerializer().to_internal_value(result)
    new_root = doc_cursor.DocCursorSerializer().update(root, validated)
    assert new_root.title == 'MODIFIED Policy A'
    assert new_root.policy.pk == policy.pk
def test_footnote_citations():
    """The "content" field should contain serialized FootnoteCitations."""
    policy = mommy.make(Policy)
    para = DocCursor.new_tree('para', text='Some1 message2 here',
                              policy=policy)
    footnote1 = para.add_child('footnote').model
    footnote2 = para.add_child('footnote').model
    para.nested_set_renumber()
    para.footnotecitations.create(
        start=len('Some'),
        end=len('Some1'),
        footnote_node=footnote1,
    )
    para.footnotecitations.create(
        start=len('Some1 message'),
        end=len('Some1 message2'),
        footnote_node=footnote2,
    )

    result = doc_cursor.DocCursorSerializer(
        para, context={'policy': policy}).data

    def plain(text):
        """Expected serialization of an un-annotated run of text."""
        return {'content_type': '__text__', 'inlines': [], 'text': text}

    def citation(text, footnote_cursor):
        """Expected serialization of a citation wrapping `text`."""
        footnote_data = doc_cursor.DocCursorSerializer(
            footnote_cursor,
            context={'policy': policy, 'is_root': False},
        ).data
        return {
            'content_type': 'footnote_citation',
            'inlines': [plain(text)],
            'text': text,
            'footnote_node': footnote_data,
        }

    expected_content = [
        plain('Some'),
        citation('1', para['footnote_1']),
        plain(' message'),
        citation('2', para['footnote_2']),
        plain(' here'),
    ]
    assert result['content'] == expected_content