def test_links(self):
    # Inline markdown links are expected to become TEI <ref> elements:
    # the URL goes in @target and the optional link title in @n.
    titled_md = 'This is [an example](http://example.com/ "Title") inline link.'
    titled_tei = ('<p>This is <ref target="http://example.com/" n="Title">'
                  'an example</ref> inline link.</p>')
    self.assertEqual(titled_tei, markdown_tei.convert(titled_md))

    # a link without a title yields a <ref> with no @n attribute
    untitled_md = '[This link](http://example.net/) has no title attribute.'
    untitled_tei = ('<p><ref target="http://example.net/">This link</ref>'
                    ' has no title attribute.</p>')
    self.assertEqual(untitled_tei, markdown_tei.convert(untitled_md))
def test_emphasis(self):
    # Markdown emphasis is expected as <emph> with the style in @rend.
    # Each case pairs the markdown input with the TEI expected back.
    cases = [
        # double asterisks: bold
        ('a **bold** statement',
         '<p>a <emph rend="bold">bold</emph> statement</p>'),
        # single asterisks: italic
        ('an *emphatic* statement',
         '<p>an <emph rend="italic">emphatic</emph> statement</p>'),
    ]
    for markdown_text, expected_tei in cases:
        self.assertEqual(expected_tei, markdown_tei.convert(markdown_text))
def test_inline_html(self):
    # Inline HTML embedded in markdown: <i> is expected as italic <emph>
    italic_html = 'Something about <i>Murder in the Cathedral</i> by Eliot'
    italic_tei = ('<p>Something about <emph rend="italic">Murder in '
                  'the Cathedral</emph> by Eliot</p>')
    self.assertEqual(italic_tei, markdown_tei.convert(italic_html))

    # a self-closed <i/> is still expected to yield an (empty) emph element
    empty_result = markdown_tei.convert('empty inline <i/>')
    self.assertEqual('<p>empty inline <emph rend="italic"></emph></p>',
                     empty_result)
def test_paragraphs(self):
    first = 'Single paragraph'
    second = 'Second paragraph'

    # a lone block of text is wrapped in a single <p>
    self.assertEqual('<p>%s</p>' % first, markdown_tei.convert(first))

    # text separated by a blank line: one <p> per paragraph, back to back
    two_para_md = '\n\n'.join([first, second])
    two_para_tei = ''.join('<p>%s</p>' % para for para in (first, second))
    self.assertEqual(two_para_tei, markdown_tei.convert(two_para_md))
def test_code(self):
    # backtick-delimited inline code stays inside its paragraph
    inline_tei = markdown_tei.convert('Here is some `code` inline.')
    self.assertEqual('<p>Here is some <code>code</code> inline.</p>',
                     inline_tei)

    # fenced block with a language tag: the tag is expected in @lang and
    # the snippet itself is expected back unchanged
    # NOTE(review): the line breaks in this snippet are assumed; confirm
    # them against the intended fixture
    ruby_lines = [
        "require 'redcarpet'",
        'markdown = Redcarpet.new("Hello World!")',
        'puts markdown.to_html',
    ]
    code_snippet = '\n'.join(ruby_lines)
    fenced_md = '```ruby\n%s\n```' % code_snippet
    self.assertEqual('<code lang="ruby">%s</code>' % code_snippet,
                     markdown_tei.convert(fenced_md))
def test_images(self):
    # Markdown images are expected as a TEI <media> element, with the alt
    # text (and the optional title) carried in a nested <desc>.
    # assertIn is used rather than the deprecated assert_ alias so a
    # failure reports the fragment and the actual output.
    imglink = '![Alt text](/path/to/img.png)'
    tei_imglink = markdown_tei.convert(imglink)
    self.assertIn('<media', tei_imglink)
    # NOTE(review): attribute is spelled "mimetype" here but "mimeType"
    # in the video/audio tests -- confirm which casing is intended
    self.assertIn(' mimetype="image/png"', tei_imglink)
    self.assertIn(' url="/path/to/img.png"', tei_imglink)
    # alt text alone becomes a paragraph inside the description
    self.assertIn('<desc><p>Alt text</p></desc>', tei_imglink)

    # an image title is expected as a <head> ahead of the alt text
    imglink_title = '![Alt text](/path/to/img.jpg "Optional title")'
    tei_imglink_title = markdown_tei.convert(imglink_title)
    self.assertIn('<desc><head>Optional title</head><p>Alt text</p></desc>',
                  tei_imglink_title)
def test_headers(self):
    # ATX headers: the number of leading # characters is expected in the
    # @type of the resulting <head>, as "levelN"
    for level in (1, 2, 6):
        title = 'This is an H%d' % level
        header_md = '%s %s' % ('#' * level, title)
        self.assertEqual('<head type="level%d">%s</head>' % (level, title),
                         markdown_tei.convert(header_md))

    # a horizontal rule is expected as an empty milestone element
    rule_tei = markdown_tei.convert('* * *')
    self.assertEqual('<milestone rend="horizontal-rule"/>', rule_tei)
def test_lists(self):
    # both list flavours share the same items; only the wrapper differs
    colors = ['Red', 'Green', 'Blue']
    items_tei = ''.join('<item>%s</item>' % color for color in colors)

    # bulleted markdown list -> plain <list>
    bulleted = '\n'.join('* %s' % color for color in colors)
    self.assertEqual('<list>%s</list>' % items_tei,
                     markdown_tei.convert(bulleted))

    # numbered markdown list -> <list rend="numbered">
    numbered = '\n'.join('%d. %s' % (position, color)
                         for position, color in enumerate(colors, 1))
    self.assertEqual('<list rend="numbered">%s</list>' % items_tei,
                     markdown_tei.convert(numbered))
def test_footnotes(self):
    # A markdown footnote is expected to produce two pieces of TEI: a
    # <ref> anchor where the label appears in the text, and a <note>
    # holding the footnote content, linked through the fn1 id.
    # NOTE(review): the line breaks in this fixture are assumed (reference,
    # blank line, definition); confirm against the intended input
    footnote = '\n'.join([
        'Footnotes[^1] have a label and content.',
        '',
        '[^1]: This is some footnote content.',
    ])
    tei_footnote = markdown_tei.convert(footnote)
    # assertIn is used rather than the deprecated assert_ alias so a
    # failure reports the fragment and the actual output
    self.assertIn(
        '<p>Footnotes<ref target="#fn1" type="noteAnchor">1</ref> have',
        tei_footnote)
    self.assertIn(
        '<note xml:id="fn1" type="footnote">'
        '<p>This is some footnote content.</p></note>',
        tei_footnote)
def test_blockquote(self):
    # A two-paragraph markdown blockquote is expected as one <quote>
    # wrapping two <p> elements.
    blockquote = '\n'.join([
        '> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet',
        '> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.',
        '> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.',
        '> ',
        '> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse',
        '> id sem consectetuer libero luctus adipiscing.'])
    tei_blockquote = markdown_tei.convert(blockquote)
    # assertTrue/assertIn replace the deprecated assert_ alias
    # the quote opens directly with the first paragraph
    self.assertTrue(
        tei_blockquote.startswith('<quote><p>This is a blockquote'))
    # paragraph break falls between the two quoted paragraphs
    self.assertIn('risus.</p><p>Donec', tei_blockquote)
    # and the quote closes right after the second paragraph
    self.assertTrue(tei_blockquote.endswith('adipiscing.</p></quote>'))
def test_tables(self):
    # A pipe-delimited markdown table is expected as a TEI <table>: header
    # cells carry role="label", body cells role="data".
    # NOTE(review): the line breaks in this fixture are assumed (leading
    # and trailing newline, one table row per line); confirm against the
    # intended input
    table = '\n'.join([
        '',
        'Firstly | Secondly',
        '------- | --------',
        'A.1 | A.2',
        'B.1 | B.2',
        '',
    ])
    tei_table = markdown_tei.convert(table)
    # assertIn is used rather than the deprecated assert_ alias so a
    # failure reports the fragment and the actual output
    self.assertIn('<table><head><row><cell role="label">Firstly</cell>',
                  tei_table)
    self.assertIn(
        '<row><cell role="data">B.1</cell><cell role="data">B.2</cell></row>',
        tei_table)
def test_video(self):
    # HTML5 <video> markup embedded in markdown is expected to come out as
    # a TEI <media> element carrying the source url and its mimetype.
    # NOTE(review): the line breaks inside the HTML fixtures below are
    # assumed (one element per line); confirm against the intended input
    media_tei = '<media mimeType="%s" url="%s"/>'

    # source element with explicit src and type attributes
    mimetype = 'video/mp4'
    url = 'http://some.video/file.mp4'
    expected = media_tei % (mimetype, url)
    video = '\n'.join([
        "<video controls='controls'>",
        "<source src='%s' type='%s'/>",
        "</video>",
    ]) % (url, mimetype)
    self.assertEqual(expected, markdown_tei.convert(video))

    # source attribute tag order shouldn't matter
    video = '\n'.join([
        "<video controls='controls'>",
        "<source type='%s' src='%s'/>",
        "</video>",
    ]) % (mimetype, url)
    self.assertEqual(expected, markdown_tei.convert(video))

    # video surrounded by other content (script element, trailing text);
    # assertIn replaces the deprecated assert_ alias
    video_plus = '\n'.join([
        '<script>console.log("test");</script>',
        video,
        'testing ... again ...',
    ])
    self.assertIn(expected, markdown_tei.convert(video_plus))

    # inline video block: text runs directly up against the video tags
    video_attrs = {
        'url': 'http://www.w3schools.com/html/mov_bbb.mp4',
        'mimetype': 'video/mp4',
    }
    inline_video = '\n'.join([
        'applause text inline with audio<video controls="controls">',
        '<source src="%(url)s" type="%(mimetype)s"/>',
        '</video>will cause the TEI to break',
    ]) % video_attrs
    inline_tei_video = markdown_tei.convert(inline_video)
    self.assertIn('<media mimeType="%(mimetype)s" url="%(url)s"/>' % video_attrs,
                  inline_tei_video)

    # no type attribute - mimetype inferred from the video src url; the
    # last case is the fallback mimetype expected when the url extension
    # is not informative
    untyped_video = '\n'.join([
        "<video controls='controls'>",
        "<source src='%s'/>",
        "</video>",
    ])
    inferred_cases = [
        ('video/mp4', 'http://some.video/file.mp4'),
        ('video/ogg', 'http://some.video/file.ogg'),
        ('video/mp4', 'http://some.video/file/without/ext/'),
    ]
    for mimetype, url in inferred_cases:
        self.assertEqual(media_tei % (mimetype, url),
                         markdown_tei.convert(untyped_video % (url, )))
def test_audio(self):
    # HTML5 <audio> markup embedded in markdown is expected to come out as
    # a TEI <media> element carrying the source url and its mimetype.
    # NOTE(review): the line breaks inside the HTML fixtures below are
    # assumed (one element per line); confirm against the intended input
    media_tei = '<media mimeType="%s" url="%s"/>'

    # source element with explicit src and type attributes
    mimetype = 'audio/mpeg'
    url = 'http://some.audio/file.mp3'
    expected = media_tei % (mimetype, url)
    audio = '\n'.join([
        "<audio controls='controls'>",
        "<source src='%s' type='%s'/>",
        "</audio>",
    ]) % (url, mimetype)
    self.assertEqual(expected, markdown_tei.convert(audio))

    # source attribute tag order shouldn't matter
    audio = '\n'.join([
        "<audio controls='controls'>",
        "<source type='%s' src='%s'/>",
        "</audio>",
    ]) % (mimetype, url)
    self.assertEqual(expected, markdown_tei.convert(audio))

    # audio surrounded by other content (script element, trailing text);
    # assertIn replaces the deprecated assert_ alias
    audio_plus = '\n'.join([
        '<script>console.log("test");</script>',
        audio,
        'testing ... again ...',
    ])
    self.assertIn(expected, markdown_tei.convert(audio_plus))

    # inline audio block: text runs directly up against the audio tags
    audio_attrs = {
        'url': 'http://soundbible.com/mp3/Audience_Applause-Matthiew11-1206899159.mp3',
        'mimetype': 'audio/mpeg',
    }
    inline_audio = '\n'.join([
        'applause text inline with audio<audio controls="controls">',
        '<source src="%(url)s" type="%(mimetype)s"/>',
        '</audio>will cause the TEI to break',
    ]) % audio_attrs
    inline_tei_audio = markdown_tei.convert(inline_audio)
    self.assertIn('<media mimeType="%(mimetype)s" url="%(url)s"/>' % audio_attrs,
                  inline_tei_audio)

    # no type attribute - mimetype inferred from the audio src url; the
    # last case is the fallback mimetype expected when the url extension
    # is not informative
    untyped_audio = '\n'.join([
        "<audio controls='controls'>",
        "<source src='%s'/>",
        "</audio>",
    ])
    inferred_cases = [
        ('audio/mpeg', 'http://some.audio/file.mp3'),
        ('audio/aac', 'http://some.audio/file.aac'),
        ('audio/mpeg', 'http://some.audio/file/without/ext/'),
    ]
    for mimetype, url in inferred_cases:
        self.assertEqual(media_tei % (mimetype, url),
                         markdown_tei.convert(untyped_audio % (url, )))
def annotation_to_tei(annotation, teivol):
    '''Generate a tei note from an annotation.  Sets annotation id,
    slugified tags as ana attribute, username as resp attribute, and
    annotation content is converted from markdown to TEI.

    Tag references in the ana attribute are de-duplicated and sorted, so
    the same annotation always produces the same attribute value.

    :param annotation: :class:`~readux.annotations.models.Annotation`
    :param teivol: :class:`~readux.books.tei.AnnotatedFacsimile` tei
        document, for converting related page ARK uris into TEI ids
    :returns: :class:`readux.books.tei.Note`
    '''
    # NOTE(review): an identically named function appears later in this
    # file and would replace this one at import time; confirm which
    # definition is meant to be kept.

    # NOTE: annotation created/edited dates are not included here
    # because they were determined not to be relevant for our purposes

    # sample note provided by Alice
    # <note resp="JPK" xml:id="oshnp50n1" n="1"><p>This is an example note.</p></note>

    # convert markdown-formatted text content to tei
    note_content = markdown_tei.convert(annotation.text)
    # markdown results could be a list of paragraphs, and not a proper
    # xml tree; also, the generated tags do not include a namespace;
    # wrap in a note element and set the default namespace as tei
    teinote = load_xmlobject_from_string(
        '<note xmlns="%s">%s</note>' % (teimap.TEI_NAMESPACE, note_content),
        tei.Note)

    # what id do we want? annotation uuid? url?
    # (prefixed because the id can't start with a numeric)
    teinote.id = 'annotation-%s' % annotation.id
    teinote.href = absolutize_url(annotation.get_absolute_url())
    teinote.type = 'annotation'

    # if an annotation includes tags, reference them by slugified id in
    # @ana; info() is called once rather than once per lookup, and the
    # references are sorted because set iteration order is arbitrary
    info = annotation.info()
    if 'tags' in info and info['tags']:
        tag_refs = set('#%s' % slugify(t.strip()) for t in info['tags'])
        teinote.ana = ' '.join(sorted(tag_refs))

    # if the annotation has an associated user, mark the author
    # as responsible for the note
    if annotation.user:
        teinote.resp = annotation.user.username

    # include full markdown of the annotation, as a backup for losing
    # content converting from markdown to tei, and for easy display
    teinote.markdown = annotation.text

    # if annotation contains related pages, generate a link group
    if annotation.related_pages:
        for rel_page in annotation.related_pages:
            page_ref = tei.Ref(text=rel_page, type='related page')
            # find tei page identifier from the page ark; the ref is
            # still added, without a target, when no page id is found
            target = teivol.page_id_by_xlink(rel_page)
            if target is not None:
                page_ref.target = '#%s' % target
            teinote.related_pages.append(page_ref)

    # if annotation includes citations, add them to the tei
    # NOTE: expects these citations to be TEI encoded already (generated
    # by the zotero api and added via meltdown-zotero annotator plugin)
    if annotation.extra_data.get('citations', None):
        for bibl in annotation.extra_data['citations']:
            # zotero tei export currently includes an id that is not
            # a valid ncname (contains : and /)
            bibsoup = BeautifulSoup(bibl, 'xml')
            # convert xml id into the format we want:
            # zotero-#### (zotero item id)
            for bibl_struct in bibsoup.find_all('biblStruct'):
                zotero_item_id = bibl_struct['xml:id'].split('/')[-1]
                bibl_struct['xml:id'] = 'zotero-%s' % zotero_item_id

            # only the first biblStruct of each citation is added
            teibibl = load_xmlobject_from_string(
                bibsoup.biblStruct.prettify(), tei.BiblStruct)
            teinote.citations.append(teibibl)

    return teinote
def annotation_to_tei(annotation, teivol):
    '''Generate a tei note from an annotation.  Sets annotation id,
    slugified tags as ana attribute, username as resp attribute, and
    annotation content is converted from markdown to TEI.

    Tag references in the ana attribute are de-duplicated and sorted, so
    the same annotation always produces the same attribute value.

    :param annotation: :class:`~readux.annotations.models.Annotation`
    :param teivol: :class:`~readux.books.tei.AnnotatedFacsimile` tei
        document, for converting related page ARK uris into TEI ids
    :returns: :class:`readux.books.tei.Note`
    '''
    # NOTE(review): an identically named function appears earlier in this
    # file; this later definition would replace it at import time, so
    # confirm which definition is meant to be kept.

    # NOTE: annotation created/edited dates are not included here
    # because they were determined not to be relevant for our purposes

    # sample note provided by Alice
    # <note resp="JPK" xml:id="oshnp50n1" n="1"><p>This is an example note.</p></note>

    # convert markdown-formatted text content to tei
    note_content = markdown_tei.convert(annotation.text)
    # markdown results could be a list of paragraphs, and not a proper
    # xml tree; also, the generated tags do not include a namespace;
    # wrap in a note element and set the default namespace as tei
    teinote = load_xmlobject_from_string(
        '<note xmlns="%s">%s</note>' % (teimap.TEI_NAMESPACE, note_content),
        tei.Note)

    # what id do we want? annotation uuid? url?
    # (prefixed because the id can't start with a numeric)
    teinote.id = 'annotation-%s' % annotation.id
    teinote.href = absolutize_url(annotation.get_absolute_url())
    teinote.type = 'annotation'

    # if an annotation includes tags, reference them by slugified id in
    # @ana; info() is called once rather than once per lookup, and the
    # references are sorted because set iteration order is arbitrary
    info = annotation.info()
    if 'tags' in info and info['tags']:
        tag_refs = set('#%s' % slugify(t.strip()) for t in info['tags'])
        teinote.ana = ' '.join(sorted(tag_refs))

    # if the annotation has an associated user, mark the author
    # as responsible for the note
    if annotation.user:
        teinote.resp = annotation.user.username

    # include full markdown of the annotation, as a backup for losing
    # content converting from markdown to tei, and for easy display
    teinote.markdown = annotation.text

    # if annotation contains related pages, generate a link group
    if annotation.related_pages:
        for rel_page in annotation.related_pages:
            page_ref = tei.Ref(text=rel_page, type='related page')
            # find tei page identifier from the page ark; the ref is
            # still added, without a target, when no page id is found
            target = teivol.page_id_by_xlink(rel_page)
            if target is not None:
                page_ref.target = '#%s' % target
            teinote.related_pages.append(page_ref)

    # if annotation includes citations, add them to the tei
    # NOTE: expects these citations to be TEI encoded already (generated
    # by the zotero api and added via meltdown-zotero annotator plugin)
    if annotation.extra_data.get('citations', None):
        for bibl in annotation.extra_data['citations']:
            # zotero tei export currently includes an id that is not
            # a valid ncname (contains : and /)
            bibsoup = BeautifulSoup(bibl, 'xml')
            # convert xml id into the format we want:
            # zotero-#### (zotero item id)
            for bibl_struct in bibsoup.find_all('biblStruct'):
                zotero_item_id = bibl_struct['xml:id'].split('/')[-1]
                bibl_struct['xml:id'] = 'zotero-%s' % zotero_item_id

            # only the first biblStruct of each citation is added
            teibibl = load_xmlobject_from_string(
                bibsoup.biblStruct.prettify(), tei.BiblStruct)
            teinote.citations.append(teibibl)

    return teinote