def build_notes(book):
    """Rebuild the notes and note sources recorded for ``book``.

    Logs the book's slug, then, inside one database transaction:

    * deletes every note source currently attached to the book;
    * if the book has an HTML file, extracts its annotations and, for each
      one, reuses the ``Note`` matching the same sort key, footnote type,
      language and HTML (creating it when there is none), assigns the
      note's qualifiers, and records a note source pointing at the
      annotation's anchor in this book;
    * deletes the notes selected by ``notesource=None``.

    The sort key is ``sortify(text)`` stripped and cut to 128 characters.
    """
    task_logger.info(book.slug)
    with transaction.atomic():
        book.notesource_set.all().delete()

        if book.html_file:
            from librarian import html
            from librarian.fn_qualifiers import FN_QUALIFIERS

            annotations = html.extract_annotations(book.html_file.path)
            for anchor, fn_type, qualifiers, text_str, html_str in annotations:
                # The same field values serve both the lookup and the create.
                note_fields = {
                    'sort_key': sortify(text_str).strip()[:128],
                    'fn_type': fn_type,
                    'language': book.language,
                    'html': html_str,
                }
                existing = Note.objects.filter(**note_fields)
                note = existing[0] if existing else Note.objects.create(**note_fields)

                # Unknown qualifier codes get an empty name by default.
                note.qualifiers = [
                    Qualifier.objects.get_or_create(
                        qualifier=code,
                        defaults={'name': FN_QUALIFIERS.get(code, '')},
                    )[0]
                    for code in qualifiers
                ]
                note.notesource_set.create(book=book, anchor=anchor)

        # NOTE(review): the original indentation was lost; this cleanup is
        # assumed to run once per call, inside the transaction -- confirm.
        Note.objects.filter(notesource=None).delete()
def build_notes(book):
    """Regenerate the note index entries that originate from ``book``.

    After logging the book's slug, everything runs in a single transaction:
    the book's existing note sources are removed; when an HTML file is
    present, each extracted annotation is attached to a ``Note`` with
    identical sort key, footnote type, language and HTML (an existing one is
    reused, otherwise one is created), the note's qualifiers are replaced by
    the annotation's, and a note source for the annotation's anchor is
    created. Finally the notes selected by ``notesource=None`` are deleted.
    """
    task_logger.info(book.slug)
    with transaction.atomic():
        book.notesource_set.all().delete()

        if book.html_file:
            from librarian import html
            from librarian.fn_qualifiers import FN_QUALIFIERS

            for entry in html.extract_annotations(book.html_file.path):
                anchor, fn_type, qualifiers, text_str, html_str = entry

                # Sort keys are limited to 128 characters.
                sort_key = sortify(text_str).strip()[:128]
                language = book.language

                candidates = Note.objects.filter(
                    sort_key=sort_key, fn_type=fn_type,
                    language=language, html=html_str)
                if not candidates:
                    note = Note.objects.create(
                        sort_key=sort_key, html=html_str,
                        fn_type=fn_type, language=language)
                else:
                    note = candidates[0]

                linked = []
                for code in qualifiers:
                    # Codes missing from FN_QUALIFIERS default to an empty name.
                    default_name = FN_QUALIFIERS.get(code, '')
                    qualifier, _ = Qualifier.objects.get_or_create(
                        qualifier=code, defaults={'name': default_name})
                    linked.append(qualifier)
                note.qualifiers = linked

                note.notesource_set.create(book=book, anchor=anchor)

        # NOTE(review): nesting of this statement could not be recovered from
        # the collapsed source; assumed to sit directly under the ``with``.
        Note.objects.filter(notesource=None).delete()
def build_notes(book):
    """Replace the ``Note`` rows of ``book`` with freshly extracted ones.

    All notes currently linked to the book are deleted. If the book has an
    HTML file, one note is created per annotation extracted from it, storing
    the anchor, the annotation HTML and a sort key (``sortify`` of the
    annotation text, stripped and cut to 128 characters).
    """
    Note.objects.filter(book=book).delete()
    if not book.html_file:
        return

    from librarian import html

    for anchor, text_str, html_str in html.extract_annotations(book.html_file.path):
        key = sortify(text_str).strip()[:128]
        Note.objects.create(book=book, anchor=anchor, html=html_str, sort_key=key)
def build_notes(book):
    """Drop the notes stored for ``book`` and rebuild them from its HTML.

    Existing ``Note`` objects of the book are deleted first. When the book
    has an HTML file, every annotation extracted from that file becomes a
    new note carrying its anchor, its HTML and a sort key derived from its
    text (``sortify``-ed, stripped, at most 128 characters).
    """
    Note.objects.filter(book=book).delete()

    html_file = book.html_file
    if html_file:
        from librarian import html

        extracted = html.extract_annotations(html_file.path)
        for anchor, text_str, html_str in extracted:
            Note.objects.create(
                book=book,
                anchor=anchor,
                html=html_str,
                sort_key=sortify(text_str).strip()[:128],
            )
def test_annotations():
    """Yield one annotation-extraction check per sample footnote.

    Each case is ``(source_xml, expected, name)``. The sources are joined
    into one ``<utwor><akap>`` document which is converted to HTML once; the
    annotations extracted from that HTML are then matched to the cases by
    position and handed, with the expected value and the case name, to
    ``_test_annotation``.
    """
    cases = (
        (
            '<pe/>',
            ('pe', [], '', '<p></p>'),
            'Empty footnote',
        ),
        (
            '<pr>Definiendum --- definiens.</pr>',
            (
                'pr',
                [],
                'Definiendum \u2014 definiens.',
                '<p>Definiendum \u2014 definiens.</p>',
            ),
            'Plain footnote.',
        ),
        (
            '<pt><slowo_obce>Definiendum</slowo_obce> --- definiens.</pt>',
            (
                'pt',
                [],
                'Definiendum \u2014 definiens.',
                '<p><em class="foreign-word">Definiendum</em> \u2014 definiens.</p>',
            ),
            'Standard footnote.',
        ),
        (
            '<pr>Definiendum (łac.) --- definiens.</pr>',
            (
                'pr',
                ['łac.'],
                'Definiendum (łac.) \u2014 definiens.',
                '<p>Definiendum (łac.) \u2014 definiens.</p>',
            ),
            'Plain footnote with qualifier',
        ),
        (
            '<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- definiens.</pe>',
            (
                'pe',
                ['łac.'],
                'Definiendum (łac.) \u2014 definiens.',
                '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 definiens.</p>',
            ),
            'Standard footnote with qualifier.',
        ),
        (
            '<pt> <slowo_obce>Definiendum</slowo_obce> (daw.) --- definiens.</pt>',
            (
                'pt',
                ['daw.'],
                'Definiendum (daw.) \u2014 definiens.',
                '<p> <em class="foreign-word">Definiendum</em> (daw.) \u2014 definiens.</p>',
            ),
            'Standard footnote with leading whitespace and qualifier.',
        ),
        (
            '<pr>Definiendum (łac.) --- <slowo_obce>definiens</slowo_obce>.</pr>',
            (
                'pr',
                ['łac.'],
                'Definiendum (łac.) \u2014 definiens.',
                '<p>Definiendum (łac.) \u2014 <em class="foreign-word">definiens</em>.</p>',
            ),
            'Plain footnote with qualifier and some emphasis.',
        ),
        (
            '<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- <slowo_obce>definiens</slowo_obce>.</pe>',
            (
                'pe',
                ['łac.'],
                'Definiendum (łac.) \u2014 definiens.',
                '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 <em class="foreign-word">definiens</em>.</p>',
            ),
            'Standard footnote with qualifier and some emphasis.',
        ),
        (
            '<pe>Definiendum (łac.) --- definiens (some) --- more text.</pe>',
            (
                'pe',
                ['łac.'],
                'Definiendum (łac.) \u2014 definiens (some) \u2014 more text.',
                # NOTE(review): the source text had a line break inside this
                # literal right after "(łac.) "; it is joined here with the
                # single space that precedes it -- confirm exact whitespace.
                '<p>Definiendum (łac.) \u2014 definiens (some) \u2014 more text.</p>',
            ),
            'Footnote with a second parentheses and mdash.',
        ),
        (
            '<pe><slowo_obce>gemajna</slowo_obce> (daw., z niem. <slowo_obce>gemein</slowo_obce>: zwykły) --- '
            'częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</pe>',
            (
                'pe',
                ['daw.', 'niem.'],
                'gemajna (daw., z niem. gemein: zwykły) \u2014 częściej: gemajn, '
                'szeregowiec w wojsku polskim cudzoziemskiego autoramentu.',
                '<p><em class="foreign-word">gemajna</em> (daw., z niem. <em class="foreign-word">gemein</em>: zwykły) '
                '\u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</p>',
            ),
            'Footnote with multiple and qualifiers and emphasis.',
        ),
    )

    # All footnotes go into a single paragraph so one conversion covers them.
    # NOTE(review): whitespace inside the template is reproduced as it
    # appears in the collapsed source -- confirm against the real file.
    combined_sources = "".join(source for source, _, _ in cases)
    xml_src = '''<utwor><akap> %s </akap></utwor>''' % combined_sources

    document = WLDocument.from_string(xml_src, parse_dublincore=False)
    html = document.as_html().get_file()
    extracted = list(extract_annotations(html))

    for position, (_, expected, name) in enumerate(cases):
        yield _test_annotation, expected, extracted[position], name
def test_annotations():
    """Generate a check for every sample footnote in the table below.

    A table row holds the footnote's source XML, the expected extraction
    result and a human-readable case name. The rows' XML is concatenated
    into one document, rendered to HTML, and the extracted annotations are
    paired with the rows by index; each pair is yielded together with
    ``_test_annotation`` and the case name.
    """
    samples = (
        ("<pe/>", ("pe", [], "", "<p></p>"), "Empty footnote"),
        (
            "<pr>Definiendum --- definiens.</pr>",
            ("pr", [], "Definiendum \u2014 definiens.", "<p>Definiendum \u2014 definiens.</p>"),
            "Plain footnote.",
        ),
        (
            "<pt><slowo_obce>Definiendum</slowo_obce> --- definiens.</pt>",
            (
                "pt",
                [],
                "Definiendum \u2014 definiens.",
                '<p><em class="foreign-word">Definiendum</em> \u2014 definiens.</p>',
            ),
            "Standard footnote.",
        ),
        (
            "<pr>Definiendum (łac.) --- definiens.</pr>",
            (
                "pr",
                ["łac."],
                "Definiendum (łac.) \u2014 definiens.",
                "<p>Definiendum (łac.) \u2014 definiens.</p>",
            ),
            "Plain footnote with qualifier",
        ),
        (
            "<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- definiens.</pe>",
            (
                "pe",
                ["łac."],
                "Definiendum (łac.) \u2014 definiens.",
                '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 definiens.</p>',
            ),
            "Standard footnote with qualifier.",
        ),
        (
            "<pt> <slowo_obce>Definiendum</slowo_obce> (daw.) --- definiens.</pt>",
            (
                "pt",
                ["daw."],
                "Definiendum (daw.) \u2014 definiens.",
                '<p> <em class="foreign-word">Definiendum</em> (daw.) \u2014 definiens.</p>',
            ),
            "Standard footnote with leading whitespace and qualifier.",
        ),
        (
            "<pr>Definiendum (łac.) --- <slowo_obce>definiens</slowo_obce>.</pr>",
            (
                "pr",
                ["łac."],
                "Definiendum (łac.) \u2014 definiens.",
                '<p>Definiendum (łac.) \u2014 <em class="foreign-word">definiens</em>.</p>',
            ),
            "Plain footnote with qualifier and some emphasis.",
        ),
        (
            "<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- <slowo_obce>definiens</slowo_obce>.</pe>",
            (
                "pe",
                ["łac."],
                "Definiendum (łac.) \u2014 definiens.",
                '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 <em class="foreign-word">definiens</em>.</p>',
            ),
            "Standard footnote with qualifier and some emphasis.",
        ),
        (
            "<pe>Definiendum (łac.) --- definiens (some) --- more text.</pe>",
            (
                "pe",
                ["łac."],
                "Definiendum (łac.) \u2014 definiens (some) \u2014 more text.",
                # NOTE(review): the source text had a line break inside this
                # literal right after "(łac.) "; it is joined here with the
                # single space that precedes it -- confirm exact whitespace.
                "<p>Definiendum (łac.) \u2014 definiens (some) \u2014 more text.</p>",
            ),
            "Footnote with a second parentheses and mdash.",
        ),
        (
            "<pe><slowo_obce>gemajna</slowo_obce> (daw., z niem. <slowo_obce>gemein</slowo_obce>: zwykły) --- częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</pe>",
            (
                "pe",
                ["daw.", "niem."],
                "gemajna (daw., z niem. gemein: zwykły) \u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.",
                '<p><em class="foreign-word">gemajna</em> (daw., z niem. <em class="foreign-word">gemein</em>: zwykły) \u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</p>',
            ),
            "Footnote with multiple and qualifiers and emphasis.",
        ),
    )

    # NOTE(review): whitespace inside the template is reproduced as it
    # appears in the collapsed source -- confirm against the real file.
    footnotes_xml = "".join(sample[0] for sample in samples)
    xml_src = """<utwor><akap> %s </akap></utwor>""" % footnotes_xml

    html = WLDocument.from_string(xml_src, parse_dublincore=False).as_html().get_file()
    found = list(extract_annotations(html))

    for idx, sample in enumerate(samples):
        _source, expected, name = sample
        yield _test_annotation, expected, found[idx], name
def test_annotations():
    """Produce per-footnote checks for annotation extraction.

    The table lists ``(source_xml, expected, name)`` triples. Their XML
    fragments are glued into a single document, that document is converted
    to HTML, and the i-th extracted annotation is checked against the i-th
    expectation by yielding ``_test_annotation`` with both values and the
    case name.
    """
    table = (
        (
            '<pe/>',
            ('pe', [], '', '<p></p>'),
            'Empty footnote',
        ),
        (
            '<pr>Definiendum --- definiens.</pr>',
            (
                'pr',
                [],
                'Definiendum \u2014 definiens.',
                '<p>Definiendum \u2014 definiens.</p>',
            ),
            'Plain footnote.',
        ),
        (
            '<pt><slowo_obce>Definiendum</slowo_obce> --- definiens.</pt>',
            (
                'pt',
                [],
                'Definiendum \u2014 definiens.',
                '<p><em class="foreign-word">Definiendum</em> \u2014 definiens.</p>',
            ),
            'Standard footnote.',
        ),
        (
            '<pr>Definiendum (łac.) --- definiens.</pr>',
            (
                'pr',
                ['łac.'],
                'Definiendum (łac.) \u2014 definiens.',
                '<p>Definiendum (łac.) \u2014 definiens.</p>',
            ),
            'Plain footnote with qualifier',
        ),
        (
            '<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- definiens.</pe>',
            (
                'pe',
                ['łac.'],
                'Definiendum (łac.) \u2014 definiens.',
                '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 definiens.</p>',
            ),
            'Standard footnote with qualifier.',
        ),
        (
            '<pt> <slowo_obce>Definiendum</slowo_obce> (daw.) --- definiens.</pt>',
            (
                'pt',
                ['daw.'],
                'Definiendum (daw.) \u2014 definiens.',
                '<p> <em class="foreign-word">Definiendum</em> (daw.) \u2014 definiens.</p>',
            ),
            'Standard footnote with leading whitespace and qualifier.',
        ),
        (
            '<pr>Definiendum (łac.) --- <slowo_obce>definiens</slowo_obce>.</pr>',
            (
                'pr',
                ['łac.'],
                'Definiendum (łac.) \u2014 definiens.',
                '<p>Definiendum (łac.) \u2014 <em class="foreign-word">definiens</em>.</p>',
            ),
            'Plain footnote with qualifier and some emphasis.',
        ),
        (
            '<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- <slowo_obce>definiens</slowo_obce>.</pe>',
            (
                'pe',
                ['łac.'],
                'Definiendum (łac.) \u2014 definiens.',
                '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 <em class="foreign-word">definiens</em>.</p>',
            ),
            'Standard footnote with qualifier and some emphasis.',
        ),
        (
            '<pe>Definiendum (łac.) --- definiens (some) --- more text.</pe>',
            (
                'pe',
                ['łac.'],
                'Definiendum (łac.) \u2014 definiens (some) \u2014 more text.',
                # NOTE(review): the source text had a line break inside this
                # literal right after "(łac.) "; it is joined here with the
                # single space that precedes it -- confirm exact whitespace.
                '<p>Definiendum (łac.) \u2014 definiens (some) \u2014 more text.</p>',
            ),
            'Footnote with a second parentheses and mdash.',
        ),
        (
            '<pe><slowo_obce>gemajna</slowo_obce> (daw., z niem. <slowo_obce>gemein</slowo_obce>: zwykły) --- '
            'częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</pe>',
            (
                'pe',
                ['daw.', 'niem.'],
                'gemajna (daw., z niem. gemein: zwykły) \u2014 częściej: gemajn, '
                'szeregowiec w wojsku polskim cudzoziemskiego autoramentu.',
                '<p><em class="foreign-word">gemajna</em> (daw., z niem. <em class="foreign-word">gemein</em>: zwykły) '
                '\u2014 częściej: gemajn, szeregowiec w wojsku polskim cudzoziemskiego autoramentu.</p>',
            ),
            'Footnote with multiple and qualifiers and emphasis.',
        ),
    )

    # NOTE(review): whitespace inside the template is reproduced as it
    # appears in the collapsed source -- confirm against the real file.
    fragments = [row[0] for row in table]
    xml_src = '''<utwor><akap> %s </akap></utwor>''' % "".join(fragments)

    wl_document = WLDocument.from_string(xml_src, parse_dublincore=False)
    html = wl_document.as_html().get_file()
    res_annotations = list(extract_annotations(html))

    for n, (_, expected, name) in enumerate(table):
        actual = res_annotations[n]
        yield _test_annotation, expected, actual, name
def test_annotations():
    """Yield a check for each sample footnote (single-qualifier expectations).

    In this table the second element of every expected tuple is either
    ``None`` or one qualifier string. The sample XML fragments are joined
    into one document, converted to HTML once, and each extracted annotation
    is paired by index with its expectation and yielded with
    ``_test_annotation`` and the case name.
    """
    cases = (
        (
            '<pe/>',
            ('pe', None, '', '<p></p>'),
            'Empty footnote',
        ),
        (
            '<pr>Definiendum --- definiens.</pr>',
            (
                'pr',
                None,
                'Definiendum \u2014 definiens.',
                '<p>Definiendum \u2014 definiens.</p>',
            ),
            'Plain footnote.',
        ),
        (
            '<pt><slowo_obce>Definiendum</slowo_obce> --- definiens.</pt>',
            (
                'pt',
                None,
                'Definiendum \u2014 definiens.',
                '<p><em class="foreign-word">Definiendum</em> \u2014 definiens.</p>',
            ),
            'Standard footnote.',
        ),
        (
            '<pr>Definiendum (łac.) --- definiens.</pr>',
            (
                'pr',
                'łac.',
                'Definiendum (łac.) \u2014 definiens.',
                '<p>Definiendum (łac.) \u2014 definiens.</p>',
            ),
            'Plain footnote with qualifier',
        ),
        (
            '<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- definiens.</pe>',
            (
                'pe',
                'łac.',
                'Definiendum (łac.) \u2014 definiens.',
                '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 definiens.</p>',
            ),
            'Standard footnote with qualifier.',
        ),
        (
            '<pt> <slowo_obce>Definiendum</slowo_obce> (daw.) --- definiens.</pt>',
            (
                'pt',
                'daw.',
                'Definiendum (daw.) \u2014 definiens.',
                '<p> <em class="foreign-word">Definiendum</em> (daw.) \u2014 definiens.</p>',
            ),
            'Standard footnote with leading whitespace and qualifier.',
        ),
        (
            '<pr>Definiendum (łac.) --- <slowo_obce>definiens</slowo_obce>.</pr>',
            (
                'pr',
                'łac.',
                'Definiendum (łac.) \u2014 definiens.',
                '<p>Definiendum (łac.) \u2014 <em class="foreign-word">definiens</em>.</p>',
            ),
            'Plain footnote with qualifier and some emphasis.',
        ),
        (
            '<pe><slowo_obce>Definiendum</slowo_obce> (łac.) --- <slowo_obce>definiens</slowo_obce>.</pe>',
            (
                'pe',
                'łac.',
                'Definiendum (łac.) \u2014 definiens.',
                '<p><em class="foreign-word">Definiendum</em> (łac.) \u2014 <em class="foreign-word">definiens</em>.</p>',
            ),
            'Standard footnote with qualifier and some emphasis.',
        ),
        (
            '<pe>Definiendum (łac.) --- definiens (some) --- more text.</pe>',
            (
                'pe',
                'łac.',
                'Definiendum (łac.) \u2014 definiens (some) \u2014 more text.',
                # NOTE(review): the source text had a line break inside this
                # literal right after "(łac.) "; it is joined here with the
                # single space that precedes it -- confirm exact whitespace.
                '<p>Definiendum (łac.) \u2014 definiens (some) \u2014 more text.</p>',
            ),
            'Footnote with a second parentheses and mdash.',
        ),
    )

    # NOTE(review): whitespace inside the template is reproduced as it
    # appears in the collapsed source -- confirm against the real file.
    joined_xml = "".join(xml for xml, _, _ in cases)
    xml_src = '''<utwor><akap> %s </akap></utwor>''' % joined_xml

    rendered = WLDocument.from_string(xml_src, parse_dublincore=False).as_html()
    html = rendered.get_file()
    results = list(extract_annotations(html))

    for i, case in enumerate(cases):
        expected, name = case[1], case[2]
        yield _test_annotation, expected, results[i], name