def _get_text(a, to_naf=False, lang='nl'):
    """
    Return the text of article ``a``, either as plain text or as a NAF document.

    The plain text is the normalized headline and the normalized text, joined
    by a blank line.

    :param a: mapping with at least the keys 'headline' and 'text'; 'uuid' is
              also required when ``to_naf`` is true. The optional keys
              'author', 'date', 'medium', 'page' and 'section' are copied into
              the NAF file description when present.
              (presumably an article dict -- verify against callers)
    :param to_naf: if true, wrap the plain text in a NAF document and return
                   its serialized form instead of the plain text
    :param lang: language set on the NAF document
    :return: the plain text, or the dumped NAF document decoded as UTF-8
    """
    plain_text = "\n\n".join((_normalize(a['headline']), _normalize(a['text'])))
    if not to_naf:
        return plain_text

    # Build an empty NAF document and attach a fresh header as its first node.
    document = KafNafParser(type="NAF")
    document.header = CHeader(type=document.type)
    document.root.insert(0, document.header.get_node())
    document.set_language(lang)
    document.set_raw(plain_text)
    document.set_version("3.0")

    # (article key, CfileDesc setter name), applied in this order.
    # Setters are resolved lazily so only fields present in `a` are touched.
    file_desc_fields = (
        ('author', 'set_author'),
        ('headline', 'set_title'),
        ('date', 'set_creationtime'),
        ('medium', 'set_magazine'),
        ('page', 'set_pages'),
        ('section', 'set_section'),
    )
    file_desc = CfileDesc()
    for key, setter_name in file_desc_fields:
        if key not in a:
            continue
        setter = getattr(file_desc, setter_name)
        value = a[key]
        if key == 'page':
            # The page is passed to the file description as a string.
            value = str(value)
        setter(value)

    document.header.set_fileDesc(file_desc)
    document.header.set_publicId(a['uuid'])
    # NOTE: a['url'] is currently not written to the header; the
    # corresponding header.set_uri(a['url']) call is disabled.

    # dump() writes to a binary stream, so decode the buffer for the caller.
    buffer = BytesIO()
    document.dump(buffer)
    return buffer.getvalue().decode("utf-8")
def test_header():
    """
    Check that the header setters for uri and publicId work and round-trip.

    The values are first read back from the in-memory header, then the
    document is dumped to a byte buffer and parsed again to verify that both
    attributes survive serialization.

    Make sure to run with nosetests -s, otherwise python3 will err
    (note carried over from the original author).
    """
    expected_uri = "http://example.com"
    expected_public_id = "123"

    # Fresh NAF document with a new header inserted as the first root node.
    original = KafNafParser(type="NAF")
    original.header = CHeader(type=original.type)
    original.root.insert(0, original.header.get_node())

    original.header.set_uri(expected_uri)
    assert_equal(expected_uri, original.header.get_uri())

    original.header.set_publicId(expected_public_id)
    assert_equal(expected_public_id, original.header.get_publicId())

    # Serialize, rewind the buffer, and parse it back into a second document.
    buffer = BytesIO()
    original.dump(buffer)
    buffer.seek(0)
    reparsed = KafNafParser(buffer, type="NAF")

    assert_equal(expected_uri, reparsed.header.get_uri())
    assert_equal(expected_public_id, reparsed.header.get_publicId())