コード例 #1
0
ファイル: nlpamcat.py プロジェクト: mcomsa/nlpipe
def _get_text(a, to_naf=False, lang='nl'):
    """
    Return the text of article *a*, optionally wrapped in a NAF document.

    The normalized 'headline' and 'text' entries of *a* are joined with a
    blank line. With ``to_naf`` false that string is returned as is.
    Otherwise a new NAF document (version "3.0", language *lang*) is built
    with that string as raw text, and its serialized form is returned.

    :param a: mapping describing the article. 'headline' and 'text' are
        always read; 'uuid' is read (as the public id) when ``to_naf`` is
        true. 'author', 'headline', 'date', 'medium', 'page' and 'section'
        are copied into the file descriptor when present.
    :param to_naf: if true, return a serialized NAF document instead of
        the plain text.
    :param lang: language set on the NAF document.
    :return: the plain text, or the NAF dump decoded as UTF-8.
    """
    plain = "\n\n".join(_normalize(a[field]) for field in ('headline', 'text'))
    if not to_naf:
        return plain

    doc = KafNafParser(type="NAF")
    doc.header = CHeader(type=doc.type)
    doc.root.insert(0, doc.header.get_node())

    doc.set_language(lang)
    doc.set_raw(plain)
    doc.set_version("3.0")

    # Article key -> name of the file-descriptor setter that receives it.
    # Order matters only in that it mirrors the order the setters are called.
    optional_fields = (
        ('author', 'set_author'),
        ('headline', 'set_title'),
        ('date', 'set_creationtime'),
        ('medium', 'set_magazine'),
        ('page', 'set_pages'),
        ('section', 'set_section'),
    )
    file_desc = CfileDesc()
    for key, setter_name in optional_fields:
        if key not in a:
            continue
        value = a[key]
        if key == 'page':
            # The page is the only field converted to a string before storing.
            value = str(value)
        getattr(file_desc, setter_name)(value)
    doc.header.set_fileDesc(file_desc)

    doc.header.set_publicId(a['uuid'])
    # The article 'url' is currently not written to the header URI.

    buffer = BytesIO()
    doc.dump(buffer)
    return buffer.getvalue().decode("utf-8")
コード例 #2
0
ファイル: test_header.py プロジェクト: cltl/KafNafParserPy
def test_header():
    """
    Check that header attributes can be set, read back and round-tripped.

    A URI and a public id are set on the header of a fresh NAF document and
    read back through the getters. The document is then dumped to an
    in-memory buffer and parsed again to confirm both values are preserved.

    Make sure to run with nosetests -s, otherwise python3 will err
    """
    uri = "http://example.com"
    public_id = "123"

    doc = KafNafParser(type="NAF")
    doc.header = CHeader(type=doc.type)
    doc.root.insert(0, doc.header.get_node())

    doc.header.set_uri(uri)
    assert_equal(uri, doc.header.get_uri())
    doc.header.set_publicId(public_id)
    assert_equal(public_id, doc.header.get_publicId())

    # Dump to memory and parse again: the values must survive the round trip.
    buffer = BytesIO()
    doc.dump(buffer)
    buffer.seek(0)
    reparsed = KafNafParser(buffer, type="NAF")
    assert_equal(uri, reparsed.header.get_uri())
    assert_equal(public_id, reparsed.header.get_publicId())
コード例 #3
0
def test_header():
    """
    Verify the header setters, getters and their serialization.

    Sets a URI and a public id on a newly created NAF header, checks the
    getters return them, then dumps the document to a byte buffer and
    re-parses it to check the same values come back.

    Make sure to run with nosetests -s, otherwise python3 will err
    """
    expected_uri = "http://example.com"
    expected_id = "123"

    source = KafNafParser(type="NAF")
    source.header = CHeader(type=source.type)
    source.root.insert(0, source.header.get_node())

    header = source.header
    header.set_uri(expected_uri)
    assert_equal(expected_uri, header.get_uri())
    header.set_publicId(expected_id)
    assert_equal(expected_id, header.get_publicId())

    # Serialize, rewind, and parse the bytes back into a second document.
    stream = BytesIO()
    source.dump(stream)
    stream.seek(0)
    restored = KafNafParser(stream, type="NAF")
    assert_equal(expected_uri, restored.header.get_uri())
    assert_equal(expected_id, restored.header.get_publicId())