Beispiel #1
0
"""

from StringIO import StringIO
from urllib2 import urlopen
from zipfile import ZipFile

from librarian.dcparser import BookInfo
from librarian import text

from lesmianator import Lesmianator


# Location of the zip archive that the script downloads and scans.
XML_FILES = "http://www.wolnelektury.pl/media/packs/xml-all.zip"


def main():
    """Feed every 'Wiersz' book from the XML_FILES archive to a Lesmianator.

    Downloads the whole archive into memory, prints the name of each
    archive member, reads its metadata with ``BookInfo.from_file`` and,
    when ``u'Wiersz'`` (Polish for "poem") is among ``info.genres``,
    renders the member with ``text.transform`` and passes the rendered
    text to ``Lesmianator.add_text``.  ``Lesmianator.save`` is called once
    after all members have been processed.

    Every handle opened here (HTTP response, archive, archive members) is
    closed explicitly instead of being left to the garbage collector.
    """
    # Imported locally so this block does not depend on a change to the
    # file-level import list.
    from contextlib import closing

    poet = Lesmianator()

    # urllib2 responses are not context managers, hence closing().
    with closing(urlopen(XML_FILES)) as response:
        archive_bytes = response.read()

    with closing(ZipFile(StringIO(archive_bytes))) as xml_zip:
        for filename in xml_zip.namelist():
            print(filename)

            with closing(xml_zip.open(filename)) as book_file:
                info = BookInfo.from_file(book_file)

            if u'Wiersz' not in info.genres:
                continue

            # The member is opened a second time so the transform starts
            # reading from the beginning of the document.
            output = StringIO()
            with closing(xml_zip.open(filename)) as book_file:
                # NOTE(review): the meaning of the positional ``False`` and
                # the ('raw-text',) flags is defined by librarian.text --
                # confirm there before changing them.
                text.transform(book_file, output, False, ('raw-text',))
            poet.add_text(output.getvalue())

    poet.save()


if __name__ == '__main__':
    main()