""" from StringIO import StringIO from urllib2 import urlopen from zipfile import ZipFile from librarian.dcparser import BookInfo from librarian import text from lesmianator import Lesmianator XML_FILES = "http://www.wolnelektury.pl/media/packs/xml-all.zip" if __name__ == '__main__': poet = Lesmianator() xml_zip = ZipFile(StringIO(urlopen(XML_FILES).read())) for filename in xml_zip.namelist(): print filename info = BookInfo.from_file(xml_zip.open(filename)) if u'Wiersz' in info.genres: output = StringIO() text.transform(xml_zip.open(filename), output, False, ('raw-text',)) poet.add_text(output.getvalue()) poet.save()