def _is_book_modified(book):
    """Tell whether any chapter was modified after the book was created.

    Looks at the chapters of the version returned by getVersion(book, None)
    and compares each chapter's ``modified`` time with ``book.created``.

    Returns True as soon as one chapter is more than five seconds newer
    than the book, otherwise False.
    """
    from booki.editor.views import getVersion
    from time import mktime

    def _stamp(moment):
        # Seconds since the epoch for a datetime, via its time tuple.
        return mktime(moment.timetuple())

    version = getVersion(book, None)
    # 5 seconds grace before a chapter is deemed modified
    cutoff = _stamp(book.created) + 5

    for chap in models.Chapter.objects.filter(version=version):
        logWarning("chapter %s created %s mod %s" % (chap.id, book.created, chap.modified))
        if _stamp(chap.modified) > cutoff:
            return True
    return False
def importBookFromURL(user, bookURL, createTOC=False, **extraOptions): """ Imports book from the url. Creates project and book for it. """ # download it try: f = urllib2.urlopen(bookURL) data = f.read() f.close() except urllib2.URLError, e: logWarning("couldn't read %r: %s" % (bookURL, e)) logWarning(traceback.format_exc()) raise
def flatten(url, prefix):
    """Rewrite a relative link so that it is expressed relative to ``base``.

    ``here`` and ``base`` are free names taken from the enclosing scope
    (not visible in this chunk).

    - Links that carry a scheme are returned untouched.
    - Links that, once joined with ``here`` and normalised, do not start
      with ``base + prefix`` are logged and returned untouched.
    - Everything else is returned with the leading ``base`` stripped from
      the path; query and fragment are preserved.
    """
    scheme, _netloc, raw_path, query, fragment = urlsplit(url)
    if scheme:
        # http, ftp, etc, ... ignore it
        return url

    resolved = normpath(join(here, raw_path))
    wanted = base + prefix

    if resolved.startswith(wanted):
        shortened = resolved[len(base):]
        logWarning("turning %r into %r" % (url, shortened))
        return urlunsplit(('', '', shortened, query, fragment))

    # What is best here? make an absolute http:// link?
    # for now, ignore it.
    logWarning("got a wierd link: %r in %s resolves to %r, wanted start of %s" % (url, here, resolved, wanted))
    return url
def view_books_by_id(request, scheme):
    """
    Find books with IDs of the requested schema, and return mapping of
    IDs to urls that match those books.

    Non-hidden books are grouped by the identifier stored for them under
    the requested scheme. For each identifier the JSON reply holds an
    'edit' URL and an 'epub' URL. The first book in the group that
    _is_book_modified() reports as modified supplies both URLs; when no
    book in the group is modified, the first book supplies the 'edit' URL
    and 'epub' is null.

    @type request: C{django.http.HttpRequest}
    @param request: Django Request.
    @type scheme: C{string}
    @param scheme: Identifier scheme to search for.
    @rtype: C{django.http.HttpResponse}
    @return: JSON object keyed by identifier.
    """
    logWarning("looking for books with %r identifier" % scheme)

    from booki.bookizip import DC
    # getVersion is not used below; the import is kept as in the original.
    from booki.editor.views import getVersion
    from urllib import urlencode

    info_name = '{%s}identifier{%s}' % (DC, scheme)
    found = models.Book.objects.raw('SELECT editor_book.*, editor_info.value_string AS remote_id'
                                    ' FROM editor_book LEFT OUTER JOIN editor_info ON'
                                    ' (editor_book.id=editor_info.book_id) WHERE'
                                    ' editor_info.name=%s', (info_name,))

    # Keys are identifiers in the requested scheme, values are the list of
    # visible books carrying that identifier.
    books_by_remote_id = {}
    for candidate in found:
        if not candidate.hidden:
            group = books_by_remote_id.setdefault(candidate.remote_id, [])
            group.append(candidate)
            logWarning(group)
    logWarning(books_by_remote_id)

    reply = {}
    for remote_id, group in books_by_remote_id.iteritems():
        # The first modified book of the group, if there is one.
        changed = None
        for candidate in group:
            if _is_book_modified(candidate):
                changed = candidate
                break

        if changed is None:
            url_title = group[0].url_title
            epub_link = None
        else:
            url_title = changed.url_title
            epub_link = OBJAVI_URL + '?' + urlencode(
                {'server': THIS_BOOKI_SERVER,
                 'book': url_title,
                 'mode': 'epub',
                 'destination': 'download',
                 })

        reply[remote_id] = {
            'edit': '%s/%s/edit/' % (BOOKI_URL, url_title),  # edit link
            'epub': epub_link,  # epub link
        }

    return HttpResponse(json.dumps(reply), mimetype="application/json")
def _is_book_modified(book):
    """
    Decide whether a book has been modified since it was created.

    A book counts as modified when at least one chapter of the version
    returned by getVersion(book, None) has a C{modified} time more than
    five seconds later than C{book.created}.

    @todo: Comparing timestamps like this is a poor way to detect changes
           and should be replaced. It was written for archive.org and is
           no longer in use.

    @type book: C{booki.editor.models.Book}
    @param book: Book object
    @rtype: C{bool}
    @return: True when a modified chapter is found, False otherwise
    """
    from booki.editor.views import getVersion
    from time import mktime

    book_version = getVersion(book, None)
    created_at = mktime(book.created.timetuple())

    modified = False
    for chapter in models.Chapter.objects.filter(version=book_version):
        logWarning("chapter %s created %s mod %s" % (chapter.id, book.created, chapter.modified))
        changed_at = mktime(chapter.modified.timetuple())
        # 5 seconds grace before a chapter is deemed modified
        if changed_at > created_at + 5:
            modified = True
            break
    return modified
def exportBook(book_version):
    """Write the chapters and attachments of a book version into a BookiZip.

    The package is created at a temporary path obtained from
    tempfile.mkstemp(). TOC rows are visited by descending weight: rows
    with a chapter are added as HTML documents, rows without one open a
    new top-level section. Afterwards every attachment of the version is
    added under "static/".

    NOTE(review): nothing after the attachment loop is visible in this
    chunk, so as shown the function returns None and ``starttime`` /
    ``zfile`` are never used -- confirm against the full file.
    """
    from booki import bookizip
    import time

    starttime = time.time()
    (zfile, zname) = tempfile.mkstemp()

    spine = []
    toc_top = []
    waiting_for_url = []
    package_info = {
        "version": 1,
        "TOC": toc_top,
        "spine": spine,
        "metadata": _format_metadata(book_version.book),
        "manifest": {},
    }
    bzip = bookizip.BookiZip(zname, info=package_info)

    # Chapters are appended here; each new section swaps in its own list.
    toc_current = toc_top
    chapter_n = 1
    toc_rows = models.BookToc.objects.filter(version=book_version).order_by("-weight")

    for i, row in enumerate(toc_rows):
        if not row.chapter:
            # A new top level section.
            title = row.name.encode("utf-8")
            ID = "s%03d_%s" % (i, bookiSlugify(title))
            toc_current = []
            section = {"title": title, "url": "", "type": "booki-section", "children": toc_current}
            toc_top.append(section)
            waiting_for_url.append(section)
            continue

        # It's a real chapter! With content!
        try:
            content = _fix_content(book_version.book, row, chapter_n)
        except:
            # Kept as in the original: any failure skips the chapter silently.
            continue
        chapter_n += 1

        ID = "ch%03d_%s" % (i, row.chapter.url_title.encode("utf-8"))
        filename = ID + ".html"
        toc_current.append({"title": row.chapter.title, "url": filename, "type": "chapter", "role": "text"})

        # If this is the first chapter in a section, lend our url to the
        # section, which has no content and thus no url of its own. Empty
        # sections that came before are still waiting too, so every
        # pending section gets the url.
        for pending in waiting_for_url:
            pending["url"] = filename
        del waiting_for_url[:]

        bzip.add_to_package(ID, filename, content, "text/html")
        spine.append(ID)

    # Attachments are images (and perhaps more). They do not know
    # whether they are currently in use, or what chapter they belong
    # to, so we add them all.
    # XXX scan for img links while adding chapters, and only add those.
    for i, attachment in enumerate(models.Attachment.objects.filter(version=book_version)):
        try:
            f = open(attachment.attachment.name, "rb")
            blob = f.read()
            f.close()
        except (IOError, OSError) as e:
            logWarning("couldn't read attachment %s" % e)
            continue

        fn = os.path.basename(attachment.attachment.name.encode("utf-8"))
        ID = "att%03d_%s" % (i, bookiSlugify(fn))

        # Media type by file extension, with the None entry as fallback.
        fallback = bookizip.MEDIATYPES[None]
        mediatype = fallback
        if "." in fn:
            ext = fn.rsplit(".", 1)[1]
            mediatype = bookizip.MEDIATYPES.get(ext.lower(), fallback)

        bzip.add_to_package(ID, "static/%s" % fn, blob, mediatype)
def importBookFromFile(user, zname, createTOC=False, **extraOptions):
    """Create a new book from a bookizip filename

    Reads info.json from the archive, creates the book with status "new",
    then stores the TOC entries (chapters, plus BookToc rows when
    createTOC is true), every non-HTML manifest item under "static/" as
    an attachment, and every non-empty metadata value as an Info row.

    extraOptions read here: "book_title", "book_url" and "hidden".
    Returns the created book.
    """
    from booki.utils.log import logChapterHistory

    # unzip it and load info.json
    zf = zipfile.ZipFile(zname)
    info = json.loads(zf.read("info.json"))
    logWarning("Loaded json file %r" % info)

    metadata = info["metadata"]
    manifest = info["manifest"]
    TOC = info["TOC"]

    # An explicit title wins over the Dublin Core title of the package.
    bookTitle = extraOptions.get("book_title", None)
    if not bookTitle:
        bookTitle = get_metadata(metadata, "title", ns=DC)[0]
    bookTitle = makeTitleUnique(bookTitle)
    logWarning("Chose unique book title %r" % bookTitle)

    bookURL = extraOptions.get("book_url", None) or None

    book = createBook(user, bookTitle, status="new", bookURL=bookURL)
    if extraOptions.get("hidden"):
        book.hidden = True
        book.save()

    # this is for Table of Contents (unused; kept as in the original)
    p = re.compile('\ssrc="(.*)"')

    # what if it does not have status "new"
    stat = models.BookStatus.objects.filter(book=book, name="new")[0]

    chapters = getChaptersFromTOC(TOC)
    n = len(chapters) + 1  # is +1 necessary?
    now = datetime.datetime.now()

    for title, member, is_section in chapters:
        urlName = bookiSlugify(title)

        if is_section:
            # a section has no chapter behind it
            chapter = None
            typeof = 2
        else:
            # check if i can open this file at all
            content = zf.read(member)
            chapter = models.Chapter(
                book=book,
                version=book.version,
                url_title=urlName,
                title=title,
                status=stat,
                content=content,
                created=now,
                modified=now,
            )
            chapter.save()
            logChapterHistory(chapter=chapter, content=content, user=user,
                              comment="", revision=chapter.revision)
            typeof = 1

        if createTOC:
            toc_row = models.BookToc(book=book, version=book.version, name=title,
                                     chapter=chapter, weight=n, typeof=typeof)
            toc_row.save()
            n -= 1

    stat = models.BookStatus.objects.filter(book=book, name="new")[0]

    from django.core.files import File

    for entry in manifest.values():
        if entry["mimetype"] == "text/html":
            continue
        attachmentName = entry["url"]
        if not attachmentName.startswith("static/"):
            continue

        att = models.Attachment(book=book, version=book.version, status=stat)
        payload = zf.read(attachmentName)
        buf = StringIO(payload)
        wrapped = File(buf)
        wrapped.size = len(payload)
        att.attachment.save(os.path.basename(attachmentName), wrapped, save=False)
        att.save()
        buf.close()

    # metadata
    for namespace, keywords in metadata.iteritems():
        # namespace is something like "http://purl.org/dc/elements/1.1/" or ""
        # in the former case, prepend it to the name, in {}.
        ns = "{%s}" % namespace if namespace else ""
        for keyword, schemes in keywords.iteritems():
            for scheme, values in schemes.iteritems():
                # schema, if it is set, describes the value's format.
                # for example, an identifier might be an ISBN.
                sc = "{%s}" % scheme if scheme else ""
                key = "%s%s%s" % (ns, keyword, sc)
                for v in values:
                    if not v:
                        continue
                    try:
                        record = models.Info(book=book, name=key)
                        if len(v) < 2500:
                            record.value_string = v
                            record.kind = 0
                        else:
                            record.value_text = v
                            record.kind = 2
                        record.save()
                    except:
                        # For now just ignore any kind of error here.
                        # Considering we don't handle metadata as we
                        # should it is not such a problem.
                        pass

    zf.close()
    return book
# download it try: f = urllib2.urlopen(bookURL) data = f.read() f.close() except urllib2.URLError, e: logWarning("couldn't read %r: %s" % (bookURL, e)) logWarning(traceback.format_exc()) raise try: zf = StringIO(data) book = importBookFromFile(user, zf, createTOC, **extraOptions) zf.close() except Exception, e: logWarning("couldn't make book from %r: %s" % (bookURL, e)) logWarning(traceback.format_exc()) raise return book def importBookFromUrl2(user, baseurl, args, **extraOptions): args["mode"] = "zip" url = baseurl + "?" + urlencode(args) return importBookFromURL(user, url, createTOC=True, **extraOptions) def expand_macro(chapter): try: t = template.loader.get_template_from_string("{% load booki_tags %} {% booki_format content %}")
def importBookFromFile(user, zname, createTOC=False, **extraOptions):
    """Create a new book from a bookizip filename

    Reads info.json from the archive, creates the book with status
    "imported", then stores the TOC entries (chapters, plus BookToc rows
    when createTOC is true), every non-HTML manifest item under "static/"
    as an attachment, and every metadata value as an Info row.

    extraOptions read here: 'book_title' and 'book_url'.
    Nothing is returned.
    """
    # unzip it and load info.json
    zf = zipfile.ZipFile(zname)
    info = json.loads(zf.read('info.json'))
    logWarning("Loaded json file %r" % info)

    metadata = info['metadata']
    manifest = info['manifest']
    TOC = info['TOC']

    # An explicit title wins over the Dublin Core title of the package.
    bookTitle = extraOptions.get('book_title', None)
    if not bookTitle:
        bookTitle = get_metadata(metadata, 'title', ns=DC)[0]
    bookTitle = makeTitleUnique(bookTitle)

    bookURL = extraOptions.get('book_url', None) or None

    book = createBook(user, bookTitle, status="imported", bookURL=bookURL)

    # this is for Table of Contents (unused; kept as in the original)
    p = re.compile('\ssrc="(.*)"')

    # what if it does not have status "imported"
    stat = models.BookStatus.objects.filter(book=book, name="imported")[0]

    chapters = getChaptersFromTOC(TOC)
    n = len(chapters) + 1  # is +1 necessary?
    now = datetime.datetime.now()

    for title, member, is_section in chapters:
        urlName = slugify(title)

        if is_section:
            # a section has no chapter behind it
            chapter = None
            typeof = 2
        else:
            # check if i can open this file at all
            content = zf.read(member)
            chapter = models.Chapter(book=book,
                                     version=book.version,
                                     url_title=urlName,
                                     title=title,
                                     status=stat,
                                     content=content,
                                     created=now,
                                     modified=now)
            chapter.save()
            typeof = 1

        if createTOC:
            toc_row = models.BookToc(book=book,
                                     version=book.version,
                                     name=title,
                                     chapter=chapter,
                                     weight=n,
                                     typeof=typeof)
            toc_row.save()
            n -= 1

    stat = models.BookStatus.objects.filter(book=book, name="imported")[0]

    from django.core.files import File

    for entry in manifest.values():
        if entry["mimetype"] == 'text/html':
            continue
        attachmentName = entry['url']
        if not attachmentName.startswith("static/"):
            continue

        att = models.Attachment(book=book, version=book.version, status=stat)
        payload = zf.read(attachmentName)
        buf = StringIO(payload)
        wrapped = File(buf)
        wrapped.size = len(payload)
        att.attachment.save(os.path.basename(attachmentName), wrapped, save=False)
        att.save()
        buf.close()

    # metadata
    for namespace, keywords in metadata.iteritems():
        # namespace is something like "http://purl.org/dc/elements/1.1/" or ""
        # in the former case, prepend it to the name, in {}.
        ns = '{%s}' % namespace if namespace else ''
        for keyword, schemes in keywords.iteritems():
            for scheme, values in schemes.iteritems():
                # schema, if it is set, describes the value's format.
                # for example, an identifier might be an ISBN.
                sc = '{%s}' % scheme if scheme else ''
                key = "%s%s%s" % (ns, keyword, sc)
                for v in values:
                    record = models.Info(book=book, name=key)
                    if len(v) < 2500:
                        record.value_string = v
                        record.kind = 0
                    else:
                        record.value_text = v
                        record.kind = 2
                    record.save()

    zf.close()
# download it try: f = urllib2.urlopen(bookURL) data = f.read() f.close() except urllib2.URLError, e: logWarning("couldn't read %r: %s" % (bookURL, e)) logWarning(traceback.format_exc()) raise try: zf = StringIO(data) importBookFromFile(user, zf, createTOC, **extraOptions) zf.close() except Exception, e: logWarning("couldn't make book from %r: %s" % (bookURL, e)) logWarning(traceback.format_exc()) raise def importBookFromUrl2(user, baseurl, args, **extraOptions): args['mode'] = 'zip' url = baseurl + "?" + urlencode(args) importBookFromURL(user, url, createTOC=True, **extraOptions) def expand_macro(chapter): try: t = template.loader.get_template_from_string( '{% load booki_tags %} {% booki_format content %}') return t.render(template.Context({"content": chapter}))
def exportBook(book_version):
    """Write the chapters and attachments of a book version into a BookiZip.

    The package is created at a temporary path obtained from
    tempfile.mkstemp(). TOC rows are visited by descending weight: rows
    with a chapter are added as HTML documents, rows without one open a
    new top-level section. Afterwards every attachment of the version is
    added under "static/".

    Chapters whose content cannot be prepared and attachments that cannot
    be read are logged and skipped; they do not abort the export.

    NOTE(review): nothing after the attachment loop is visible in this
    chunk, so as shown the function returns None and ``starttime`` is
    never used -- confirm against the full file.
    """
    from booki import bookizip
    import time

    starttime = time.time()
    # NOTE(review): mkstemp() also returns an open OS-level descriptor
    # (zfile). Nothing visible here closes it; if the rest of the function
    # does not either, it should be released with os.close(zfile).
    (zfile, zname) = tempfile.mkstemp()

    spine = []
    toc_top = []
    toc_current = toc_top
    waiting_for_url = []
    info = {
        "version": 1,
        "TOC": toc_top,
        "spine": spine,
        "metadata": _format_metadata(book_version.book),
        "manifest": {}
    }
    bzip = bookizip.BookiZip(zname, info=info)

    chapter_n = 1
    toc_rows = models.BookToc.objects.filter(version=book_version).order_by("-weight")

    for i, chapter in enumerate(toc_rows):
        if chapter.chapter:
            # It's a real chapter! With content!
            try:
                content = _fix_content(book_version.book, chapter, chapter_n)
            except Exception as e:
                # Still best-effort, but leave a trace of what was dropped
                # and let KeyboardInterrupt/SystemExit propagate.
                logWarning("couldn't export chapter %r: %r" % (chapter.chapter.url_title, e))
                continue
            chapter_n += 1

            ID = "ch%03d_%s" % (i, chapter.chapter.url_title.encode('utf-8'))
            filename = ID + '.html'
            toc_current.append({
                "title": chapter.chapter.title,
                "url": filename,
                "type": "chapter",
                "role": "text"
            })

            # If this is the first chapter in a section, lend our url
            # to the section, which has no content and thus no url of
            # its own. If this section was preceded by an empty
            # section, it will be waiting too, hence "while" rather
            # than "if".
            while waiting_for_url:
                section = waiting_for_url.pop()
                section["url"] = filename

            bzip.add_to_package(ID, filename, content, "text/html")
            spine.append(ID)
        else:
            # A new top level section. The original also built a slug ID
            # here that was never used, and building it called unicode()
            # on UTF-8 bytes, which fails for non-ASCII titles.
            title = chapter.name.encode("utf-8")
            toc_current = []
            section = {
                "title": title,
                "url": '',
                "type": "booki-section",
                "children": toc_current
            }
            toc_top.append(section)
            waiting_for_url.append(section)

    # Attachments are images (and perhaps more). They do not know
    # whether they are currently in use, or what chapter they belong
    # to, so we add them all.
    # XXX scan for img links while adding chapters, and only add those.
    for i, attachment in enumerate(models.Attachment.objects.filter(version=book_version)):
        try:
            # "with" closes the handle even when read() fails.
            with open(attachment.attachment.name, "rb") as f:
                blob = f.read()
        except (IOError, OSError) as e:
            logWarning("couldn't read attachment %s" % e)
            continue

        fn = os.path.basename(attachment.attachment.name.encode("utf-8"))
        # fn is UTF-8 bytes; decode it explicitly, because unicode(fn)
        # would use the ASCII default and fail on non-ASCII file names.
        ID = "att%03d_%s" % (i, booktype_slugify(fn.decode("utf-8")))

        if '.' in fn:
            _, ext = fn.rsplit('.', 1)
            mediatype = bookizip.MEDIATYPES.get(ext.lower(), bookizip.MEDIATYPES[None])
        else:
            mediatype = bookizip.MEDIATYPES[None]

        bzip.add_to_package(ID, "static/%s" % fn, blob, mediatype)
def importBookFromFile(user, zname, createTOC=False, **extraOptions):
    """Create a new book from a bookizip filename

    Reads info.json from the archive, creates the book with status "new",
    then stores the TOC entries (chapters, plus BookToc rows when
    createTOC is true), every non-HTML manifest item under "static/" as
    an attachment, and every non-empty metadata value as an Info row.

    extraOptions read here: 'book_title', 'book_url' and 'hidden'.
    Returns the created book.

    The archive is closed even when the import fails part-way. Objects
    already saved at that point are not rolled back here. An IndexError
    is raised if the new book has no "new" status.
    """
    from booki.utils.log import logChapterHistory
    from django.core.files import File

    # unzip it
    zf = zipfile.ZipFile(zname)
    try:
        # load info.json
        info = json.loads(zf.read('info.json'))
        logWarning("Loaded json file %r" % info)

        metadata = info['metadata']
        manifest = info['manifest']
        TOC = info['TOC']

        # An explicit title wins over the Dublin Core title of the package.
        bookTitle = extraOptions.get('book_title')
        if not bookTitle:
            bookTitle = get_metadata(metadata, 'title', ns=DC)[0]
        bookTitle = makeTitleUnique(bookTitle)
        logWarning("Chose unique book title %r" % bookTitle)

        bookURL = extraOptions.get('book_url') or None

        book = create_book(user, bookTitle, status="new", bookURL=bookURL)
        if extraOptions.get("hidden"):
            book.hidden = True
            book.save()

        # what if it does not have status "new"
        # The same status is used for chapters and attachments, so it is
        # looked up once.
        stat = models.BookStatus.objects.filter(book=book, name="new")[0]

        chapters = getChaptersFromTOC(TOC)
        n = len(chapters) + 1  # is +1 necessary?
        now = datetime.datetime.now()

        for chapterName, chapterFile, is_section in chapters:
            urlName = booktype_slugify(chapterName)

            if is_section:
                # a section has no chapter behind it
                chapter = None
                typeof = 2
            else:
                # check if i can open this file at all
                content = zf.read(chapterFile)
                chapter = models.Chapter(book=book,
                                         version=book.version,
                                         url_title=urlName,
                                         title=chapterName,
                                         status=stat,
                                         content=content,
                                         created=now,
                                         modified=now)
                chapter.save()
                logChapterHistory(chapter=chapter,
                                  content=content,
                                  user=user,
                                  comment="",
                                  revision=chapter.revision)
                typeof = 1

            if createTOC:
                c = models.BookToc(book=book,
                                   version=book.version,
                                   name=chapterName,
                                   chapter=chapter,
                                   weight=n,
                                   typeof=typeof)
                c.save()
                n -= 1

        for item in manifest.values():
            if item["mimetype"] == 'text/html':
                continue
            attachmentName = item['url']
            if not attachmentName.startswith("static/"):
                continue

            att = models.Attachment(book=book, version=book.version, status=stat)
            s = zf.read(attachmentName)
            f = StringIO(s)
            try:
                f2 = File(f)
                f2.size = len(s)
                att.attachment.save(os.path.basename(attachmentName), f2, save=False)
                att.save()
            finally:
                f.close()

        # metadata
        for namespace in metadata:
            # namespace is something like "http://purl.org/dc/elements/1.1/" or ""
            # in the former case, prepend it to the name, in {}.
            ns = ('{%s}' % namespace if namespace else '')
            for keyword, schemes in metadata[namespace].iteritems():
                for scheme, values in schemes.iteritems():
                    # schema, if it is set, describes the value's format.
                    # for example, an identifier might be an ISBN.
                    sc = ('{%s}' % scheme if scheme else '')
                    key = "%s%s%s" % (ns, keyword, sc)
                    for v in values:
                        if not v:
                            continue
                        try:
                            record = models.Info(book=book, name=key)
                            if len(v) >= 2500:
                                record.value_text = v
                                record.kind = 2
                            else:
                                record.value_string = v
                                record.kind = 0
                            record.save()
                        except Exception as e:
                            # Metadata stays best-effort: a value that
                            # cannot be stored must not abort the import,
                            # but it is logged instead of vanishing.
                            logWarning("couldn't store metadata %r: %r" % (key, e))
    finally:
        zf.close()

    return book