def addfile(self, uniqueid, basename, data, mime=None, properties=None, fallback=None, overlay=None):
    """Write a new file under ``self.outdir`` and register it in the manifest maps.

    The media type is ``mime`` when given; otherwise it is looked up in
    ``ext_mime_map`` using the lower-cased extension of ``basename``.  The
    file is placed in the first folder listed in ``self.group_paths`` for the
    group that ``mime_group_map`` assigns to the media type ("Misc" when the
    media type has no entry).

    Args:
        uniqueid: manifest id for the new item; must not already be a key of
            ``self.id_to_href``.
        basename: file name of the new item, appended to the group's default
            folder to form the book path.
        data: file contents.  Passed through ``utf8_str`` before writing when
            the media type is in ``TEXT_MIMETYPES`` or ``data`` is a
            ``text_type`` instance; otherwise written unchanged.
        mime: media type, or None to derive it from the extension.
        properties: value stored in ``self.id_to_props`` for this id.
        fallback: value stored in ``self.id_to_fall`` for this id.
        overlay: value stored in ``self.id_to_over`` for this id.

    Returns:
        ``uniqueid`` as returned by ``unicode_str``.

    Raises:
        WrapperException: if the id is already in use, no media type can be
            determined, an ncx is being added while ``self.epub_version``
            starts with "2", or the computed href already exists.
    """
    uniqueid = unicode_str(uniqueid)
    if uniqueid in self.id_to_href:
        raise WrapperException('Manifest Id is not unique')
    basename = unicode_str(basename)
    mime = unicode_str(mime)
    if mime is None:
        ext = os.path.splitext(basename)[1].lower()
        mime = ext_mime_map.get(ext, None)
    if mime is None:
        raise WrapperException("Mime Type Missing")
    if mime == "application/x-dtbncx+xml" and self.epub_version.startswith("2"):
        raise WrapperException('Can not add or remove an ncx under epub2')

    # work out where the file lives inside the book and relative to the opf
    group = mime_group_map.get(mime, "Misc")
    default_path = self.group_paths[group][0]
    bookpath = basename
    if default_path != "":
        bookpath = default_path + "/" + basename
    href = buildRelativePath(self.opfbookpath, bookpath)
    if href in self.href_to_id:
        raise WrapperException('Basename already exists')

    # now actually write out the new file
    rel_filepath = bookpath.replace("/", os.sep)
    abs_filepath = os.path.join(self.outdir, rel_filepath)
    base = os.path.dirname(abs_filepath)
    if not unipath.exists(base):
        os.makedirs(base)
    if mime in TEXT_MIMETYPES or isinstance(data, text_type):
        data = utf8_str(data)
    with open(abs_filepath, 'wb') as fp:
        fp.write(data)

    # Register the item only once the file is safely on disk, so a failed
    # encode/mkdir/write cannot leave a half-registered id behind.
    self.id_to_filepath[uniqueid] = rel_filepath
    self.id_to_href[uniqueid] = href
    self.id_to_mime[uniqueid] = mime
    self.id_to_props[uniqueid] = properties
    self.id_to_fall[uniqueid] = fallback
    self.id_to_over[uniqueid] = overlay
    self.id_to_bookpath[uniqueid] = bookpath
    self.href_to_id[href] = uniqueid
    self.bookpath_to_id[bookpath] = uniqueid
    self.added.append(uniqueid)
    self.modified[self.opfbookpath] = 'file'
    return uniqueid
def map_mediatype_to_group(self, mtype, ow):
    """Return the group that ``mime_group_map`` holds for a media type.

    ``mtype`` is passed through ``_unicodestr`` and the result is used as the
    lookup key; ``ow`` is returned when the map has no entry for that key.
    """
    return mime_group_map.get(_unicodestr(mtype), ow)
def _parseData(self):
    """Walk the OPF tags and fill in the parser's package/manifest/spine state.

    Consumes ``self._opf_tag_iter()`` and populates ``self.package``,
    ``self.metadata_attr``, ``self.metadata``, ``self.cover_id``, the
    ``self.manifest_id_to_*`` maps, ``self.spine_ppd``, ``self.spine``,
    ``self.guide`` and ``self.bindings``.  While reading manifest items it
    tallies, per group, how many files start in each folder
    (``self.group_folder`` / ``self.group_count``); afterwards each group's
    folder list is ordered by descending count and any group without a
    folder gets a default one.
    """
    item_counter = 0
    for prefix, tname, tattr, tcontent in self._opf_tag_iter():
        if self._debug:
            print(" Parsing OPF: ", prefix, tname, tattr, tcontent)

        if tname == "package":
            version = tattr.pop("version", "2.0")
            unique_identifier = tattr.pop("unique-identifier", "bookid")
            # whatever attributes remain are kept alongside version and uid
            self.package = (version, unique_identifier, tattr)

        elif tname == "metadata":
            self.metadata_attr = tattr

        # `and` binds tighter than `or`: meta/link are taken wherever they
        # appear, only dc:* tags are required to sit inside metadata.
        # NOTE(review): confirm this grouping is the intended one.
        elif tname in ("meta", "link") or (tname.startswith("dc:") and "metadata" in prefix):
            self.metadata.append((tname, tattr, tcontent))
            if tattr.get("name", "") == "cover":
                self.cover_id = tattr.get("content", None)

        elif tname == "item" and "manifest" in prefix:
            # items without an id get a generated one
            generated_id = "xid%03d" % item_counter
            item_counter += 1
            item_id = tattr.pop("id", generated_id)
            raw_href = tattr.pop("href", '')
            mtype = tattr.pop("media-type", '')
            if mtype == "text/html":
                mtype = "application/xhtml+xml"
            if mtype not in mime_group_map:
                print("****Opf_Parser Warning****: Unknown MediaType: ", mtype)
            href = unquoteurl(raw_href)
            properties = tattr.pop("properties", None)
            fallback = tattr.pop("fallback", None)
            overlay = tattr.pop("media-overlay", None)

            # external resources are now allowed in the opf under epub3
            # we can ignore fragments here as these are links to files
            self.manifest_id_to_href[item_id] = href
            # an href containing ":" gets no bookpath (left as "")
            if ":" not in href:
                bookpath = buildBookPath(href, self.opf_dir)
            else:
                bookpath = ""
            self.manifest_id_to_bookpath[item_id] = bookpath
            self.manifest_id_to_mime[item_id] = mtype

            # tally which starting folder this group's files live in
            group = mime_group_map.get(mtype, '')
            if bookpath != "" and group != "":
                group_dirs = self.group_folder.get(group, [])
                group_counts = self.group_count.get(group, [])
                start_dir = startingDir(bookpath)
                if start_dir in group_dirs:
                    slot = group_dirs.index(start_dir)
                    group_counts[slot] += 1
                else:
                    group_dirs.append(start_dir)
                    group_counts.append(1)
                self.group_folder[group] = group_dirs
                self.group_count[group] = group_counts

            self.manifest_id_to_properties[item_id] = properties
            self.manifest_id_to_fallback[item_id] = fallback
            self.manifest_id_to_overlay[item_id] = overlay

        elif tname == "spine":
            if tattr is not None:
                self.spine_ppd = tattr.get("page-progression-direction", None)

        elif tname == "itemref" and "spine" in prefix:
            idref = tattr.pop("idref", "")
            linear = tattr.pop("linear", None)
            itemref_props = tattr.pop("properties", None)
            self.spine.append((idref, linear, itemref_props))

        elif tname == "reference" and "guide" in prefix:
            ref_type = tattr.pop("type", '')
            ref_title = tattr.pop("title", '')
            ref_href = unquoteurl(tattr.pop("href", ''))
            self.guide.append((ref_type, ref_title, ref_href))

        # bindings (stored but ignored for now)
        elif tname in ("mediaType", "mediatype") and "bindings" in prefix:
            binding_mtype = tattr.pop("media-type", "")
            handler = tattr.pop("handler", "")
            self.bindings.append((binding_mtype, handler))

    # NOTE: a disabled (commented-out) pass that determined a unique
    # ShortPathName for each bookpath used to sit here; it iterated over
    # self.bookpaths, which this method no longer fills in.

    # finally sort by number of files in dir to find default folders for each group
    known_groups = ("Text", "Styles", "Images", "Audio", "Fonts", "Video", "Misc")
    primary_dirs = []
    use_lower_case = False
    for group in self.group_folder.keys():
        ranked = sorted(
            zip(self.group_count[group], self.group_folder[group]),
            reverse=True,
        )
        ordered = [folder for _count, folder in ranked]
        self.group_folder[group] = ordered
        if group in known_groups:
            top_folder = ordered[0]
            # one lower-cased group name found in its top folder switches
            # every back-filled default below to lower case
            if group.lower() in top_folder:
                use_lower_case = True
            primary_dirs.append(top_folder)

    # now back fill any missing values
    # commonbase will end with a /
    commonbase = longestCommonPath(primary_dirs)
    if commonbase == "/":
        commonbase = ""
    for group in ("Styles", "Images", "Audio", "Fonts", "Video", "Misc"):
        if self.group_folder.get(group, []):
            continue
        default_name = group.lower() if use_lower_case else group
        self.group_folder[group] = [commonbase + default_name]