Esempio n. 1
0
 def addfile(self,
             uniqueid,
             basename,
             data,
             mime=None,
             properties=None,
             fallback=None,
             overlay=None):
     """Write a new file under ``self.outdir`` and register it in the manifest tables.

     The destination folder is the first entry of ``self.group_paths`` for
     the group that ``mime_group_map`` assigns to the media type ("Misc"
     when the media type is not in the map).

     Args:
         uniqueid: manifest id for the new item; must not already be a key
             of ``self.id_to_href``.
         basename: file name of the new item (no folder part is added by
             the caller; the folder is derived from the media type group).
         data: file contents.  Encoded with ``utf8_str`` when the media
             type is in ``TEXT_MIMETYPES`` or when ``data`` is a
             ``text_type`` instance; written as-is otherwise.
         mime: media type.  When None it is looked up in ``ext_mime_map``
             from the lower-cased extension of ``basename``.
         properties, fallback, overlay: stored unchanged in
             ``id_to_props`` / ``id_to_fall`` / ``id_to_over``.

     Returns:
         The (unicode-converted) ``uniqueid``.

     Raises:
         WrapperException: the id is already in use, no media type could
             be determined, an ncx is being added while
             ``self.epub_version`` starts with "2", or the computed href
             is already present in ``self.href_to_id``.
     """
     uniqueid = unicode_str(uniqueid)
     if uniqueid in self.id_to_href:
         raise WrapperException('Manifest Id is not unique')
     basename = unicode_str(basename)
     # NOTE(review): presumably unicode_str passes None through, otherwise
     # the extension-based lookup below could never run - confirm.
     mime = unicode_str(mime)
     if mime is None:
         ext = os.path.splitext(basename)[1]
         ext = ext.lower()
         mime = ext_mime_map.get(ext, None)
     if mime is None:
         raise WrapperException("Mime Type Missing")
     if mime == "application/x-dtbncx+xml" and self.epub_version.startswith(
             "2"):
         raise WrapperException('Can not add or remove an ncx under epub2')
     group = mime_group_map.get(mime, "Misc")
     default_path = self.group_paths[group][0]
     bookpath = basename
     if default_path != "":
         bookpath = default_path + "/" + basename
     href = buildRelativePath(self.opfbookpath, bookpath)
     if href in self.href_to_id:
         raise WrapperException('Basename already exists')
     # now actually write out the new file
     relpath = bookpath.replace("/", os.sep)
     filepath = os.path.join(self.outdir, relpath)
     base = os.path.dirname(filepath)
     if not unipath.exists(base):
         os.makedirs(base)
     if mime in TEXT_MIMETYPES or isinstance(data, text_type):
         data = utf8_str(data)
     with open(filepath, 'wb') as fp:
         fp.write(data)
     # Update every lookup table only after the write succeeded, so that a
     # failed makedirs/open/write cannot leave a dangling id_to_filepath
     # entry for an id that none of the other tables know about.
     self.id_to_filepath[uniqueid] = relpath
     self.id_to_href[uniqueid] = href
     self.id_to_mime[uniqueid] = mime
     self.id_to_props[uniqueid] = properties
     self.id_to_fall[uniqueid] = fallback
     self.id_to_over[uniqueid] = overlay
     self.id_to_bookpath[uniqueid] = bookpath
     self.href_to_id[href] = uniqueid
     self.bookpath_to_id[bookpath] = uniqueid
     self.added.append(uniqueid)
     # record the OPF's own bookpath in the modified table
     self.modified[self.opfbookpath] = 'file'
     return uniqueid
Esempio n. 2
0
 def map_mediatype_to_group(self, mtype, ow):
     """Return the ``mime_group_map`` entry for ``mtype``, or ``ow`` if there is none.

     ``mtype`` is passed through ``_unicodestr`` before the lookup.
     """
     media_type = _unicodestr(mtype)
     if media_type in mime_group_map:
         return mime_group_map[media_type]
     return ow
Esempio n. 3
0
    def _parseData(self):
        """Consume ``self._opf_tag_iter()`` and fill in the parser's tables.

        Each yielded ``(prefix, tname, tattr, tcontent)`` tuple is handled by
        the first matching rule:

        * ``package``   -> ``self.package`` as (version, unique-identifier,
          remaining attributes), defaulting to "2.0" and "bookid".
        * ``metadata``  -> ``self.metadata_attr``.
        * ``meta`` / ``link`` / ``dc:*`` -> appended to ``self.metadata``; a
          tag whose ``name`` attribute is "cover" also sets ``self.cover_id``.
        * manifest ``item`` -> the ``manifest_id_to_*`` tables, plus a
          per-group tally of starting folders in ``self.group_folder`` /
          ``self.group_count``.
        * ``spine`` / spine ``itemref`` -> ``self.spine_ppd`` / ``self.spine``.
        * guide ``reference`` -> ``self.guide``.
        * bindings ``mediaType`` -> ``self.bindings`` (stored only).

        Afterwards every group's folder list is ordered by descending file
        count, and groups without any folder receive a default one.
        """
        item_seq = 0
        for prefix, tname, tattr, tcontent in self._opf_tag_iter():
            if self._debug:
                print("   Parsing OPF: ", prefix, tname, tattr, tcontent)

            if tname == "package":
                version = tattr.pop("version", "2.0")
                unique_identifier = tattr.pop("unique-identifier", "bookid")
                # whatever attributes are left over are kept with the tuple
                self.package = (version, unique_identifier, tattr)

            elif tname == "metadata":
                self.metadata_attr = tattr

            # NOTE(review): "and" binds tighter than "or", so meta/link tags
            # match here whatever their prefix; only dc:* tags are required
            # to sit under metadata.  Parenthesised to show the effective
            # grouping - confirm this is intended.
            elif (tname in ("meta", "link")
                  or (tname.startswith("dc:") and "metadata" in prefix)):
                self.metadata.append((tname, tattr, tcontent))
                if tattr.get("name", "") == "cover":
                    self.cover_id = tattr.get("content", None)

            elif tname == "item" and "manifest" in prefix:
                # the sequence number advances for every item, including
                # items that carry their own id
                generated_id = "xid%03d" % item_seq
                item_seq += 1
                item_id = tattr.pop("id", generated_id)
                href = tattr.pop("href", '')
                mtype = tattr.pop("media-type", '')
                if mtype == "text/html":
                    mtype = "application/xhtml+xml"
                if mtype not in mime_group_map:
                    print("****Opf_Parser Warning****: Unknown MediaType: ",
                          mtype)
                href = unquoteurl(href)
                properties = tattr.pop("properties", None)
                fallback = tattr.pop("fallback", None)
                overlay = tattr.pop("media-overlay", None)

                # external resources are allowed in the opf under epub3;
                # fragments can be ignored here as these are links to files
                self.manifest_id_to_href[item_id] = href

                # an href containing ":" gets no bookpath
                if ":" not in href:
                    bookpath = buildBookPath(href, self.opf_dir)
                else:
                    bookpath = ""
                self.manifest_id_to_bookpath[item_id] = bookpath
                self.manifest_id_to_mime[item_id] = mtype

                group = mime_group_map.get(mtype, '')
                if bookpath != "" and group != "":
                    # parallel lists: folder names and files seen in each
                    seen_dirs = self.group_folder.get(group, [])
                    dir_counts = self.group_count.get(group, [])
                    top_dir = startingDir(bookpath)
                    if top_dir in seen_dirs:
                        dir_counts[seen_dirs.index(top_dir)] += 1
                    else:
                        seen_dirs.append(top_dir)
                        dir_counts.append(1)
                    self.group_folder[group] = seen_dirs
                    self.group_count[group] = dir_counts

                self.manifest_id_to_properties[item_id] = properties
                self.manifest_id_to_fallback[item_id] = fallback
                self.manifest_id_to_overlay[item_id] = overlay

            elif tname == "spine":
                if tattr is not None:
                    self.spine_ppd = tattr.get("page-progression-direction",
                                               None)

            elif tname == "itemref" and "spine" in prefix:
                idref = tattr.pop("idref", "")
                linear = tattr.pop("linear", None)
                itemref_props = tattr.pop("properties", None)
                self.spine.append((idref, linear, itemref_props))

            elif tname == "reference" and "guide" in prefix:
                ref_type = tattr.pop("type", '')
                ref_title = tattr.pop("title", '')
                ref_href = unquoteurl(tattr.pop("href", ''))
                self.guide.append((ref_type, ref_title, ref_href))

            # bindings (stored but ignored for now)
            elif tname in ("mediaType", "mediatype") and "bindings" in prefix:
                binding_mtype = tattr.pop("media-type", "")
                handler = tattr.pop("handler", "")
                self.bindings.append((binding_mtype, handler))

        # (a disabled pass that derived a unique short path name for each
        # bookpath used to sit here; it never ran)

        # order each group's folders by descending file count and collect
        # the most used folder of every standard group
        standard_groups = ("Text", "Styles", "Images", "Audio", "Fonts",
                           "Video", "Misc")
        top_folders = []
        use_lower_case = False
        for group in self.group_folder:
            ranked = sorted(
                zip(self.group_count[group], self.group_folder[group]),
                reverse=True)
            folders = [folder for _, folder in ranked]
            self.group_folder[group] = folders
            if group in standard_groups:
                most_used = folders[0]
                if group.lower() in most_used:
                    use_lower_case = True
                top_folders.append(most_used)

        # back fill groups that ended up without a folder
        # (per the original author, commonbase ends with a "/")
        commonbase = longestCommonPath(top_folders)
        if commonbase == "/":
            commonbase = ""
        for group in ("Styles", "Images", "Audio", "Fonts", "Video", "Misc"):
            if self.group_folder.get(group, []):
                continue
            gname = group.lower() if use_lower_case else group
            self.group_folder[group] = [commonbase + gname]