def copy_book_contents_to(self, destdir):
    """Copy every tracked book file into destdir, preserving relative paths.

    Manifest entries (self.id_to_filepath) are read via readfile(); all other
    tracked files (self.book_href_to_filepath) via readotherfile().

    Raises:
        WrapperException: if destdir is missing or is not a directory.
    """
    destdir = unicode_str(destdir)
    if destdir is None or not unipath.isdir(destdir):
        raise WrapperException('destination directory does not exist')
    # manifest files: id_to_filepath holds only manifest entries here, so
    # readfile() is always the right reader (the old unused `in_manifest`
    # flag was dead code and has been removed)
    for id in self.id_to_filepath:
        rpath = self.id_to_filepath[id]
        data = self.readfile(id)
        filepath = os.path.join(destdir, rpath)
        base = os.path.dirname(filepath)
        if not unipath.exists(base):
            os.makedirs(base)
        if isinstance(data, text_type):
            data = utf8_str(data)
        with open(pathof(filepath), 'wb') as fp:
            fp.write(data)
    # non-manifest ("other") files are handled by their own map and reader
    for id in self.book_href_to_filepath:
        rpath = self.book_href_to_filepath[id]
        data = self.readotherfile(id)
        filepath = os.path.join(destdir, rpath)
        base = os.path.dirname(filepath)
        if not unipath.exists(base):
            os.makedirs(base)
        if isinstance(data, text_type):
            data = utf8_str(data)
        with open(pathof(filepath), 'wb') as fp:
            fp.write(data)
def failed(script_type, msg):
    """Write a 'failed' result wrapper XML document to stdout."""
    if script_type is None:
        opening = "<wrapper>\n<result>failed</result>\n<changes/>\n"
    else:
        opening = '<wrapper type="%s">\n<result>failed</result>\n<changes/>\n' % script_type
    wrapper = _XML_HEADER + opening + "<msg>%s</msg>\n</wrapper>\n" % msg
    # write it to stdout and exit; on Python 3 go through the binary buffer
    # so the utf-8 encoded bytes are emitted unmangled
    stream = sys.stdout.buffer if PY3 else sys.stdout
    stream.write(utf8_str(wrapper))
def parseExceptionsFile(filename):
    """Read an apostrophe-exception word list, auto-detecting its encoding.

    A utf-8 BOM forces 'utf-8-sig'; otherwise each candidate encoding is
    tried until the whole file decodes cleanly.

    Returns:
        list of non-empty, right-stripped lines; [] on any parse error.
    """
    safename = utf8_str(filename)
    words_list = []
    # sniff only the first few bytes for a BOM
    snippet = min(32, os.path.getsize(pathof(safename)))
    with open(pathof(safename), 'rb') as probe:
        raw = probe.read(snippet)
    if raw.startswith(codecs.BOM_UTF8):
        enc = 'utf-8-sig'
    else:
        # BUGFIX: the original list was missing a comma, silently fusing
        # 'utf-16' and 'windows-1252' into one bogus encoding name
        encodings = ['utf-8', 'utf-16', 'windows-1252', 'windows-1250']
        enc = encodings[-1]  # same fallback as before: last candidate wins
        for e in encodings:
            try:
                # close the probe handle deterministically via `with`
                with file_open(pathof(safename), 'r', encoding=e) as fh:
                    fh.readlines()
            except UnicodeDecodeError:
                print('Got unicode error with %s , trying different encoding' % e)
            else:
                enc = e
                break
    try:
        with file_open(pathof(safename), 'r', encoding=enc) as fd:
            words_list = [line.rstrip() for line in fd]
        # drop empty lines
        words_list = [_f for _f in words_list if _f]
        print('Parsing apostrophe exception file %s' % filename)
    except Exception:
        print('Error parsing apostrophe exception file %s: ignoring' % filename)
        words_list = []
    return words_list
def Idpf_encryption_key(uid):
    """Return the IDPF font-obfuscation key: SHA-1 of uid with whitespace removed."""
    key = utf8_str(uid)
    # remove whitespace (space, tab, CR, LF) changing nothing else
    for ws in (bchr(0x20), bchr(0x09), bchr(0x0d), bchr(0x0a)):
        key = key.replace(ws, b'')
    return SHA1(key)
def write_opf(self):
    """Serialize the parsed opf (if any) to outdir/OEBPS/<opfname> as utf-8."""
    if self.op is None:
        return
    filepath = pathof(os.path.join(self.outdir, 'OEBPS', self.opfname))
    base = os.path.dirname(filepath)
    if not unipath.exists(base):
        os.makedirs(base)
    payload = utf8_str(self.build_opf())
    with open(filepath, 'wb') as fp:
        fp.write(payload)
def Adobe_encryption_key(uid):
    """Derive the 16-byte Adobe font-obfuscation key from a unique identifier.

    Strips an optional urn:uuid: prefix, removes dashes and every non-hex
    character, then unhexlifies the first 32 hex digits (the digits are
    doubled first so short uids still yield 16 bytes).
    """
    # strip it down to simple valid hex characters
    # being careful to generate a string of 16 bytes in length
    key = utf8_str(uid)
    if key.startswith(b"urn:uuid:"):
        key = key[9:]
    key = key.replace(b'-', b'')
    # BUGFIX: the pattern must be bytes (br'...') to match the bytes key
    # under Python 3; a str pattern raises TypeError there
    key = re.sub(br'[^a-fA-F0-9]', b'', key)
    key = binascii.unhexlify((key + key)[:32])
    return key
def Idpf_encryption_key(uid):
    """Return the IDPF obfuscation key: SHA-1 of uid (sans urn:uuid: prefix) with whitespace removed."""
    key = utf8_str(uid)
    if key.startswith(b"urn:uuid:"):
        key = key[9:]
    # remove whitespace (space, tab, CR, LF) changing nothing else
    for ws in (bchr(0x20), bchr(0x09), bchr(0x0d), bchr(0x0a)):
        key = key.replace(ws, b'')
    return SHA1(key)
def writefile(self, id, data):
    """Write data to the manifest entry `id`, creating parent dirs and marking it modified."""
    id = unicode_str(id)
    relpath = self.id_to_filepath.get(id, None)
    if relpath is None:
        raise WrapperException('Id does not exist in manifest')
    mime = self.id_to_mime.get(id, '')
    target = os.path.join(self.outdir, relpath)
    parent = os.path.dirname(target)
    if not unipath.exists(parent):
        os.makedirs(pathof(parent))
    # xml-ish mimetypes and text data are always stored as utf-8 bytes
    if mime.endswith('+xml') or isinstance(data, text_type):
        data = utf8_str(data)
    with open(target, 'wb') as fp:
        fp.write(data)
    self.modified[id] = 'file'
def writeotherfile(self, book_href, data):
    """Write data to the non-manifest book file identified by book_href."""
    id = unicode_str(book_href)
    relpath = self.id_to_filepath.get(id, None)
    if relpath is None:
        raise WrapperException('book href does not exist')
    if id in PROTECTED_FILES:
        raise WrapperException('Attempt to modify protected file')
    target = os.path.join(self.outdir, relpath)
    parent = os.path.dirname(target)
    if not unipath.exists(parent):
        os.makedirs(parent)
    if isinstance(data, text_type):
        data = utf8_str(data)
    with open(target, 'wb') as fp:
        fp.write(data)
    self.modified[id] = 'file'
def writeotherfile(self, book_href, data):
    """Write data to a non-manifest file; manifest ids must use writefile instead."""
    id = unicode_str(book_href)
    if id in self.id_to_href:
        raise WrapperException("Incorrect interface routine - use writefile")
    relpath = self.id_to_filepath.get(id, None)
    if relpath is None:
        raise WrapperException("book href does not exist")
    if id in PROTECTED_FILES:
        raise WrapperException("Attempt to modify protected file")
    target = os.path.join(self.outdir, relpath)
    parent = os.path.dirname(target)
    if not unipath.exists(parent):
        os.makedirs(parent)
    if isinstance(data, text_type):
        data = utf8_str(data)
    with open(target, "wb") as fp:
        fp.write(data)
    self.modified[id] = "file"
def writeotherfile(self, book_href, data):
    """Write data for a non-manifest book href; refuses manifest ids and protected files."""
    id = unicode_str(book_href)
    if id in self.id_to_href:
        raise WrapperException('Incorrect interface routine - use writefile')
    dest = self.id_to_filepath.get(id, None)
    if dest is None:
        raise WrapperException('book href does not exist')
    if id in PROTECTED_FILES:
        raise WrapperException('Attempt to modify protected file')
    dest = os.path.join(self.outdir, dest)
    folder = os.path.dirname(dest)
    if not unipath.exists(folder):
        os.makedirs(folder)
    payload = utf8_str(data) if isinstance(data, text_type) else data
    with open(dest, 'wb') as fp:
        fp.write(payload)
    self.modified[id] = 'file'
def addotherfile(self, book_href, data):
    """Create a brand-new non-manifest file at the path implied by book_href.

    Raises:
        WrapperException: if the href is already tracked or the target path exists.
    """
    id = unicode_str(book_href)
    if id in self.other:
        # BUGFIX: error message typo 'unquie' corrected
        raise WrapperException('book href must be unique')
    desired_path = id.replace("/", os.sep)
    filepath = os.path.join(self.outdir, desired_path)
    if unipath.isfile(filepath):
        raise WrapperException('desired path already exists')
    base = os.path.dirname(filepath)
    if not unipath.exists(base):
        os.makedirs(pathof(base))
    if isinstance(data, text_type):
        data = utf8_str(data)
    with open(pathof(filepath), 'wb') as fp:
        fp.write(data)
    # register the new file in the bookkeeping structures
    self.other.append(id)
    self.added.append(id)
    self.id_to_filepath[id] = desired_path
def addfile(self, uniqueid, basename, data, mime=None, properties=None, fallback=None, overlay=None):
    """Add a new manifest entry and write its file under the group's default path.

    The mime type is guessed from the extension when not given; the bookpath
    is derived from the mime group's configured default folder. Returns the
    uniqueid. Raises WrapperException on duplicate id/href, missing mime
    type, or an ncx addition under epub2.
    """
    uniqueid = unicode_str(uniqueid)
    if uniqueid in self.id_to_href:
        raise WrapperException('Manifest Id is not unique')
    basename = unicode_str(basename)
    mime = unicode_str(mime)
    if mime is None:
        # fall back to guessing the media type from the file extension
        ext = os.path.splitext(basename)[1]
        ext = ext.lower()
        mime = ext_mime_map.get(ext, None)
    if mime is None:
        raise WrapperException("Mime Type Missing")
    if mime == "application/x-dtbncx+xml" and self.epub_version.startswith("2"):
        raise WrapperException('Can not add or remove an ncx under epub2')
    # place the file in the default folder configured for its mime group
    group = mime_group_map.get(mime,"Misc")
    default_path = self.group_paths[group][0]
    bookpath = basename
    if default_path != "":
        bookpath = default_path + "/" + basename
    href = buildRelativePath(self.opfbookpath, bookpath)
    if href in self.href_to_id:
        raise WrapperException('Basename already exists')
    # now actually write out the new file
    filepath = bookpath.replace("/",os.sep)
    self.id_to_filepath[uniqueid] = filepath
    filepath = os.path.join(self.outdir,filepath)
    base = os.path.dirname(filepath)
    if not unipath.exists(base):
        os.makedirs(base)
    # textual mimetypes and str data are always stored as utf-8 bytes
    if mime in TEXT_MIMETYPES or isinstance(data, text_type):
        data = utf8_str(data)
    with open(filepath,'wb') as fp:
        fp.write(data)
    # register the entry in all of the manifest bookkeeping maps
    self.id_to_href[uniqueid] = href
    self.id_to_mime[uniqueid] = mime
    self.id_to_props[uniqueid] = properties
    self.id_to_fall[uniqueid] = fallback
    self.id_to_over[uniqueid] = overlay
    self.id_to_bookpath[uniqueid] = bookpath
    self.href_to_id[href] = uniqueid
    self.bookpath_to_id[bookpath] = uniqueid
    self.added.append(uniqueid)
    # adding a manifest entry dirties the opf itself
    self.modified[self.opfbookpath] = 'file'
    return uniqueid
def copy_book_contents_to(self, destdir):
    """Copy every tracked book file into destdir, preserving relative paths."""
    destdir = unicode_str(destdir)
    if destdir is None or not unipath.isdir(destdir):
        raise WrapperException('destination directory does not exist')
    for id, rpath in self.id_to_filepath.items():
        # manifest entries go through readfile, everything else through readotherfile
        reader = self.readfile if id in self.id_to_mime else self.readotherfile
        data = reader(id)
        target = os.path.join(destdir, rpath)
        folder = os.path.dirname(target)
        if not unipath.exists(folder):
            os.makedirs(folder)
        if isinstance(data, text_type):
            data = utf8_str(data)
        with open(pathof(target), 'wb') as fp:
            fp.write(data)
def addfile(self, uniqueid, basename, data, mime=None, properties=None, fallback=None, overlay=None):
    """Add a new manifest entry and write its file under OEBPS/<group>/<basename>.

    The mime type is guessed from the extension when not given; the folder is
    Audio/Video for those media types, otherwise taken from mime_base_map.
    Returns the uniqueid. Raises WrapperException on duplicate id/href or a
    missing mime type.
    """
    uniqueid = unicode_str(uniqueid)
    basename = unicode_str(basename)
    mime = unicode_str(mime)
    if mime is None:
        # fall back to guessing the media type from the file extension
        ext = os.path.splitext(basename)[1]
        ext = ext.lower()
        mime = ext_mime_map.get(ext, None)
    if mime is None:
        raise WrapperException("Mime Type Missing")
    # choose the destination folder from the media type
    if mime.startswith("audio"):
        base = 'Audio'
    elif mime.startswith("video"):
        base = "Video"
    else:
        base = mime_base_map.get(mime,'Misc')
    href = base + "/" + basename
    if uniqueid in self.id_to_href:
        raise WrapperException('Manifest Id is not unique')
    if href in self.href_to_id:
        raise WrapperException('Basename is not unique')
    # now actually write out the new file
    filepath = href.replace("/",os.sep)
    filepath = os.path.join('OEBPS', filepath)
    self.id_to_filepath[uniqueid] = filepath
    filepath = os.path.join(self.outdir,filepath)
    base = os.path.dirname(filepath)
    if not unipath.exists(base):
        os.makedirs(base)
    # xml-ish mimetypes and str data are always stored as utf-8 bytes
    if mime.endswith('+xml') or isinstance(data, text_type):
        data = utf8_str(data)
    with open(filepath,'wb') as fp:
        fp.write(data)
    # register the entry in all of the manifest bookkeeping maps
    self.id_to_href[uniqueid] = href
    self.id_to_mime[uniqueid] = mime
    self.id_to_props[uniqueid] = properties
    self.id_to_fall[uniqueid] = fallback
    self.id_to_over[uniqueid] = overlay
    self.href_to_id[href] = uniqueid
    self.added.append(uniqueid)
    # adding a manifest entry dirties the opf itself
    self.modified['OEBPS/content.opf'] = 'file'
    return uniqueid
def writeotherfile(self, book_href, data):
    """Write data to a non-manifest book href (url-unquoted first); refuses manifest ids and protected files."""
    id = unicode_str(book_href)
    id = unquoteurl(id)
    if id is None:
        raise WrapperException('None is not a valid book href')
    if id not in self.other and id in self.id_to_href:
        raise WrapperException('Incorrect interface routine - use writefile')
    dest = self.book_href_to_filepath.get(id, None)
    if dest is None:
        raise WrapperException('Book href does not exist')
    if id in PROTECTED_FILES:
        raise WrapperException('Attempt to modify protected file')
    dest = os.path.join(self.outdir, dest)
    folder = os.path.dirname(dest)
    if not unipath.exists(folder):
        os.makedirs(folder)
    payload = utf8_str(data) if isinstance(data, text_type) else data
    with open(dest, 'wb') as fp:
        fp.write(payload)
    self.modified[id] = 'file'
def addfile(self, uniqueid, basename, data, mime=None):
    """Add a new file under OEBPS and register it in the manifest maps; returns uniqueid."""
    uniqueid = unicode_str(uniqueid)
    basename = unicode_str(basename)
    mime = unicode_str(mime)
    if mime is None:
        # guess the media type from the file extension
        ext = os.path.splitext(basename)[1].lower()
        mime = ext_mime_map.get(ext, None)
    if mime is None:
        raise WrapperException("Mime Type Missing")
    # pick the destination folder from the media type
    if mime.startswith("audio"):
        base = "Audio"
    elif mime.startswith("video"):
        base = "Video"
    else:
        base = mime_base_map.get(mime, "Misc")
    href = base + "/" + basename
    if uniqueid in self.id_to_href:
        raise WrapperException("Manifest Id is not unique")
    if href in self.href_to_id:
        raise WrapperException("Basename is not unique")
    # now actually write out the new file
    relpath = os.path.join("OEBPS", href.replace("/", os.sep))
    self.id_to_filepath[uniqueid] = relpath
    target = os.path.join(self.outdir, relpath)
    folder = os.path.dirname(target)
    if not unipath.exists(folder):
        os.makedirs(folder)
    if mime.endswith("+xml") or isinstance(data, text_type):
        data = utf8_str(data)
    with open(target, "wb") as fp:
        fp.write(data)
    # register the entry in the manifest bookkeeping maps
    self.id_to_href[uniqueid] = href
    self.id_to_mime[uniqueid] = mime
    self.href_to_id[href] = uniqueid
    self.added.append(uniqueid)
    self.modified["OEBPS/content.opf"] = "file"
    return uniqueid
def cmdDo(self):
    """Collect the dialog's settings into the global CRITERIA dict and close the app."""
    global CRITERIA
    # map the dash combobox index to the smartypants dash flag
    dash_settings = {0: '', 1: 'd', 2: 'i'}.get(self.dashBox.current(), 'D')
    apos_exception_file = None
    if self.use_file.get():
        self.cust_file_path.config(state="normal")
        path_value = self.cust_file_path.get()
        if len(path_value):
            apos_exception_file = path_value
            if not unipath.exists(utf8_str(apos_exception_file)):
                print('Apostrophe exception file %s does not exist!' % apos_exception_file)
                apos_exception_file = None
        # restore the entry widget to its usual read-only state
        self.cust_file_path.config(state="readonly")
    CRITERIA['apos_exception_file'] = apos_exception_file
    smarty_attr = self.edu_quotes.get() + dash_settings + self.edu_ellipses.get()
    if smarty_attr == '':
        smarty_attr = '0'
    CRITERIA['smarty_attr'] = smarty_attr
    CRITERIA['use_unicode'] = self.unicodevar.get()
    indices = self.filelist.curselection()
    CRITERIA['files'] = [self.filelist.get(index) for index in indices]
    self.quitApp()
def cmdDo(self):
    """Gather the chosen smartypants options into CRITERIA and quit the dialog."""
    global CRITERIA
    # translate the dash combobox selection into its smartypants flag
    selection = self.dashBox.current()
    if selection == 0:
        dash_settings = ''
    elif selection == 1:
        dash_settings = 'd'
    elif selection == 2:
        dash_settings = 'i'
    else:
        dash_settings = 'D'
    apos_exception_file = None
    if self.use_file.get():
        self.cust_file_path.config(state="normal")
        if len(self.cust_file_path.get()):
            apos_exception_file = self.cust_file_path.get()
            if not unipath.exists(utf8_str(apos_exception_file)):
                print ('Apostrophe exception file %s does not exist!' % apos_exception_file)
                apos_exception_file = None
        # put the entry widget back to read-only
        self.cust_file_path.config(state="readonly")
    CRITERIA['apos_exception_file'] = apos_exception_file
    smarty_attr = self.edu_quotes.get() + dash_settings + self.edu_ellipses.get()
    CRITERIA['smarty_attr'] = smarty_attr if smarty_attr != '' else '0'
    CRITERIA['use_unicode'] = self.unicodevar.get()
    selected = self.filelist.curselection()
    CRITERIA['files'] = [self.filelist.get(index) for index in selected]
    self.quitApp()
def main(argv=unicode_argv()):
    """Launch a plugin script against an ebook tree and report its result as XML on stdout.

    argv layout: [launcher, ebook_root, outdir, script_type, target_file].
    Returns 0 on success, -1 on argument/path validation failure.

    NOTE(review): the default `argv=unicode_argv()` is evaluated once at
    import time, not per call — presumably intentional here; confirm.
    """
    if len(argv) != 5:
        failed( None, msg="Launcher: improper number of arguments passed to launcher.py")
        return -1
    ebook_root = argv[1]
    outdir = argv[2]
    script_type = argv[3]
    target_file = argv[4]
    # derive the plugin's name and directories from the target script path
    script_home = os.path.dirname(target_file)
    plugin_name = os.path.split(script_home)[-1]
    plugin_dir = os.path.dirname(script_home)
    script_module = os.path.splitext(os.path.basename(target_file))[0]
    # do basic sanity checking anyway
    if script_type not in SUPPORTED_SCRIPT_TYPES:
        failed(None, msg="Launcher: script type %s is not supported" % script_type)
        return -1
    ok = unipath.exists(ebook_root) and unipath.isdir(ebook_root)
    ok = ok and unipath.exists(outdir) and unipath.isdir(outdir)
    ok = ok and unipath.exists(script_home) and unipath.isdir(script_home)
    ok = ok and unipath.exists(target_file) and unipath.isfile(target_file)
    if not ok:
        failed(None, msg="Launcher: missing or incorrect paths passed in")
        return -1
    # update sys with path to target module home directory
    sys.path.append(script_home)
    # load and parse opf if present
    op = None
    opf_path = os.path.join(ebook_root, 'OEBPS', 'content.opf')
    if unipath.exists(opf_path) and unipath.isfile(opf_path):
        op = Opf_Parser(opf_path)
    # create a wrapper for record keeping and safety
    rk = Wrapper(ebook_root, outdir, op, plugin_dir, plugin_name)
    # get the correct container for the requested script type
    if script_type == 'edit':
        bc = BookContainer(rk)
    elif script_type == 'input':
        bc = InputContainer(rk)
    elif script_type == 'validation':
        bc = ValidationContainer(rk)
    else:
        bc = OutputContainer(rk)
    # start the target script
    ps = ProcessScript(script_type, script_module, bc)
    ps.launch()
    # get standard error and standard out from the target script
    successmsg = ''
    for data in ps.stdouttext:
        successmsg += unicode_str(data)
    successmsg = escapeit(successmsg)
    errorlog = ''
    for data in ps.stderrtext:
        errorlog += unicode_str(data)
    errorlog = escapeit(errorlog)
    # get the target's script wrapper xml
    resultxml = "".join(ps.wrapout)
    resultxml += "<msg>\n"
    # on success report stdout (and stderr only when debugging);
    # on failure always report stderr
    if ps.exitcode == 0:
        resultxml += successmsg
        if _DEBUG:
            resultxml += errorlog
    else:
        if _DEBUG:
            resultxml += successmsg
        resultxml += errorlog
    resultxml += '</msg>\n</wrapper>\n'
    # write it to stdout and exit
    if PY3:
        sys.stdout.buffer.write(utf8_str(resultxml))
    else:
        sys.stdout.write(utf8_str(resultxml))
    return 0
def applyInflectionRule(self, mainEntry, inflectionRuleData, start, end):
    '''
    Apply inflection rule.

    Interprets the rule bytes as a tiny edit machine over the word:
    bytes 0x01-0x04 select an insert/delete mode at the word start/end,
    bytes 0x0a-0x13 move the cursor backwards, and any byte above 0x13
    is a literal character operand for the current mode.

    @param mainEntry: The word to inflect.
    @param inflectionRuleData: The inflection rules.
    @param start: The start position of the inflection rule to use.
    @param end: The end position of the inflection rule to use.
    @return: The string with the inflected word or None if an error occurs.
    '''
    mode = -1
    byteArray = array.array(array_format, mainEntry)
    position = len(byteArray)
    for charOffset in range(start, end):
        char = inflectionRuleData[charOffset:charOffset + 1]
        abyte = ord(char)
        if abyte >= 0x0a and abyte <= 0x13:
            # Move cursor backwards
            offset = abyte - 0x0a
            if mode not in [0x02, 0x03]:
                mode = 0x02
                position = len(byteArray)
            position -= offset
        elif abyte > 0x13:
            # literal operand byte: apply it under the current mode
            if mode == -1:
                print("Error: Unexpected first byte %i of inflection rule" % abyte)
                return None
            elif position == -1:
                print("Error: Unexpected first byte %i of inflection rule" % abyte)
                return None
            else:
                if mode == 0x01:
                    # Insert at word start
                    byteArray.insert(position, abyte)
                    position += 1
                elif mode == 0x02:
                    # Insert at word end
                    byteArray.insert(position, abyte)
                elif mode == 0x03:
                    # Delete at word end; the deleted byte must match the operand
                    position -= 1
                    deleted = byteArray.pop(position)
                    if bchr(deleted) != char:
                        if DEBUG_DICT:
                            print("0x03: %s %s %s %s" % (mainEntry, toHex(inflectionRuleData[start:end]), char, bchr(deleted)))
                        print("Error: Delete operation of inflection rule failed")
                        return None
                elif mode == 0x04:
                    # Delete at word start; the deleted byte must match the operand
                    deleted = byteArray.pop(position)
                    if bchr(deleted) != char:
                        if DEBUG_DICT:
                            print("0x03: %s %s %s %s" % (mainEntry, toHex(inflectionRuleData[start:end]), char, bchr(deleted)))
                        print("Error: Delete operation of inflection rule failed")
                        return None
                else:
                    print("Error: Inflection rule mode %x is not implemented" % mode)
                    return None
        elif abyte == 0x01:
            # Insert at word start
            if mode not in [0x01, 0x04]:
                position = 0
            mode = abyte
        elif abyte == 0x02:
            # Insert at word end
            if mode not in [0x02, 0x03]:
                position = len(byteArray)
            mode = abyte
        elif abyte == 0x03:
            # Delete at word end
            if mode not in [0x02, 0x03]:
                position = len(byteArray)
            mode = abyte
        elif abyte == 0x04:
            # Delete at word start
            if mode not in [0x01, 0x04]:
                position = 0
            # Delete at word start
            mode = abyte
        else:
            print("Error: Inflection rule mode %x is not implemented" % abyte)
            return None
    # NOTE(review): array.tostring() was removed in Python 3.9 — this would
    # need tobytes() on modern interpreters; confirm target Python versions.
    return utf8_str(byteArray.tostring())
def processMobi8(mh, metadata, sect, files, rscnames, pagemapproc, k8resc, obfuscate_data, apnxfile=None, epubver='2'):
    """Unpack the KF8 (mobi8) half of a book into an epub-like directory tree.

    Builds the skeleton parts and flows from the raw markup, reconstructs the
    guide, page map, ncx/nav and opf, writes all pieces under files.k8oebps,
    and finally asks `files` to assemble the epub.

    NOTE(review): relies on the module-global azw2zip_cfg for output options.
    """
    global DUMP
    global WRITE_RAW_DATA
    # extract raw markup langauge
    rawML = mh.getRawML()
    if DUMP or WRITE_RAW_DATA:
        outraw = os.path.join(files.k8dir,files.getInputFileBasename() + '.rawml')
        with open(pathof(outraw),'wb') as f:
            f.write(rawML)
    # KF8 require other indexes which contain parsing information and the FDST info
    # to process the rawml back into the xhtml files, css files, svg image files, etc
    k8proc = K8Processor(mh, sect, files, DUMP)
    k8proc.buildParts(rawML)
    # collect information for the guide first
    guidetext = unicode_str(k8proc.getGuideText())
    # if the guide was empty, add in any guide info from metadata, such as StartOffset
    if not guidetext and 'StartOffset' in metadata:
        # Apparently, KG 2.5 carries over the StartOffset from the mobi7 part...
        # Taking that into account, we only care about the *last* StartOffset, which
        # should always be the correct one in these cases (the one actually pointing
        # to the right place in the mobi8 part).
        starts = metadata['StartOffset']
        last_start = starts[-1]
        last_start = int(last_start)
        if last_start == 0xffffffff:
            last_start = 0
        seq, idtext = k8proc.getFragTblInfo(last_start)
        filename, idtext = k8proc.getIDTagByPosFid(toBase32(seq), b'0000000000')
        linktgt = filename
        idtext = unicode_str(idtext, mh.codec)
        if idtext != '':
            linktgt += '#' + idtext
        guidetext += '<reference type="text" href="Text/%s" />\n' % linktgt
    # if apnxfile is passed in use it for page map information
    if apnxfile is not None and pagemapproc is None:
        with open(apnxfile, 'rb') as f:
            apnxdata = b"00000000" + f.read()
        pagemapproc = PageMapProcessor(mh, apnxdata)
    # generate the page map
    pagemapxml = ''
    if pagemapproc is not None:
        pagemapxml = pagemapproc.generateKF8PageMapXML(k8proc)
        outpm = os.path.join(files.k8oebps,'page-map.xml')
        with open(pathof(outpm),'wb') as f:
            f.write(pagemapxml.encode('utf-8'))
        if DUMP:
            print(pagemapproc.getNames())
            print(pagemapproc.getOffsets())
            print("\n\nPage Map")
            print(pagemapxml)
    # process the toc ncx
    # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num
    print("Processing ncx / toc")
    ncx = ncxExtract(mh, files)
    ncx_data = ncx.parseNCX()
    # extend the ncx data with filenames and proper internal idtags
    for i in range(len(ncx_data)):
        ncxmap = ncx_data[i]
        [junk1, junk2, junk3, fid, junk4, off] = ncxmap['pos_fid'].split(':')
        filename, idtag = k8proc.getIDTagByPosFid(fid, off)
        ncxmap['filename'] = filename
        ncxmap['idtag'] = unicode_str(idtag)
        ncx_data[i] = ncxmap
    # convert the rawML to a set of xhtml files
    print("Building an epub-like structure")
    htmlproc = XHTMLK8Processor(rscnames, k8proc)
    usedmap = htmlproc.buildXHTML()
    # write out the xhtml svg, and css files
    # fileinfo = [skelid|coverpage, dir, name]
    fileinfo = []
    # first create a cover page if none exists
    if CREATE_COVER_PAGE:
        cover = CoverProcessor(files, metadata, rscnames)
        cover_img = utf8_str(cover.getImageName())
        need_to_create_cover_page = False
        if cover_img is not None:
            if k8resc is None or not k8resc.hasSpine():
                # no RESC spine: only add a cover page if the first part
                # does not already reference the cover image
                part = k8proc.getPart(0)
                if part.find(cover_img) == -1:
                    need_to_create_cover_page = True
            else:
                if "coverpage" not in k8resc.spine_idrefs:
                    part = k8proc.getPart(int(k8resc.spine_order[0]))
                    if part.find(cover_img) == -1:
                        k8resc.prepend_to_spine("coverpage", "inserted", "no", None)
                if k8resc.spine_order[0] == "coverpage":
                    need_to_create_cover_page = True
            if need_to_create_cover_page:
                filename = cover.getXHTMLName()
                fileinfo.append(["coverpage", 'Text', filename])
                guidetext += cover.guide_toxml()
                cover.writeXHTML()
    # write out each skeleton part as its own xhtml file
    n = k8proc.getNumberOfParts()
    for i in range(n):
        part = k8proc.getPart(i)
        [skelnum, dir, filename, beg, end, aidtext] = k8proc.getPartInfo(i)
        fileinfo.append([str(skelnum), dir, filename])
        fname = os.path.join(files.k8oebps,dir,filename)
        with open(pathof(fname),'wb') as f:
            f.write(part)
    # write out the file-type flows (css, svg, ...); flow 0 is skipped by design
    n = k8proc.getNumberOfFlows()
    for i in range(1, n):
        [ptype, pformat, pdir, filename] = k8proc.getFlowInfo(i)
        flowpart = k8proc.getFlow(i)
        if pformat == b'file':
            fileinfo.append([None, pdir, filename])
            fname = os.path.join(files.k8oebps,pdir,filename)
            with open(pathof(fname),'wb') as f:
                f.write(flowpart)
    # create the opf
    opf = OPFProcessor(files, metadata.copy(), fileinfo, rscnames, True, mh, usedmap, pagemapxml=pagemapxml, guidetext=guidetext, k8resc=k8resc, epubver=epubver)
    uuid = opf.writeOPF(bool(obfuscate_data))
    if opf.hasNCX():
        # Create a toc.ncx.
        ncx.writeK8NCX(ncx_data, metadata)
    if opf.hasNAV():
        # Create a navigation document.
        nav = NAVProcessor(files)
        nav.writeNAV(ncx_data, guidetext, metadata)
    # get the cover page offset number
    cover_offset = int(mh.metadata.get('CoverOffset', ['-1'])[0])
    if not CREATE_COVER_PAGE:
        cover_offset = None
    # make an epub-like structure of it all
    print("Creating an epub-like file")
    files.makeEPUB(usedmap, obfuscate_data, uuid, azw2zip_cfg.isOutputEpub(), azw2zip_cfg.makeOutputFileName(mh.getMetaData()), cover_offset)
def buildXHTML(self):
    """Rewrite the KF8 skeleton parts and flows into valid epub xhtml/css.

    Runs a fixed sequence of regex passes over the byte-string parts:
    resolve kindle:pos internal links, convert aid/data-AmznPageBreak
    attributes, rewrite kindle:embed / kindle:flow references in flows and
    text, then apply final svg/li cleanups. Returns self.used, the map of
    resource names actually referenced.
    """
    # first need to update all links that are internal which
    # are based on positions within the xhtml files **BEFORE**
    # cutting and pasting any pieces into the xhtml text files
    # kindle:pos:fid:XXXX:off:YYYYYYYYYY (used for internal link within xhtml)
    # XXXX is the offset in records into divtbl
    # YYYYYYYYYYYY is a base32 number you add to the divtbl insertpos to get final position
    # pos:fid pattern
    posfid_pattern = re.compile(br'''(<a.*?href=.*?>)''', re.IGNORECASE)
    posfid_index_pattern = re.compile(br'''['"]kindle:pos:fid:([0-9|A-V]+):off:([0-9|A-V]+).*?["']''')
    parts = []
    print("Building proper xhtml for each file")
    for i in range(self.k8proc.getNumberOfParts()):
        part = self.k8proc.getPart(i)
        [partnum, dir, filename, beg, end, aidtext] = self.k8proc.getPartInfo(i)
        # internal links
        srcpieces = posfid_pattern.split(part)
        for j in range(1, len(srcpieces), 2):
            tag = srcpieces[j]
            if tag.startswith(b'<'):
                for m in posfid_index_pattern.finditer(tag):
                    posfid = m.group(1)
                    offset = m.group(2)
                    filename, idtag = self.k8proc.getIDTagByPosFid(posfid, offset)
                    if idtag == b'':
                        replacement = b'"' + utf8_str(filename) + b'"'
                    else:
                        replacement = b'"' + utf8_str(filename) + b'#' + idtag + b'"'
                    tag = posfid_index_pattern.sub(replacement, tag, 1)
                srcpieces[j] = tag
        part = b"".join(srcpieces)
        parts.append(part)
    # we are free to cut and paste as we see fit
    # we can safely remove all of the Kindlegen generated aid tags
    # change aid ids that are in k8proc.linked_aids to xhtml ids
    find_tag_with_aid_pattern = re.compile(br'''(<[^>]*\said\s*=[^>]*>)''', re.IGNORECASE)
    within_tag_aid_position_pattern = re.compile(br'''\said\s*=['"]([^'"]*)['"]''')
    for i in range(len(parts)):
        part = parts[i]
        srcpieces = find_tag_with_aid_pattern.split(part)
        for j in range(len(srcpieces)):
            tag = srcpieces[j]
            if tag.startswith(b'<'):
                for m in within_tag_aid_position_pattern.finditer(tag):
                    try:
                        aid = m.group(1)
                    except IndexError:
                        aid = None
                    # keep only aids that are link targets, as real ids
                    replacement = b''
                    if aid in self.k8proc.linked_aids:
                        replacement = b' id="aid-' + aid + b'"'
                    tag = within_tag_aid_position_pattern.sub(replacement, tag, 1)
                srcpieces[j] = tag
        part = b"".join(srcpieces)
        parts[i] = part
    # we can safely replace all of the Kindlegen generated data-AmznPageBreak tags
    # with page-break-after style patterns
    find_tag_with_AmznPageBreak_pattern = re.compile(br'''(<[^>]*\sdata-AmznPageBreak=[^>]*>)''', re.IGNORECASE)
    within_tag_AmznPageBreak_position_pattern = re.compile(br'''\sdata-AmznPageBreak=['"]([^'"]*)['"]''')
    for i in range(len(parts)):
        part = parts[i]
        srcpieces = find_tag_with_AmznPageBreak_pattern.split(part)
        for j in range(len(srcpieces)):
            tag = srcpieces[j]
            if tag.startswith(b'<'):
                srcpieces[j] = within_tag_AmznPageBreak_position_pattern.sub(
                    lambda m: b' style="page-break-after:' + m.group(1) + b'"', tag)
        part = b"".join(srcpieces)
        parts[i] = part
    # we have to handle substitutions for the flows pieces first as they may
    # be inlined into the xhtml text
    #   kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images)
    #   kindle:flow:XXXX?mime=YYYY/ZZZ  (used for style sheets, svg images, etc)
    #   kindle:embed:XXXX   (used for fonts)
    flows = []
    flows.append(None)
    flowinfo = []
    flowinfo.append([None, None, None, None])
    # regular expression search patterns
    # NOTE(review): the [img\s|image\s] form is a character class, not an
    # alternation — it matches any of the listed characters; confirm intent.
    img_pattern = re.compile(br'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE)
    img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)[^'"]*['")]''', re.IGNORECASE)
    tag_pattern = re.compile(br'''(<[^>]*>)''')
    flow_pattern = re.compile(br'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE)
    url_pattern = re.compile(br'''(url\(.*?\))''', re.IGNORECASE)
    url_img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)\?mime=image/[^\)]*["')]''', re.IGNORECASE)
    font_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)["')]''', re.IGNORECASE)
    url_css_index_pattern = re.compile(br'''kindle:flow:([0-9|A-V]+)\?mime=text/css[^\)]*''', re.IGNORECASE)
    url_svg_image_pattern = re.compile(br'''kindle:flow:([0-9|A-V]+)\?mime=image/svg\+xml[^\)]*''', re.IGNORECASE)
    for i in range(1, self.k8proc.getNumberOfFlows()):
        [ftype, format, dir, filename] = self.k8proc.getFlowInfo(i)
        flowpart = self.k8proc.getFlow(i)
        # links to raster image files from image tags
        # image_pattern
        srcpieces = img_pattern.split(flowpart)
        for j in range(1, len(srcpieces), 2):
            tag = srcpieces[j]
            if tag.startswith(b'<im'):
                for m in img_index_pattern.finditer(tag):
                    imageNumber = fromBase32(m.group(1))
                    imageName = self.rscnames[imageNumber - 1]
                    if imageName is not None:
                        replacement = b'"../Images/' + utf8_str(imageName) + b'"'
                        self.used[imageName] = 'used'
                        tag = img_index_pattern.sub(replacement, tag, 1)
                    else:
                        print("Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag))
                srcpieces[j] = tag
        flowpart = b"".join(srcpieces)
        # replacements inside css url():
        srcpieces = url_pattern.split(flowpart)
        for j in range(1, len(srcpieces), 2):
            tag = srcpieces[j]
            # process links to raster image files
            for m in url_img_index_pattern.finditer(tag):
                imageNumber = fromBase32(m.group(1))
                imageName = self.rscnames[imageNumber - 1]
                # preserve the original opening/closing separator characters
                osep = m.group()[0:1]
                csep = m.group()[-1:]
                if imageName is not None:
                    replacement = osep + b'../Images/' + utf8_str(imageName) + csep
                    self.used[imageName] = 'used'
                    tag = url_img_index_pattern.sub(replacement, tag, 1)
                else:
                    print("Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag))
            # process links to fonts
            for m in font_index_pattern.finditer(tag):
                fontNumber = fromBase32(m.group(1))
                fontName = self.rscnames[fontNumber - 1]
                osep = m.group()[0:1]
                csep = m.group()[-1:]
                if fontName is None:
                    print("Error: Referenced font %s was not recognized as a valid font in %s" % (fontNumber, tag))
                else:
                    replacement = osep + b'../Fonts/' + utf8_str(fontName) + csep
                    tag = font_index_pattern.sub(replacement, tag, 1)
                    self.used[fontName] = 'used'
            # process links to other css pieces
            for m in url_css_index_pattern.finditer(tag):
                num = fromBase32(m.group(1))
                [typ, fmt, pdir, fnm] = self.k8proc.getFlowInfo(num)
                replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"'
                tag = url_css_index_pattern.sub(replacement, tag, 1)
                self.used[fnm] = 'used'
            # process links to svg images
            for m in url_svg_image_pattern.finditer(tag):
                num = fromBase32(m.group(1))
                [typ, fmt, pdir, fnm] = self.k8proc.getFlowInfo(num)
                replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"'
                tag = url_svg_image_pattern.sub(replacement, tag, 1)
                self.used[fnm] = 'used'
            srcpieces[j] = tag
        flowpart = b"".join(srcpieces)
        # store away in our own copy
        flows.append(flowpart)
    # I do not think this case exists and even if it does exist, it needs to be done in a separate
    # pass to prevent inlining a flow piece into another flow piece before the inserted one or the
    # target one has been fully processed
    # but keep it around if it ends up we do need it
    # flow pattern not inside url()
    # srcpieces = tag_pattern.split(flowpart)
    # for j in range(1, len(srcpieces),2):
    #     tag = srcpieces[j]
    #     if tag.startswith(b'<'):
    #         for m in flow_pattern.finditer(tag):
    #             num = fromBase32(m.group(1))
    #             [typ, fmt, pdir, fnm] = self.k8proc.getFlowInfo(num)
    #             flowtext = self.k8proc.getFlow(num)
    #             if fmt == b'inline':
    #                 tag = flowtext
    #             else:
    #                 replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"'
    #                 tag = flow_pattern.sub(replacement, tag, 1)
    #                 self.used[fnm] = 'used'
    #         srcpieces[j] = tag
    # flowpart = b"".join(srcpieces)
    # now handle the main text xhtml parts
    # Handle the flow items in the XHTML text pieces
    # kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc)
    tag_pattern = re.compile(br'''(<[^>]*>)''')
    flow_pattern = re.compile(br'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE)
    for i in range(len(parts)):
        part = parts[i]
        [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i]
        # flow pattern
        srcpieces = tag_pattern.split(part)
        for j in range(1, len(srcpieces), 2):
            tag = srcpieces[j]
            if tag.startswith(b'<'):
                for m in flow_pattern.finditer(tag):
                    num = fromBase32(m.group(1))
                    if num > 0 and num < len(self.k8proc.flowinfo):
                        [typ, fmt, pdir, fnm] = self.k8proc.getFlowInfo(num)
                        flowpart = flows[num]
                        if fmt == b'inline':
                            # inline flows replace the whole referencing tag
                            tag = flowpart
                        else:
                            replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"'
                            tag = flow_pattern.sub(replacement, tag, 1)
                            self.used[fnm] = 'used'
                    else:
                        print("warning: ignoring non-existent flow link", tag, " value 0x%x" % num)
                srcpieces[j] = tag
        part = b''.join(srcpieces)
        # store away modified version
        parts[i] = part
    # Handle any embedded raster images links in style= attributes urls
    style_pattern = re.compile(br'''(<[a-zA-Z0-9]+\s[^>]*style\s*=\s*[^>]*>)''', re.IGNORECASE)
    img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)[^'"]*['")]''', re.IGNORECASE)
    for i in range(len(parts)):
        part = parts[i]
        [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i]
        # replace urls in style attributes
        srcpieces = style_pattern.split(part)
        for j in range(1, len(srcpieces), 2):
            tag = srcpieces[j]
            if b'kindle:embed' in tag:
                for m in img_index_pattern.finditer(tag):
                    imageNumber = fromBase32(m.group(1))
                    imageName = self.rscnames[imageNumber - 1]
                    osep = m.group()[0:1]
                    csep = m.group()[-1:]
                    if imageName is not None:
                        replacement = osep + b'../Images/' + utf8_str(imageName) + csep
                        self.used[imageName] = 'used'
                        tag = img_index_pattern.sub(replacement, tag, 1)
                    else:
                        print("Error: Referenced image %s in style url was not recognized in %s" % (imageNumber, tag))
                srcpieces[j] = tag
        part = b"".join(srcpieces)
        # store away modified version
        parts[i] = part
    # Handle any embedded raster images links in the xhtml text
    # kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images)
    img_pattern = re.compile(br'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE)
    img_index_pattern = re.compile(br'''['"]kindle:embed:([0-9|A-V]+)[^'"]*['"]''')
    for i in range(len(parts)):
        part = parts[i]
        [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i]
        # links to raster image files
        # image_pattern
        srcpieces = img_pattern.split(part)
        for j in range(1, len(srcpieces), 2):
            tag = srcpieces[j]
            if tag.startswith(b'<im'):
                for m in img_index_pattern.finditer(tag):
                    imageNumber = fromBase32(m.group(1))
                    imageName = self.rscnames[imageNumber - 1]
                    if imageName is not None:
                        replacement = b'"../Images/' + utf8_str(imageName) + b'"'
                        self.used[imageName] = 'used'
                        tag = img_index_pattern.sub(replacement, tag, 1)
                    else:
                        print("Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag))
                srcpieces[j] = tag
        part = b"".join(srcpieces)
        # store away modified version
        parts[i] = part
    # finally perform any general cleanups needed to make valid XHTML
    # these include:
    #   in svg tags replace "preserveaspectratio" attributes with "preserveAspectRatio"
    #   in svg tags replace "viewbox" attributes with "viewBox"
    #   in <li> remove value="XX" attributes since these are illegal
    tag_pattern = re.compile(br'''(<[^>]*>)''')
    li_value_pattern = re.compile(br'''\svalue\s*=\s*['"][^'"]*['"]''', re.IGNORECASE)
    for i in range(len(parts)):
        part = parts[i]
        [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i]
        # tag pattern
        srcpieces = tag_pattern.split(part)
        for j in range(1, len(srcpieces), 2):
            tag = srcpieces[j]
            if tag.startswith(b'<svg') or tag.startswith(b'<SVG'):
                tag = tag.replace(b'preserveaspectratio', b'preserveAspectRatio')
                tag = tag.replace(b'viewbox', b'viewBox')
            elif tag.startswith(b'<li ') or tag.startswith(b'<LI '):
                tagpieces = li_value_pattern.split(tag)
                tag = b"".join(tagpieces)
            srcpieces[j] = tag
        part = b"".join(srcpieces)
        # store away modified version
        parts[i] = part
    # hand the rewritten flows and parts back to the processor
    self.k8proc.setFlows(flows)
    self.k8proc.setParts(parts)
    return self.used
def processMobi7(mh, metadata, sect, files, rscnames):
    """Convert an original (non-KF8) Mobi book to html and write its OPF.

    mh       -- mobi header object: supplies the raw ML, dictionary language
                info, and the book codec used to decode the guide text
    metadata -- metadata dict; may gain 'DictInLanguage'/'DictOutLanguage'
                entries for dictionaries, and is read for 'StartOffset'
    sect     -- section accessor, used only to build the dictionary position map
    files    -- file-location object (provides mobi7dir and the input basename)
    rscnames -- list of extracted resource names passed through to the
                HTML and OPF processors

    Side effects: writes book.html (and optionally a .rawml dump) into
    files.mobi7dir, writes the NCX, and writes the OPF via OPFProcessor.
    """
    global DUMP
    global WRITE_RAW_DATA
    # An original Mobi
    rawML = mh.getRawML()
    # optionally dump the raw markup for debugging
    if DUMP or WRITE_RAW_DATA:
        outraw = os.path.join(files.mobi7dir,files.getInputFileBasename() + '.rawml')
        with open(pathof(outraw),'wb') as f:
            f.write(rawML)

    # process the toc ncx
    # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num
    ncx = ncxExtract(mh, files)
    ncx_data = ncx.parseNCX()
    ncx.writeNCX(metadata)

    positionMap = {}

    # if Dictionary build up the positionMap
    if mh.isDictionary():
        if mh.DictInLanguage():
            metadata['DictInLanguage'] = [mh.DictInLanguage()]
        if mh.DictOutLanguage():
            metadata['DictOutLanguage'] = [mh.DictOutLanguage()]
        positionMap = dictSupport(mh, sect).getPositionMap()

    # convert the rawml back to Mobi ml
    proc = HTMLProcessor(files, metadata, rscnames)
    srctext = proc.findAnchors(rawML, ncx_data, positionMap)
    srctext, usedmap = proc.insertHREFS()

    # write the proper mobi html
    fileinfo=[]
    # fname = files.getInputFileBasename() + '.html'
    fname = 'book.html'
    fileinfo.append([None,'', fname])
    outhtml = os.path.join(files.mobi7dir, fname)
    with open(pathof(outhtml), 'wb') as f:
        f.write(srctext)

    # extract guidetext from srctext
    guidetext =b''
    # no pagemap support for older mobis
    # pagemapxml = None
    guidematch = re.search(br'''<guide>(.*)</guide>''',srctext,re.IGNORECASE+re.DOTALL)
    if guidematch:
        guidetext = guidematch.group(1)
        # sometimes old mobi guide from srctext horribly written so need to clean up
        guidetext = guidetext.replace(b"\r", b"")
        guidetext = guidetext.replace(b'<REFERENCE', b'<reference')
        guidetext = guidetext.replace(b' HREF=', b' href=')
        guidetext = guidetext.replace(b' TITLE=', b' title=')
        guidetext = guidetext.replace(b' TYPE=', b' type=')
        # reference must be a self-closing tag
        # and any href must be replaced with filepos information
        ref_tag_pattern = re.compile(br'''(<reference [^>]*>)''', re.IGNORECASE)
        guidepieces = ref_tag_pattern.split(guidetext)
        # re.split with a capturing group places the matched tags at the
        # odd indices of the result list
        for i in range(1,len(guidepieces), 2):
            reftag = guidepieces[i]
            # remove any href there now to replace with filepos
            reftag = re.sub(br'''href\s*=[^'"]*['"][^'"]*['"]''',b'', reftag)
            # make sure the reference tag ends properly
            if not reftag.endswith(b"/>"):
                reftag = reftag[0:-1] + b"/>"
            guidepieces[i] = reftag
        guidetext = b''.join(guidepieces)
        # rewrite filepos=NNN attributes as href="book.html#fileposNNN"
        replacetext = br'''href="'''+utf8_str(fileinfo[0][2])+ br'''#filepos\1"'''
        guidetext = re.sub(br'''filepos=['"]{0,1}0*(\d+)['"]{0,1}''', replacetext, guidetext)
        guidetext += b'\n'

    # add a "text" guide entry derived from the StartOffset metadata;
    # a value of 0xffffffff is replaced with '0' before use
    if 'StartOffset' in metadata:
        for value in metadata['StartOffset']:
            if int(value) == 0xffffffff:
                value = '0'
            starting_offset = value
        # NOTE(review): if metadata['StartOffset'] were ever an empty list,
        # starting_offset would be unbound here -- presumably the list is
        # never empty when the key is present; confirm upstream.
        # get guide items from metadata
        metaguidetext = b'<reference type="text" href="'+utf8_str(fileinfo[0][2])+b'#filepos'+utf8_str(starting_offset)+b'" />\n'
        guidetext += metaguidetext

    # OPFProcessor expects unicode guide text, so decode with the book codec
    if isinstance(guidetext, binary_type):
        guidetext = guidetext.decode(mh.codec)

    # create an OPF
    opf = OPFProcessor(files, metadata, fileinfo, rscnames, ncx.isNCX, mh, usedmap, guidetext=guidetext)
    opf.writeOPF()
def main(argv=unicode_argv()):
    """Validate the launcher arguments, run the target plugin script inside
    a Wrapper/container sandbox, and emit the result xml on stdout.

    argv layout: [launcher, ebook_root, outdir, script_type, target_file].
    Returns 0 on success, -1 when argument or path validation fails (a
    failure wrapper xml is written via failed() in that case).
    """
    if len(argv) != 5:
        failed(None, msg="Launcher: improper number of arguments passed to launcher.py")
        return -1

    ebook_root, outdir, script_type, target_file = argv[1], argv[2], argv[3], argv[4]
    script_home = os.path.dirname(target_file)
    script_module = os.path.splitext(os.path.basename(target_file))[0]

    # do basic sanity checking anyway
    if script_type not in SUPPORTED_SCRIPT_TYPES:
        failed(None, msg="Launcher: script type %s is not supported" % script_type)
        return -1

    paths_ok = (unipath.exists(ebook_root) and unipath.isdir(ebook_root) and
                unipath.exists(outdir) and unipath.isdir(outdir) and
                unipath.exists(script_home) and unipath.isdir(script_home) and
                unipath.exists(target_file) and unipath.isfile(target_file))
    if not paths_ok:
        failed(None, msg="Launcher: missing or incorrect paths passed in")
        return -1

    # make the target module's home directory importable
    if script_home not in sys.path:
        sys.path.append(script_home)

    # load and parse opf if present
    op = None
    opf_path = os.path.join(ebook_root, "OEBPS", "content.opf")
    if unipath.exists(opf_path) and unipath.isfile(opf_path):
        op = Opf_Parser(opf_path)

    # create a wrapper for record keeping and safety
    record_keeper = Wrapper(ebook_root, outdir, op)

    # pick the container type matching the script type
    if script_type == "edit":
        container = BookContainer(record_keeper)
    elif script_type == "input":
        container = InputContainer(record_keeper)
    else:
        container = OutputContainer(record_keeper)

    # start the target script
    ps = ProcessScript(script_type, script_module, container)
    ps.launch()

    # collect and escape the target script's stdout and stderr
    successmsg = escapeit("".join(unicode_str(chunk) for chunk in ps.stdouttext))
    errorlog = escapeit("".join(unicode_str(chunk) for chunk in ps.stderrtext))

    # assemble the target's script wrapper xml; stderr is included only
    # on failure or when debugging, stdout only on success or debugging
    pieces = ["".join(ps.wrapout), "<msg>\n"]
    if ps.exitcode == 0:
        pieces.append(successmsg)
        if _DEBUG:
            pieces.append(errorlog)
    else:
        if _DEBUG:
            pieces.append(successmsg)
        pieces.append(errorlog)
    pieces.append("</msg>\n</wrapper>\n")
    resultxml = "".join(pieces)

    # write it to stdout and exit
    if PY3:
        sys.stdout.buffer.write(utf8_str(resultxml))
    else:
        sys.stdout.write(utf8_str(resultxml))
    return 0
def getGuideText(self): guidetext = b'' for [ref_type, ref_title, fileno] in self.guidetbl: if ref_type == b'thumbimagestandard': continue if ref_type not in _guide_types and not ref_type.startswith( b'other.'): if ref_type == b'start': ref_type = b'text' else: ref_type = b'other.' + ref_type [pos, idtext, filenum, seqnm, startpos, length] = self.fragtbl[fileno] [pn, pdir, filename, skelpos, skelend, aidtext] = self.getSkelInfo(pos) idtext = self.getIDTag(pos) linktgt = filename.encode('utf-8') if idtext != b'': linktgt += b'#' + idtext guidetext += b'<reference type="' + ref_type + b'" title="' + ref_title + b'" href="' + utf8_str( pdir) + b'/' + linktgt + b'" />\n' # opf is encoded utf-8 so must convert any titles properly guidetext = (guidetext.decode(self.mh.codec)).encode("utf-8") return guidetext