def read_manifest(self):
    """Parse the ``/manifest`` entry into ``self.manifest`` and ``self.paths``.

    ``self.manifest`` maps each item's internal name to its
    ``ManifestItem``; ``self.paths`` maps each item's (normalised) path to
    the same item, and additionally maps ``self.opf_path`` to ``None``.

    After parsing, a directory prefix shared by every item path is
    stripped, and any path that is still absolute is reduced to its base
    name.

    Raises:
        LitError: if the file has no ``/manifest`` entry, or the manifest
            data runs out while a section or file record is being read.
    """
    if '/manifest' not in self.entries:
        raise LitError('Lit file does not have a valid manifest')
    raw = self.get_file('/manifest')
    self.manifest = {}
    self.paths = {self.opf_path: None}
    while raw:
        # Each section opens with a one-byte length followed by the root
        # name; a zero length terminates the manifest.
        slen, raw = ord(raw[0:1]), raw[1:]
        if slen == 0:
            break
        root, raw = raw[:slen].decode('utf8'), raw[slen:]
        if not raw:
            raise LitError('Truncated manifest')
        # A section holds one file group per state, in this fixed order.
        for state in ['spine', 'not spine', 'css', 'images']:
            num_files, raw = int32(raw), raw[4:]
            if num_files == 0:
                continue
            for _ in range(num_files):
                if len(raw) < 5:
                    raise LitError('Truncated manifest')
                offset, raw = u32(raw), raw[4:]
                internal, raw = consume_sized_utf8_string(raw)
                original, raw = consume_sized_utf8_string(raw)
                # The path should be stored unquoted, but not always
                original = urlunquote(original)
                # Is this last one UTF-8 or ASCIIZ?
                mime_type, raw = consume_sized_utf8_string(raw, zpad=True)
                self.manifest[internal] = ManifestItem(
                    original, internal, mime_type, offset, root, state)
    mlist = list(itervalues(self.manifest))
    # Remove any common path elements
    if len(mlist) > 1:
        shared = mlist[0].path
        for item in mlist[1:]:
            path = item.path
            while shared and not path.startswith(shared):
                try:
                    # Drop the last path component, keeping the trailing "/".
                    shared = shared[:shared.rindex("/", 0, -2) + 1]
                except ValueError:
                    shared = None
            if not shared:
                break
        if shared:
            # If the loop never had to trim, ``shared`` is still the first
            # item's full path (file name included).  Only strip up to the
            # last "/" so that no item ends up with an empty or truncated
            # file name.  A trimmed prefix already ends in "/", so this is
            # a no-op in that case.
            slen = shared.rfind('/') + 1
            if slen:
                for item in mlist:
                    item.path = item.path[slen:]
    # Fix any straggling absolute paths
    for item in mlist:
        # startswith() rather than path[0]: an empty path must not raise.
        if item.path.startswith('/'):
            item.path = os.path.basename(item.path)
        self.paths[item.path] = item
def read(self, name):
    """Return the content stored under *name* in the LIT file.

    A falsy *name*, or a name whose path entry is ``None``, yields
    ``OPF_DECL`` followed by ``self._read_meta()``.  Items whose state
    contains ``'spine'`` are read from ``/data/<internal>/content``, passed
    through ``UnBinary`` and prefixed with ``HTML_DECL``; the ``st1:``
    personname/place/city/country-region tags are then removed and
    ``<form>`` tags are rewritten as ``<div>``.  Every other item is
    returned exactly as ``get_file`` gives it.

    Raises:
        KeyError: if the unquoted *name* is not in the path map.
    """
    entry = None
    if name:
        entry = self._litfile.paths[urlunquote(name)]
    if entry is None:
        return OPF_DECL + self._read_meta()

    litfile = self._litfile
    if 'spine' not in entry.state:
        # Non-markup item: hand back the stored data untouched.
        return litfile.get_file('/'.join(('/data', entry.internal)))

    markup_path = '/'.join(('/data', entry.internal, 'content'))
    binary = litfile.get_file(markup_path)
    decoder = UnBinary(
        binary, name, litfile.manifest, HTML_MAP, litfile.get_atoms(entry))
    html = HTML_DECL + decoder.unicode_representation

    # Drop opening and closing st1:* tags, case-insensitively.
    tags = ('personname', 'place', 'city', 'country-region')
    pat = r'(?i)</{0,1}st1:(%s)>' % ('|'.join(tags))
    html = re.sub(pat, '', html)
    # Turn <form> / </form> into <div> / </div>.
    return re.sub(r'<(/{0,1})form>', r'<\1div>', html)
def read(self, name):
    """Fetch the content that the LIT file stores for *name*.

    * Falsy *name*, or a path entry of ``None``: ``OPF_DECL`` plus the
      result of ``self._read_meta()``.
    * Entry whose state contains ``'spine'``: the ``content`` stream under
      ``/data/<internal>`` is fed to ``UnBinary``, prefixed with
      ``HTML_DECL``, stripped of ``st1:`` personname/place/city/
      country-region tags, and has its ``form`` tags renamed to ``div``.
    * Anything else: the data at ``/data/<internal>`` unchanged.

    A *name* that is not in the path map after unquoting raises
    ``KeyError``.
    """
    if name:
        item = self._litfile.paths[urlunquote(name)]
    else:
        item = None

    if item is None:
        result = OPF_DECL + self._read_meta()
    elif 'spine' in item.state:
        location = '/'.join(('/data', item.internal, 'content'))
        payload = self._litfile.get_file(location)
        manifest = self._litfile.manifest
        atoms = self._litfile.get_atoms(item)
        text = UnBinary(
            payload, name, manifest, HTML_MAP, atoms).unicode_representation
        result = HTML_DECL + text
        # Remove st1:* open/close tags regardless of case.
        tags = ('personname', 'place', 'city', 'country-region')
        pat = r'(?i)</{0,1}st1:(%s)>' % ('|'.join(tags))
        result = re.sub(pat, '', result)
        # <form> and </form> become <div> and </div>.
        result = re.sub(r'<(/{0,1})form>', r'<\1div>', result)
    else:
        location = '/'.join(('/data', item.internal))
        result = self._litfile.get_file(location)
    return result
def exists(self, name):
    """Return ``True`` if *name*, once URL-unquoted, is a key of the LIT file's path map."""
    decoded = urlunquote(name)
    known_paths = self._litfile.paths
    return decoded in known_paths