Beispiel #1
0
 def serialize_elem(self, elem, item, nsrmap=NSRMAP):
     """Write ``elem`` and its descendants to ``self.buf`` as markup.

     Elements whose tag is not a string, or whose namespace is not a key
     of ``nsrmap``, are skipped entirely (including their children).

     Side effects visible in this method:

     * the ``id`` attribute is removed from ``elem.attrib``; when it is
       non-empty, the normalized ``item.href + '#' + id`` is recorded in
       ``self.id_offsets`` unless that key is already present;
     * ``self.anchor_offset`` is set to the buffer position of each start
       tag and reset to ``None`` as soon as any text is written;
     * ``self.used_images`` gains every ``src`` target found in
       ``self.images``.

     :param elem: the element to serialize.
     :param item: the object owning ``elem``; its ``href`` and
         ``abshref()`` are used to resolve ids and image sources.
     :param nsrmap: mapping used to filter namespaces and to compute
         prefixed names; it is passed on unchanged to child elements.
     """
     buf = self.buf
     if not isinstance(elem.tag, basestring) \
             or namespace(elem.tag) not in nsrmap:
         return
     tag = prefixname(elem.tag, nsrmap)
     # Previous layers take care of @name
     id_ = elem.attrib.pop('id', None)
     if id_:
         href = '#'.join((item.href, id_))
         # Compare against None explicitly: 0 is a legitimate buffer
         # offset and must not fall through to the current position.
         if self.anchor_offset is None:
             offset = buf.tell()
         else:
             offset = self.anchor_offset
         key = urlnormalize(href)
         # Only set this id_offset if it wasn't previously seen
         self.id_offsets[key] = self.id_offsets.get(key, offset)
     # An <a> with no attributes, children or text carries nothing once
     # its id has been recorded, so it is dropped while an anchor offset
     # is pending.
     if self.anchor_offset is not None and \
             tag == 'a' and not elem.attrib and \
             not len(elem) and not elem.text:
         return
     self.anchor_offset = buf.tell()
     buf.write(b'<')
     buf.write(tag.encode('utf-8'))
     if elem.attrib:
         for attr, val in elem.attrib.items():
             if namespace(attr) not in nsrmap:
                 continue
             attr = prefixname(attr, nsrmap)
             buf.write(b' ')
             if attr == 'href':
                 # A true return value means serialize_href() has
                 # handled the attribute itself.
                 if self.serialize_href(val, item):
                     continue
             elif attr == 'src':
                 href = urlnormalize(item.abshref(val))
                 if href in self.images:
                     # Known images are referenced by index rather
                     # than by their source path.
                     index = self.images[href]
                     self.used_images.add(href)
                     buf.write(b'recindex="%05d"' % index)
                     continue
             buf.write(attr.encode('utf-8'))
             buf.write(b'="')
             self.serialize_text(val, quot=True)
             buf.write(b'"')
     buf.write(b'>')
     if elem.text or len(elem) > 0:
         if elem.text:
             self.anchor_offset = None
             self.serialize_text(elem.text)
         for child in elem:
             # Forward nsrmap so a caller-supplied map also applies to
             # descendants, not just to the root element.
             self.serialize_elem(child, item, nsrmap)
             if child.tail:
                 self.anchor_offset = None
                 self.serialize_text(child.tail)
     buf.write(b'</%s>' % tag.encode('utf-8'))
Beispiel #2
0
 def tree_to_binary(self,
                    elem,
                    nsrmap=NSRMAP,
                    parents=None,
                    inhead=False,
                    preserve=False):
     """Recursively emit ``elem`` and its subtree through ``self.write``.

     Along the way the method records ``(value, offset)`` pairs for every
     ``id``/``name`` attribute in ``self.anchors`` and appends
     ``(offset, ancestor offsets)`` entries to ``self.page_breaks`` based
     on the element's ``page-break-before``/``page-break-after`` style.

     Note that whitespace-only tails of children are set to ``None`` on
     the input tree when they are dropped, so the tree is modified.

     :param elem: element to serialize; elements with a non-string tag
         (comments and similar nodes) are ignored.
     :param nsrmap: namespace-URI -> prefix mapping; a shallow copy is
         extended with ``elem.nsmap`` before use, so the caller's mapping
         is not modified.
     :param parents: list of buffer offsets of the enclosing elements.
         When omitted, a new list is created for this call. A list passed
         explicitly is appended to and popped from during the call.
     :param inhead: true when ``elem`` is inside (or is) the ``head``
         element.
     :param preserve: whether whitespace is currently preserved; may be
         overridden for this element by its ``white-space`` style or its
         ``xml:space`` attribute.
     """
     if not isinstance(elem.tag, basestring):
         # Don't emit any comments or raw entities
         return
     if parents is None:
         # Create the list per call: a list used as the default value
         # would be shared by every call that omits the argument.
         parents = []
     nsrmap = copy.copy(nsrmap)
     attrib = dict(elem.attrib)
     style = self.stylizer.style(elem) if self.stylizer else None
     # Turn namespace declarations that differ from the inherited map
     # into explicit xmlns attributes.
     for key, value in elem.nsmap.items():
         if value not in nsrmap or nsrmap[value] != key:
             xmlns = ('xmlns:' + key) if key else 'xmlns'
             attrib[xmlns] = value
         nsrmap[value] = key
     tag = prefixname(elem.tag, nsrmap)
     tag_offset = self.buf.tell()
     if tag == 'head':
         inhead = True
     flags = FLAG_OPENING
     # An element with neither text nor children is opened and closed
     # by the same record.
     if not elem.text and len(elem) == 0:
         flags |= FLAG_CLOSING
     if inhead:
         flags |= FLAG_HEAD
     if style and self.is_block(style):
         flags |= FLAG_BLOCK
     self.write(0, flags)
     # Start from the attribute table at index 0; a known tag with its
     # own non-empty table replaces it.
     tattrs = self.tattrs[0]
     if tag in self.tags:
         index = self.tags[tag]
         self.write(index)
         if self.tattrs[index]:
             tattrs = self.tattrs[index]
     else:
         self.write(FLAG_CUSTOM, len(tag) + 1, tag)
     last_break = self.page_breaks[-1][0] if self.page_breaks else None
     # Avoid recording two consecutive breaks at the same offset.
     if style and last_break != tag_offset \
        and style['page-break-before'] in PAGE_BREAKS:
         self.page_breaks.append((tag_offset, list(parents)))
     for attr, value in attrib.items():
         attr = prefixname(attr, nsrmap)
         if attr in ('href', 'src'):
             value = urlnormalize(value)
             path, frag = urldefrag(value)
             if self.item:
                 path = self.item.abshref(path)
             # Prefix chr(2) marks a link rewritten to a manifest id,
             # chr(3) any other link.
             prefix = unichr(3)
             if path in self.manifest.hrefs:
                 prefix = unichr(2)
                 value = self.manifest.hrefs[path].id
                 if frag:
                     value = '#'.join((value, frag))
             value = prefix + value
         elif attr in ('id', 'name'):
             self.anchors.append((value, tag_offset))
         elif attr.startswith('ms--'):
             attr = '%' + attr[4:]
         elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
             value = CSS_MIME
         if attr in tattrs:
             self.write(tattrs[attr])
         else:
             self.write(FLAG_CUSTOM, len(attr) + 1, attr)
         # Values that parse as integers are written in numeric form,
         # everything else as a length-prefixed string.
         try:
             self.write(ATTR_NUMBER, int(value) + 1)
         except ValueError:
             self.write(len(value) + 1, value)
     self.write(0)
     # Remember the inherited setting: it still governs this element's
     # own tail, which belongs to the parent's content.
     old_preserve = preserve
     if style:
         preserve = (style['white-space'] in ('pre', 'pre-wrap'))
     xml_space = elem.get(XML('space'))
     if xml_space == 'preserve':
         preserve = True
     elif xml_space == 'normal':
         preserve = False
     if elem.text:
         if preserve:
             self.write(elem.text)
         elif len(elem) == 0 or not elem.text.isspace():
             self.write(COLLAPSE.sub(' ', elem.text))
         # else: de nada
     parents.append(tag_offset)
     child = cstyle = nstyle = None
     # Iterate one step ahead (with a trailing None sentinel) so each
     # child is processed knowing the style of its following sibling.
     for nxt in chain(elem, [None]):
         if self.stylizer:
             nstyle = None if nxt is None else self.stylizer.style(nxt)
         if child is not None:
             if not preserve \
                and (inhead or not nstyle or self.is_block(cstyle) or self.is_block(nstyle)) \
                and child.tail and child.tail.isspace():
                 child.tail = None
             self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
         child, cstyle = nxt, nstyle
     parents.pop()
     preserve = old_preserve
     if not flags & FLAG_CLOSING:
         self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
     if elem.tail and tag != 'html':
         tail = elem.tail
         if not preserve:
             tail = COLLAPSE.sub(' ', tail)
         self.write(tail)
     if style and style['page-break-after'] not in ('avoid', 'auto'):
         self.page_breaks.append((self.buf.tell(), list(parents)))
Beispiel #3
0
 def tree_to_binary(self, elem, nsrmap=NSRMAP, parents=None,
                    inhead=False, preserve=False):
     """Serialize ``elem`` and all of its descendants via ``self.write``.

     While writing, every ``id``/``name`` attribute is recorded in
     ``self.anchors`` together with the element's buffer offset, and
     page-break positions derived from the element's style are appended
     to ``self.page_breaks`` with a snapshot of the ancestor offsets.

     The input tree is modified: a child's whitespace-only tail is set
     to ``None`` when it is discarded.

     :param elem: element to serialize; nodes whose tag is not a string
         (comments and similar) produce no output.
     :param nsrmap: namespace-URI -> prefix mapping. It is shallow-copied
         and updated from ``elem.nsmap``; the caller's object is left
         untouched.
     :param parents: buffer offsets of the enclosing elements. Defaults
         to a new empty list for each call; an explicitly supplied list
         is pushed to and popped from while children are processed.
     :param inhead: true once processing is at or below the ``head``
         element.
     :param preserve: inherited whitespace-preservation flag; the
         element's ``white-space`` style and ``xml:space`` attribute can
         override it for the element's content.
     """
     if not isinstance(elem.tag, basestring):
         # Don't emit any comments or raw entities
         return
     if parents is None:
         # Never use a list literal as the default: it would be one
         # object shared by all calls that leave the argument out.
         parents = []
     nsrmap = copy.copy(nsrmap)
     attrib = dict(elem.attrib)
     style = self.stylizer.style(elem) if self.stylizer else None
     # Namespace bindings not already present in the inherited map are
     # emitted as xmlns attributes on this element.
     for key, value in elem.nsmap.items():
         if value not in nsrmap or nsrmap[value] != key:
             xmlns = ('xmlns:' + key) if key else 'xmlns'
             attrib[xmlns] = value
         nsrmap[value] = key
     tag = prefixname(elem.tag, nsrmap)
     tag_offset = self.buf.tell()
     if tag == 'head':
         inhead = True
     flags = FLAG_OPENING
     # No text and no children: the same record also closes the tag.
     if not elem.text and len(elem) == 0:
         flags |= FLAG_CLOSING
     if inhead:
         flags |= FLAG_HEAD
     if style and self.is_block(style):
         flags |= FLAG_BLOCK
     self.write(0, flags)
     # Attribute table 0 is the fallback; a known tag may supply its
     # own non-empty table.
     tattrs = self.tattrs[0]
     if tag in self.tags:
         index = self.tags[tag]
         self.write(index)
         if self.tattrs[index]:
             tattrs = self.tattrs[index]
     else:
         self.write(FLAG_CUSTOM, len(tag)+1, tag)
     last_break = self.page_breaks[-1][0] if self.page_breaks else None
     # Skip the break if the previous one was recorded at this offset.
     if style and last_break != tag_offset \
        and style['page-break-before'] in PAGE_BREAKS:
         self.page_breaks.append((tag_offset, list(parents)))
     for attr, value in attrib.items():
         attr = prefixname(attr, nsrmap)
         if attr in ('href', 'src'):
             value = urlnormalize(value)
             path, frag = urldefrag(value)
             if self.item:
                 path = self.item.abshref(path)
             # chr(2) prefixes links resolved to a manifest id, chr(3)
             # prefixes all other links.
             prefix = unichr(3)
             if path in self.manifest.hrefs:
                 prefix = unichr(2)
                 value = self.manifest.hrefs[path].id
                 if frag:
                     value = '#'.join((value, frag))
             value = prefix + value
         elif attr in ('id', 'name'):
             self.anchors.append((value, tag_offset))
         elif attr.startswith('ms--'):
             attr = '%' + attr[4:]
         elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
             value = CSS_MIME
         if attr in tattrs:
             self.write(tattrs[attr])
         else:
             self.write(FLAG_CUSTOM, len(attr)+1, attr)
         # Integer-like values use the numeric form; anything else is
         # written as a length-prefixed string.
         try:
             self.write(ATTR_NUMBER, int(value)+1)
         except ValueError:
             self.write(len(value)+1, value)
     self.write(0)
     # The inherited flag is restored below because the element's tail
     # is part of the parent's content, not its own.
     old_preserve = preserve
     if style:
         preserve = (style['white-space'] in ('pre', 'pre-wrap'))
     xml_space = elem.get(XML('space'))
     if xml_space == 'preserve':
         preserve = True
     elif xml_space == 'normal':
         preserve = False
     if elem.text:
         if preserve:
             self.write(elem.text)
         elif len(elem) == 0 or not elem.text.isspace():
             self.write(COLLAPSE.sub(' ', elem.text))
         # else: de nada
     parents.append(tag_offset)
     child = cstyle = nstyle = None
     # Look-ahead iteration: the trailing None lets the last child be
     # handled with "no following sibling" as its neighbour.
     for nxt in chain(elem, [None]):
         if self.stylizer:
             nstyle = None if nxt is None else self.stylizer.style(nxt)
         if child is not None:
             if not preserve \
                and (inhead or not nstyle or self.is_block(cstyle) or self.is_block(nstyle)) \
                and child.tail and child.tail.isspace():
                 child.tail = None
             self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
         child, cstyle = nxt, nstyle
     parents.pop()
     preserve = old_preserve
     if not flags & FLAG_CLOSING:
         self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
     if elem.tail and tag != 'html':
         tail = elem.tail
         if not preserve:
             tail = COLLAPSE.sub(' ', tail)
         self.write(tail)
     if style and style['page-break-after'] not in ('avoid', 'auto'):
         self.page_breaks.append((self.buf.tell(), list(parents)))