def _images(self, manifest):
    """Convert every raster image in *manifest* to a greyscale PNG.

    Each item whose ``media_type`` is in ``OEB_RASTER_IMAGES`` is opened,
    converted to mode ``'L'`` and re-encoded as PNG. The image receives a
    generated name of the form ``<index>.png`` (passed through
    ``unique_name`` against the names handed out so far), and the mapping
    from the item's ``href`` to that name is recorded in ``self.name_map``.

    An item that fails to convert is reported through ``self.log.error``
    and skipped; the remaining items are still processed.

    Returns a list of ``(name, png_data)`` tuples, in manifest order.
    """
    from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

    images = []
    used_names = []
    for item in manifest:
        if item.media_type not in OEB_RASTER_IMAGES:
            continue
        try:
            # 'L' requests a single-channel greyscale image
            # (assuming Image is PIL's Image module -- confirm).
            im = Image.open(io.BytesIO(item.data)).convert('L')
            buf = io.BytesIO()
            im.save(buf, 'PNG')
            data = buf.getvalue()

            name = unique_name('%s.png' % len(used_names), used_names)
            used_names.append(name)
            self.name_map[item.href] = name
            images.append((name, data))
        except Exception as e:
            # Deliberately best-effort: one unreadable image must not abort
            # the whole conversion, so log it and carry on.
            self.log.error('Error: Could not include file %s because '
                           '%s.' % (item.href, e))
    return images
def _images(self, manifest):
    """Convert every raster image in *manifest* to a greyscale PNG.

    Each item whose ``media_type`` is in ``OEB_RASTER_IMAGES`` is opened,
    converted to mode ``'L'`` and re-encoded as PNG. The image receives a
    generated name of the form ``<index>.png`` (passed through
    ``unique_name`` against the names handed out so far), and the mapping
    from the item's ``href`` to that name is recorded in ``self.name_map``.

    An item that fails to convert is reported through ``self.log.error``
    and skipped; the remaining items are still processed.

    Returns a list of ``(name, png_data)`` tuples, in manifest order.
    """
    # io.BytesIO is available on both Python 2.6+ and Python 3, whereas
    # cStringIO exists only on Python 2. Imported locally so this method
    # does not depend on a module-level import being present.
    import io

    from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

    images = []
    used_names = []
    for item in manifest:
        if item.media_type not in OEB_RASTER_IMAGES:
            continue
        try:
            # 'L' requests a single-channel greyscale image
            # (assuming Image is PIL's Image module -- confirm).
            im = Image.open(io.BytesIO(item.data)).convert('L')
            buf = io.BytesIO()
            im.save(buf, 'PNG')
            data = buf.getvalue()

            name = unique_name('%s.png' % len(used_names), used_names)
            used_names.append(name)
            self.name_map[item.href] = name
            images.append((name, data))
        except Exception as e:
            # Deliberately best-effort: one unreadable image must not abort
            # the whole conversion, so log it and carry on.
            self.log.error('Error: Could not include file %s because '
                           '%s.' % (item.href, e))
    return images
def dump_text(self, elem, stylizer, page, tag_stack=None):
    """Recursively serialise *elem* and its descendants to markup fragments.

    Parameters:
        elem: the element to serialise; its ``tag``, ``text``, ``tail``,
            attributes and children are read.
        stylizer: object whose ``style(elem)`` result is indexed by CSS
            property name (``'display'``, ``'visibility'`` and the
            properties listed in ``STYLES``).
        page: object providing ``abshref`` to resolve ``src``/``href``
            values; also forwarded to ``self.get_anchor``.
        tag_stack: list of tags opened so far and not yet closed. It is
            shared with the recursive calls; top-level callers normally
            omit it and get a fresh list.

    Returns a list of strings. Elements whose tag is not a string or is
    not in the XHTML namespace, and elements hidden by style, contribute
    only their tail text.

    Side effects: may add entries to ``self.name_map`` (image src -> name)
    and ``self.link_hrefs`` (internal link target -> anchor name).
    """
    from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace

    # A mutable default would be shared by every call, so a traversal that
    # raised part-way through would leave stale tags behind for the next one.
    if tag_stack is None:
        tag_stack = []

    if not isinstance(elem.tag, basestring) or namespace(elem.tag) != XHTML_NS:
        p = elem.getparent()
        if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
                and elem.tail:
            # NOTE(review): this tail is returned without
            # prepare_string_for_xml, unlike the tail appended at the end of
            # this method -- confirm that is intended.
            return [elem.tail]
        return [u'']

    text = [u'']
    style = stylizer.style(elem)

    if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
        if hasattr(elem, 'tail') and elem.tail:
            return [elem.tail]
        return [u'']

    tag = barename(elem.tag)
    # Number of tags this call pushes onto tag_stack; exactly that many are
    # popped again below so the stack is balanced on return.
    tag_count = 0

    # Process tags that need special processing and that do not have inner
    # text. Usually these require an argument
    if tag in IMAGE_TAGS:
        src = elem.attrib.get('src', None)
        if src:
            src_href = page.abshref(src)
            if src_href not in self.name_map:
                self.name_map[src_href] = unique_name(
                    '%s' % len(self.name_map), self.name_map.keys())
            text.append('<IMG SRC="%s">' % self.name_map[src_href])

    rb_tag = tag.upper() if tag in TAGS else None
    if rb_tag:
        tag_count += 1
        text.append('<%s>' % rb_tag)
        tag_stack.append(rb_tag)

    # Anchors links
    if tag in LINK_TAGS:
        href = elem.get('href')
        if href:
            href = page.abshref(href)
            # Only hrefs without a scheme are turned into anchors; anything
            # containing '://' emits no <A> tag at all.
            if '://' not in href:
                if '#' not in href:
                    href += '#'
                if href not in self.link_hrefs:
                    self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs)
                href = self.link_hrefs[href]
                text.append('<A HREF="#%s">' % href)
                tag_count += 1
                tag_stack.append('A')

    # Anchor ids
    id_name = elem.get('id')
    if id_name:
        text.append(self.get_anchor(page, id_name))

    # Processes style information
    for s in STYLES:
        # s[0] is the style property name, s[1] maps its value to a tag.
        style_tag = s[1].get(style[s[0]], None)
        if style_tag:
            style_tag = style_tag.upper()
            tag_count += 1
            text.append('<%s>' % style_tag)
            tag_stack.append(style_tag)

    # Process tags that contain text.
    if hasattr(elem, 'text') and elem.text:
        text.append(prepare_string_for_xml(elem.text))
    for item in elem:
        text += self.dump_text(item, stylizer, page, tag_stack)

    # Pop the tags opened by this call and hand them to close_tags in the
    # order they were opened (pop yields them newest-first, hence reverse).
    close_tag_list = []
    for _ in range(tag_count):
        close_tag_list.append(tag_stack.pop())
    close_tag_list.reverse()

    text += self.close_tags(close_tag_list)

    if hasattr(elem, 'tail') and elem.tail:
        text.append(prepare_string_for_xml(elem.tail))

    return text