def add_el(self, el, el_len=100):
    """Append ``el`` to the element on top of ``self.el_stack``.

    Before appending, ``self.flush_els()`` is called if the running length
    total already exceeds ``self.max_el_len_total``.  If the stack is empty,
    a new document is obtained from ``self.make_xhtml()`` and its body
    element becomes the bottom of the stack.

    Args:
        el: element to append to the current top-of-stack element.
        el_len: amount added to ``self.el_len_total`` for this element.

    Returns:
        The string ``'part' + <self.part_number zero-padded to 4> + '.html'``.
    """
    if self.el_len_total > self.max_el_len_total:
        self.flush_els()
    if not self.el_stack:
        # An empty stack is only expected when no part is currently open.
        assert_d(self.current_part is None)
        self.current_part, body_el = self.make_xhtml()
        self.el_stack.append(body_el)
    self.el_stack[-1].append(el)
    self.el_len_total += el_len
    return 'part' + str(self.part_number).zfill(4) + '.html'
def draw_text_el(draw, dpi, el):
    """Draw the paragraphs, lines and characters of a text element.

    For every paragraph in ``el``:

    * if ``box_from_par`` returns a box, the paragraph is passed to
      ``render`` with the ``'par'`` style, and a label listing whichever of
      its layout attributes are set (e.g. ``"ta:left li:120 "``) is written
      at the first point of that box in ``color.green``;
    * every line is passed to ``render`` with the ``'line'`` style;
    * when ``opts.text`` is true, each character is drawn in
      ``color.yellow`` at its ``l``/``b`` attributes divided by
      ``opts.scale``.

    Args:
        draw: drawing object; handed to ``render`` and used via ``draw.text``.
        dpi: resolution; divided by ``opts.scale`` when requesting fonts.
        el: iterable of paragraph elements.
    """
    # (attribute name, short label) pairs shown beside each paragraph.
    par_attrs = (
        ('align', 'ta'),
        ('leftIndent', 'li'),
        ('rightIndent', 'ri'),
        ('startIndent', 'si'),
        ('lineSpacing', 'ls'),
    )
    for par in el:
        par_coords = box_from_par(par)
        if par_coords is not None:
            render(draw, par, 'par', par_coords)
            tl, _rb = par_coords
            labels = []
            for att, nick in par_attrs:
                att_txt = par.get(att)
                if att_txt is not None:
                    labels.append(nick + ':' + att_txt + ' ')
            if labels:
                f = font.get_font("Courier", dpi / opts.scale, 12)
                draw.text(tl, ''.join(labels), font=f, fill=color.green)
        for line in par:
            render(draw, line, 'line')
            for fmt in line:
                assert_d(fmt.tag == abyns + 'formatting')
                font_name = fmt.get('ff')
                # The 'fs' value is turned into an int by dropping any dots.
                font_size = int(re.sub(r'\.', '', fmt.get('fs')))
                font_ital = (fmt.get('italic') == 'true')
                f = font.get_font(font_name, dpi / opts.scale,
                                  font_size, font_ital)
                for cp in fmt:
                    assert_d(cp.tag == abyns + 'charParams')
                    if opts.text:
                        draw.text((int(cp.get('l')) / opts.scale,
                                   int(cp.get('b')) / opts.scale),
                                  cp.text.encode('utf-8'),
                                  font=f, fill=color.yellow)
def assert_tag(el, expected):
    """Check that ``el``'s tag, as shortened by ``nons``, equals ``expected``.

    Elements whose shortened tag is ``'block'`` are exempt from the check.
    The comparison result is handed to ``assert_d``.
    """
    short_name = nons(el.tag)
    if short_name == 'block':
        return
    assert_d(short_name == expected)
def _render_paragraphs(draw, dpi, pars):
    """Render each paragraph in ``pars``, its lines, and draw its characters."""
    for par in pars:
        par_coords = box_from_par(par)
        if par_coords is not None:
            render(draw, par, 'par', par_coords)
        for line in par:
            render(draw, line, 'line')
            for fmt in line:
                assert_d(fmt.tag == abyns + 'formatting')
                font_name = fmt.get('ff')
                # The 'fs' value is turned into an int by dropping any dots.
                font_size = int(re.sub(r'\.', '', fmt.get('fs')))
                font_ital = (fmt.get('italic') == 'true')
                f = font.get_font(font_name, dpi / scale, font_size, font_ital)
                for cp in fmt:
                    assert_d(cp.tag == abyns + 'charParams')
                    draw.text((int(cp.get('l')) / s, int(cp.get('b')) / s),
                              cp.text.encode('utf-8'),
                              font=f, fill=color.yellow)


def scan_pages(context, scandata, iabook):
    """Write one annotated PNG per page yielded by ``context``.

    For each ``(event, page)`` pair, in order:

    1. A new RGB image is created at the page's ``width``/``height``
       attributes divided by the module-level ``s``.
    2. The page image is requested from ``iabook`` as ``ppm``; if one is
       returned it is resized when its size differs and blended into the
       new image with alpha ``.2``.
    3. For ``Picture`` blocks, the matching region of the page image is
       cropped and pasted into the new image (only when a page image exists).
    4. ``Text``/``Picture``/``Table`` blocks are passed to ``render``; the
       paragraphs inside ``text`` children and inside the cells of ``row``
       children are drawn; ``region`` children are skipped.
    5. If ``include_page`` is false for the page's scandata entry, a line in
       ``color.red`` is drawn from ``(0, 0)`` to ``image.size``.
    6. The image is saved as ``outdir + '/img' + <leafNum> + '.png'``, the
       page index is printed and ``page.clear()`` is called.

    Relies on the module-level names ``s``, ``scale`` and ``outdir``.

    Args:
        context: iterable of ``(event, page)`` pairs; ``event`` is ignored.
        scandata: object exposing ``pageData.page`` (indexed by page
            position) and, optionally, ``bookData.dpi.text``.
        iabook: object exposing ``get_book_id()`` and ``get_page_image(...)``.

    Returns:
        None.

    Note:
        A block child with any other tag prints a message and terminates the
        process via ``sys.exit(-1)``.
    """
    # Result is unused here; the call is retained in case it has side
    # effects -- TODO confirm and drop.
    iabook.get_book_id()
    scandata_pages = scandata.pageData.page
    try:
        # dpi isn't always there
        dpi = int(scandata.bookData.dpi.text)
    except AttributeError:
        dpi = 300

    # blockType attribute value -> style name handed to render().
    block_styles = {
        'Text': 'block_text',
        'Picture': 'block_picture',
        'Table': 'block_table',
    }

    for i, (_event, page) in enumerate(context):
        width = int(page.get('width')) / s
        height = int(page.get('height')) / s
        image = Image.new('RGB', (width, height))

        page_image = None
        image_str = iabook.get_page_image(i, width, height,
                                          out_img_type='ppm')
        if image_str is not None:
            page_image = Image.open(StringIO.StringIO(image_str))
            (nw, nh) = page_image.size
            if nw != width or nh != height:
                page_image = page_image.resize((width, height))
            try:
                image = Image.blend(image, page_image, .2)
            except ValueError:
                # Best effort: keep the blank image and carry on.
                # Single-argument print(...) prints the same text under
                # Python 2's print statement.
                print('blending - images didn\'t match')
                debug()

        draw = ImageDraw.Draw(image)

        # First pass: paste picture regions, so everything drawn in the
        # second pass lands on top of them.
        for block in page:
            if block.get('blockType') == 'Picture' and page_image is not None:
                box = four_coords(block, scale)
                image.paste(page_image.crop(box), box)

        # Second pass: render blocks and the text they contain.
        for block in page:
            style = block_styles.get(block.get('blockType'))
            if style is not None:
                render(draw, block, style)
            for el in block:
                if el.tag == abyns + 'region':
                    continue
                if el.tag == abyns + 'row':
                    for cell in el:
                        for text in cell:
                            _render_paragraphs(draw, dpi, text)
                elif el.tag == abyns + 'text':
                    _render_paragraphs(draw, dpi, el)
                else:
                    print('unexpected tag type' + el.tag)
                    sys.exit(-1)

        if not include_page(scandata_pages[i]):
            draw.line([(0, 0), image.size], width=50, fill=color.red)
        image.save(outdir + '/img' + scandata_pages[i].get('leafNum') + '.png')
        print('page index: ' + str(i))
        # Presumably releases the parsed page's children to bound memory
        # while streaming -- TODO confirm against how `context` is built.
        page.clear()
    return None