import re target_file = "D:\CLIENTS\Random House\_CONVERSIONS\howbrit\New/final.epub" image_output_dir = "D:\CLIENTS\Random House\_CONVERSIONS\howbrit\New/bakedcaptions" font_size = 20 fontdir = "C:/Windows/Fonts/" italicfont = "Bitter-Italic.otf" regularfont = "Bitter-Regular.otf" fontcolor = "#36250b" default_x = 600 default_y = 800 data = [] with epub.epubFile(target_file) as pew: # pew.info.findIDreferences(r"(page[0-9]*)") imagefinder = re.compile( '<div class="image">[\s]*<img alt="" class="image-fix"( height="[0-9]*")? src="(.*?)"( width="[0-9]*")? />[\s]*<p class="caption">(.*?)</p>[\s]*</div>' ) for spine_item in pew.info.opf.spine: contents = spine_item.read() for match in re.finditer(imagefinder, contents): proto_data = { "src": None, "caption": None, "location": spine_item.opfRelLoc, "item": None, "match": None, "x": None, "y": None,
#!/usr/bin/python
"""Scratch script that opens the bundled sample EPUB for manual inspection.

The archive is bound to the module-level name ``test``. The commented-out
snippets further down are ad-hoc checks (written with Python 2 ``print``
statements) that can be re-enabled one at a time.
"""
import epub
import os

# Build the sample path from this script's own location (as given by __file__).
_script_dir = os.path.dirname(__file__)
_sample_path = os.path.join(_script_dir, "test_files/sample_file.epub")
test = epub.epubFile(_sample_path)
#test = epub.epubFile(os.path.join(os.path.dirname(__file__), "test_files/sample_file.epub"))

#get opf location test
#print test.info._getOPFLocation()

#contents test
#for item in test.info.contents:
#    print item.info()

#contents test2
#print test.info.contents.opfIdDir
#print test.info.opf.contents.opfIdDir

#manifest test
#for item in test.info.opf.manifest:
#    print item.info()

#spine test
#for item in test.info.opf.spine:
#    print item.opfRelLoc

#random test 1
#item = test.info.opf.manifest[1]
#print item.info()
import re target_file = "D:\CLIENTS\Random House\_CONVERSIONS\howbrit\New/final.epub" image_output_dir = "D:\CLIENTS\Random House\_CONVERSIONS\howbrit\New/bakedcaptions" font_size = 20 fontdir = "C:/Windows/Fonts/" italicfont = "Bitter-Italic.otf" regularfont = "Bitter-Regular.otf" fontcolor = '#36250b' default_x = 600 default_y = 800 data = [] with epub.epubFile(target_file) as pew: #pew.info.findIDreferences(r"(page[0-9]*)") imagefinder = re.compile( "<div class=\"image\">[\s]*<img alt=\"\" class=\"image-fix\"( height=\"[0-9]*\")? src=\"(.*?)\"( width=\"[0-9]*\")? />[\s]*<p class=\"caption\">(.*?)</p>[\s]*</div>" ) for spine_item in pew.info.opf.spine: contents = spine_item.read() for match in re.finditer(imagefinder, contents): proto_data = { "src": None, "caption": None, "location": spine_item.opfRelLoc, "item": None, "match": None, "x": None, "y": None,