def run(bk):
    """Copy the text of each file's top-ranked heading into its <title>.

    Files come from ``files_iter(bk, on_selected)``, where ``on_selected``
    is true when ``bk.selected_iter()`` yields at least one truthy item.
    For every file the heading levels h1..h6 are tried in that order and
    the text of the first level that is present is used; when the file has
    no heading at all the title is set to the empty string.  A <title>
    element is created inside <head> if none exists.

    Returns 0.
    """
    on_selected = any(bk.selected_iter())

    for file_id, file_href in files_iter(bk, on_selected):
        xhtml_soup = sigil_bs4.BeautifulSoup(bk.readfile(file_id), 'lxml')

        # Highest-ranked heading level wins, not the first one in document order.
        header = ''
        for level in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
            heading = xhtml_soup.find(level)
            if heading:
                header = heading.text
                break

        head = xhtml_soup.head
        if not head.title:
            head.append(xhtml_soup.new_tag("title"))
        head.title.string = header

        bk.writefile(file_id, xhtml_soup.prettyprint_xhtml(indent_chars=" "))

    return 0
def parse(text, **kwargs):
    """Parse *text* with ``gumboc`` and rebuild the result as a soup tree.

    ``kwargs`` are forwarded unchanged to ``gumboc.parse``.  The soup starts
    out empty; the document node and then every child of the document are
    added to it, after which the next/previous pointers are wired up
    starting at ``soup.html``.

    Returns the populated soup object.
    """
    with gumboc.parse(text, **kwargs) as output:
        document = output.contents.document.contents
        soup = sigil_bs4.BeautifulSoup('', "html.parser")
        _add_document(soup, document)
        for child in document.children:
            soup.append(_add_node(soup, child))
        _add_next_prev_pointers(soup.html)
        # Returned while the gumbo output is still open.
        return soup
def getCoverImageID(bk):
    """Return the cover image id recorded in the book's metadata.

    Looks for the first ``<meta>`` element whose ``name`` attribute is
    ``cover`` and returns its ``content`` attribute (``None`` if that
    attribute is absent).  Returns the empty string when there is no such
    ``<meta>`` element.
    """
    opf_soup = sigil_bs4.BeautifulSoup(bk.getmetadataxml(), 'xml')
    for meta in opf_soup.find_all('meta'):
        if meta.get('name') == 'cover':
            return meta.get('content')
    return ''
def editIdentifierInToC(bk, BookId):
    """Write *BookId* into the ``dtb:uid`` meta entry of the ToC file.

    The ToC file is located with ``bk.gettocid()``.  Every ``<meta>`` under
    its ``<head>`` whose ``name`` is ``dtb:uid`` gets ``content`` set to
    *BookId*, and each change is printed.  The file is then written back
    using the soup's ``prettify()`` output.
    """
    toc_id = bk.gettocid()
    toc_soup = sigil_bs4.BeautifulSoup(bk.readfile(toc_id), 'xml')
    head = toc_soup.find('head')

    for meta in head.find_all('meta'):
        if meta.get('name') != "dtb:uid":
            continue
        meta['content'] = BookId
        print('Setting identifier in ToC: %s' % meta)

    bk.writefile(toc_id, toc_soup.prettify())
def setCoverImageID(bk, coverImgID):
    """Point the book's ``cover`` metadata entry at *coverImgID*.

    When *coverImgID* is truthy, every existing ``<meta name="cover">``
    under ``<metadata>`` is removed and a single new one carrying
    *coverImgID* as its ``content`` is appended.  The metadata is then
    handed back to ``bk.setmetadataxml`` as a string.
    """
    soup = sigil_bs4.BeautifulSoup(bk.getmetadataxml(), 'xml')
    metadata = soup.find('metadata')

    if coverImgID:
        # Drop any previous cover declaration before adding the new one.
        for meta in metadata.find_all('meta'):
            if meta.get('name') == 'cover':
                meta.decompose()

        cover_meta = soup.new_tag('meta')
        cover_meta['name'] = 'cover'
        cover_meta['content'] = coverImgID
        metadata.append(cover_meta)

    # NOTE(review): the write-back is treated as unconditional here; confirm
    # against the original layout that it was not nested under the `if`.
    bk.setmetadataxml(str(soup))
def run(bk):
    """Apply ``conversionDict`` substitutions to digit-bearing text.

    For every text file of the book, each ``p``/``div``/``span`` element
    whose string contains at least one digit has every pattern in
    ``conversionDict`` substituted (via ``re.sub``) in that string.  A file
    in which at least one such element was found is written back after
    passing through ``fixSelfCloseTags``.

    Returns 0.
    """
    print('start')
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    digit_pattern = re.compile(r'(\d+)')

    for (file_id, _) in bk.text_iter():
        modified = False
        html = bk.readfile(file_id)
        # NOTE(review): no parser is named, so the library picks one; left
        # unchanged to keep the serialized output identical.
        soup = sigil_bs4.BeautifulSoup(html)

        # br tag will cause p tag cannot be found
        for elem in soup.find_all(['p', 'div', 'span'], text=digit_pattern):
            modified = True
            text = elem.string
            for pattern, replacement in conversionDict.items():
                text = re.sub(pattern, replacement, text)
            elem.string.replace_with(text)

        if modified:
            # Report the manifest id of the file (previously the builtin
            # ``id`` function was printed by mistake).
            print("Modifed File -> ", file_id)
            bk.writefile(file_id, fixSelfCloseTags(str(soup)))

    return 0
def run(bk):
    """Set each XHTML file's <title> to its file name without extension.

    Files come from ``files_iter(bk, on_selected)``, where ``on_selected``
    is true when ``bk.selected_iter()`` yields at least one truthy item.
    The title text is the base name of the file's href up to (not
    including) the last dot; a name without any dot is used whole.  An
    existing <title> is overwritten, otherwise a new one is appended to
    <head>.

    Returns 0.
    """
    on_selected = any(bk.selected_iter())

    for file_id, file_href in files_iter(bk, on_selected):
        xhtml_file = bk.readfile(file_id)
        xhtml_soup = sigil_bs4.BeautifulSoup(xhtml_file, 'lxml')

        # There's a typo in bk.href_to_basename until version 0.9.5 of Sigil
        if bk.launcher_version() <= 20160325:
            file_name = href_to_basename(file_href)
        else:
            file_name = bk.href_to_basename(file_href)

        # Strip the extension once; fall back to the full name when there is
        # no dot (rindex() used to raise ValueError in that case).
        stem, dot, _extension = file_name.rpartition(".")
        if not dot:
            stem = file_name

        head = xhtml_soup.head
        if head.title:
            head.title.string = stem
        else:
            title = xhtml_soup.new_tag("title")
            title.string = stem
            head.append(title)

        bk.writefile(file_id, xhtml_soup.prettyprint_xhtml(indent_chars=" "))

    return 0
def parse_xml(bk: 'BookContainer', collector: XHTMLAttributes,
              prefs: MutableMapping) -> XHTMLAttributes:
    """Collect fragment identifiers from the book's non-XHTML XML files.

    Manifest entries are skipped when they are text (XHTML) files or when
    their media type does not match ``[/+]xml``.  Every element of each
    remaining file is checked, for each attribute name in the
    ``fragid_container_attrs`` preference (or the collector's own list when
    the preference is empty), with ``get_fragid``; truthy results are added
    to ``collector.fragment_identifier``.

    Returns:
        The same *collector* object that was passed in.

    Raises:
        XMLParsingError: if reading or parsing a file fails; the original
            exception is attached as the cause.
    """
    fragid_container_attrs = (
        prefs['fragid_container_attrs'] or collector.fragid_container_attrs
    )
    xhtml_files = {id_ for id_, _href in bk.text_iter()}

    for file_id, href, mime in bk.manifest_iter():
        # if file is xhtml or not xml, skip ahead
        if file_id in xhtml_files or not re.search(r'[/+]xml\b', mime):
            continue

        try:
            soup = sigil_bs4.BeautifulSoup(bk.readfile(file_id), 'lxml-xml')
        except Exception as E:
            # Chain explicitly so the underlying parser error stays visible.
            raise XMLParsingError('Error in {}: {}'.format(
                utils.href_to_basename(href), E)) from E

        for elem in soup.find_all(True):
            # gather fragment identifiers, if present
            for attr in fragid_container_attrs:
                fragid = get_fragid(elem, attr)
                if fragid:
                    collector.fragment_identifier.add(fragid)

    return collector
def newIdentifierInMetadata(bk):
    """Replace the book's ``BookId`` identifier with a fresh random UUID.

    Any ``identifier`` element under ``<metadata>`` whose ``id`` is
    ``BookId`` is removed.  A new ``dc:identifier`` element with
    ``id="BookId"`` and ``opf:scheme="UUID"`` is appended, holding the URN
    form of a newly generated ``uuid4``.  The updated metadata is stored
    with ``bk.setmetadataxml``.

    Returns the new identifier string.
    """
    soup = sigil_bs4.BeautifulSoup(bk.getmetadataxml(), 'xml')
    metadata = soup.find('metadata')

    # remove the old identifier
    for existing in metadata.find_all('identifier'):
        if existing.get('id') != "BookId":
            continue
        existing.decompose()

    BookId = uuid.uuid4().urn

    identifier = soup.new_tag('dc:identifier')
    identifier['id'] = "BookId"
    identifier['opf:scheme'] = "UUID"
    identifier.string = BookId
    metadata.append(identifier)
    print('Setting metadata: %s' % identifier)

    bk.setmetadataxml(str(soup))
    return BookId
def run(bk):
    """Rebuild the image markup inside every ``svg_outer`` div of the book.

    For each text file, every ``<div>`` whose ``class`` contains
    ``svg_outer`` is inspected.  Each ``<img>``/``<svg>`` inside it is
    resolved to a manifest id through its ``src`` (or the nested image's
    ``xlink:href``), after dropping a leading ``../``:

    * if the href is unknown or the file is zero bytes long, the whole div
      is removed;
    * otherwise the node is replaced by the markup returned from
      ``getSvgForImage``.  The classes ``svg_yes`` and ``svg_100`` on the
      div force ``useImgForLandscape=False`` and a size of 100 percent
      respectively.

    The file is written back with a single XML declaration at the top.
    As a side effect the module-level ``plugin_path`` is set.

    Returns 0.
    """
    # get python plugin path
    global plugin_path
    plugin_path = os.path.join(bk._w.plugin_dir, plugin_name)

    # TODO: near square image?
    # done in getSvgForImage. not yet backport to baka-epub
    useImgForLandscape = False
    svgSizePercent = 98

    # Raw string: '\?' and '\s' are invalid escapes in a plain literal.
    xml_declaration = re.compile(r'<\?xml\s.*?\?>')

    for (textID, textHref) in bk.text_iter():
        print('\nProcessing text file: %s' % textHref)

        textContents = bk.readfile(textID)  # Read the section into textContents
        if not isinstance(textContents, text_type):
            # Not text yet: decode it as UTF-8.
            textContents = text_type(textContents, 'utf-8')

        soup = sigil_bs4.BeautifulSoup(textContents, "xml")

        removeMe = []
        for divNode in soup.find_all("div"):
            if not (divNode.has_attr('class')
                    and "svg_outer" in divNode['class']):
                continue

            for imgNode in divNode.find_all(["img", "svg"]):
                if imgNode.name == 'img':
                    imgSrc = imgNode['src']
                else:
                    imgSrc = imgNode.image['xlink:href']
                if imgSrc.startswith('../'):
                    imgSrc = imgSrc[3:]

                imgID = bk.href_to_id(imgSrc)
                if not imgID:
                    print('404 error: ' + imgSrc + '. Removing...')
                    removeMe.append(divNode)
                    continue

                # image file exists
                print('Found image: ' + imgSrc)
                if len(bk.readfile(imgID)) == 0:
                    print('Zero-length file. Removing...')
                    removeMe.append(divNode)
                    continue

                _useImg = useImgForLandscape
                if "svg_yes" in divNode['class']:
                    _useImg = False
                _svgSizePercent = svgSizePercent
                if "svg_100" in divNode['class']:
                    _svgSizePercent = 100

                svgNode = sigil_bs4.BeautifulSoup(
                    getSvgForImage(bk, imgID,
                                   svgSizePercent=_svgSizePercent,
                                   useImgForLandscape=_useImg,
                                   dontWrapInDiv=True),
                    "xml")
                imgNode.replace_with(svgNode)

        # A div holding several bad images is queued once per image; destroy
        # each distinct node only once (identity, not tag equality).
        destroyed = set()
        for element in removeMe:
            if id(element) in destroyed:
                continue
            destroyed.add(id(element))
            element.decompose()

        textContents = str(soup)
        textContents = ('<?xml version="1.0" encoding="utf-8"?>'
                        + xml_declaration.sub('', textContents))
        bk.writefile(textID, textContents)

    print('Done.')
    return 0