def saveEntry(self, thisEntry: BaseEntry, thisHash: str, prevHash: str, nextHash: str) -> None:
    """
    Write one entry to its own file, addressed by its hash.

    The file is placed at ``<self._filename>/<thisHash[:2]>/<thisHash[2:]>``
    and consists of newline-joined parts:

    1. a header line: the path of the next entry (or ``"END"`` when
       ``nextHash`` is empty), preceded by the path of the previous entry
       (or ``"START"``) and a space when ``self._havePrevLink`` is set;
    2. the headword string passed through ``escapeNTB(..., bar=False)``;
    3. the definition text.
    """
    bucketDir = join(self._filename, thisHash[:2])
    makeDir(bucketDir)
    entryPath = join(bucketDir, thisHash[2:])
    with open(entryPath, "w", encoding=self._encoding) as outFile:
        # The link targets are resolved only after the file has been opened.
        if nextHash:
            nextRef = self.hashToPath(nextHash)
        else:
            nextRef = "END"

        if not self._havePrevLink:
            firstLine = nextRef
        else:
            if prevHash:
                prevRef = self.hashToPath(prevHash)
            else:
                prevRef = "START"
            firstLine = prevRef + " " + nextRef

        escapedWord = escapeNTB(thisEntry.s_word, bar=False)
        outFile.write("\n".join((firstLine, escapedWord, thisEntry.defi)))
def write(self) -> "Generator[None, BaseEntry, None]":
    """
    Write the glossary as a directory of HTML pages.

    This is a generator used as a coroutine: every entry is received through
    ``yield`` and receiving ``None`` ends the input.

    Side effects, all inside the directory ``self._filename``:

    - HTML page files obtained from ``self.nextFile()``; a new page is
      started when the current one would grow past ``max_file_size``;
    - ``index.txt`` with one line per headword:
      ``entryIndex<TAB>word<TAB>page filename<TAB>position in page``;
    - ``links.txt``, a temporary list of in-glossary links
      (``target<TAB>page index<TAB>hex position<TAB>hex size``) that is
      handed to ``self.fixLinks`` and removed afterwards;
    - data entries are saved into ``self._resDir`` when resources are enabled.

    Raises:
        ValueError: if ``max_file_size`` cannot hold the page header and tail.
    """
    encoding = self._encoding
    resources = self._resources
    max_file_size = self._max_file_size
    escape_defi = self._escape_defi
    _word_title = self._word_title
    resDir = self._resDir
    glos = self._glos
    filename = self._filename
    wordSep = ' <font color="red">|</font> '
    # Pages at or below this position are never split.
    initFileSizeMax = 100

    entry_url_fmt = glos.getInfo("entry_url")
    # Kept under its own name: the per-entry title inside the loop must not
    # overwrite what pageHeader() puts into <title>.
    glosTitle = glos.getInfo("name")

    def getEntryWebLink(entry) -> str:
        """Return an HTML link to the entry's web page, or "" if no URL format."""
        if not entry_url_fmt:
            return ""
        url = entry_url_fmt.format(word=html.escape(entry.l_word[0]))
        return f'{nbsp}<a class="no_ul" href="{url}">🌏</a>'

    style = ""
    if self._dark:
        style = darkStyle

    if self._css:
        cssLink = '<link rel="stylesheet" href="style.css" />'
    else:
        cssLink = ""

    # {pageTitle} and {customStyle} stay as placeholders for str.format().
    header = (
        '<!DOCTYPE html>\n'
        '<html><head>'
        f'<title>{{pageTitle}}</title>'
        f'<meta charset="{encoding}">'
        f'<style type="text/css">{style}{{customStyle}}</style>{cssLink}'
        '</meta></head><body>\n')

    def pageHeader(n: int):
        """Return the HTML header of page number ``n``."""
        return header.format(
            pageTitle=f"Page {n} of {glosTitle}",
            customStyle="",
        )

    def navBar() -> str:
        """Return the navigation bar: previous page (if any), next page, info."""
        links = []
        if len(self._filenameList) > 1:
            links.append(
                f'<a href="./{self._filenameList[-2]}">◀</a>')
        links.append(f'<a href="./{self.getNextFilename()}">▶</a>')
        # NOTE(review): this link carries a stray '</div>' in addition to the
        # closing one below; left untouched here, confirm before changing.
        links.append(f'<a href="./info.html">ℹ️</a></div>')
        return ('<div style="text-align: center; font-size: 2.5em;">' +
                f'{nbsp}{nbsp}{nbsp}'.join(links) +
                '</div>')

    # Validate and prepare the directory before any file is opened, so a bad
    # configuration does not leave open handles or empty files behind.
    tailSize = len(self._tail.encode(encoding))
    if max_file_size < len(header) + tailSize:
        raise ValueError(f"max_file_size={max_file_size} is too small")
    max_file_size -= tailSize

    if not isdir(self._filename):
        os.mkdir(self._filename)

    re_fixed_link = re.compile(
        r'<a (?:[^<>]*? )?href="#([^<>"]+?)">[^<>]+?</a>',
        re.I,
    )
    linkTargetSet = set()

    def replaceBword(text) -> str:
        """Turn ``bword://`` hrefs into in-page ``#`` anchors."""
        return text.replace(
            ' href="bword://',
            ' href="#',
        )

    def addLinks(text: str, pos: int) -> None:
        """Record every ``#`` link found in ``text`` into links.txt.

        ``pos`` is the position of ``text`` inside the current page; the
        recorded offset and size are measured in encoded bytes.
        """
        for m in re_fixed_link.finditer(text):
            if ' class="entry_link"' in m.group(0):
                # the entry's own permalink, not a cross reference
                continue
            if m.group(0).count("href=") != 1:
                log.error(f"unexpected match: {m.group(0)}")
            target = html.unescape(m.group(1))
            linkTargetSet.add(target)
            start = m.start()
            b_start = len(text[:start].encode(encoding))
            b_size = len(text[start:m.end()].encode(encoding))
            linksTxtFileObj.write(f"{escapeNTB(target)}\t"
                                  f"{len(self._filenameList)-1}\t"
                                  f"{hex(pos+b_start)[2:]}\t"
                                  f"{hex(b_size)[2:]}\n")

    # Both text files are closed when this block is left, including on errors
    # and when the generator is closed early. links.txt in particular has to
    # be closed (and thereby flushed) before fixLinks() and os.remove().
    with open(
        join(filename, "index.txt"),
        mode="w",
        encoding="utf-8",
    ) as indexTxtFileObj, open(
        join(filename, "links.txt"),
        mode="w",
        encoding="utf-8",
    ) as linksTxtFileObj:
        fileObj = self.nextFile()
        fileObj.write(pageHeader(0))
        fileObj.write(navBar())

        self.writeInfo(filename, header)

        entryIndex = -1
        while True:
            entryIndex += 1
            entry = yield
            if entry is None:
                break
            if entry.isData():
                if resources:
                    entry.save(resDir)
                continue

            # Look at the current entry only; no value from a previous
            # iteration is consulted.
            isFullHtml = entry.defi.startswith('<!DOCTYPE html>')
            declaredFormat = entry.defiFormat
            if isFullHtml and declaredFormat != "h":
                log.error(f"bad defiFormat={declaredFormat}")

            entry.detectDefiFormat()
            entry.stripFullHtml()
            defi = entry.defi
            defiFormat = entry.defiFormat
            if isFullHtml:
                # a complete HTML document is always handled as HTML
                defiFormat = "h"

            if defiFormat == "m":
                defi = html.escape(defi)
                if "\n" in defi:
                    # could be markdown or unformatted plaintext
                    # FIXME: this changes the font to a monospace
                    defi = f'<pre>{defi}</pre>'
            elif defiFormat == "h":
                if escape_defi:
                    defi = html.escape(defi)
                defi = defi.replace(' src="./', ' src="./res/')

            entryId = f"entry{entryIndex}"

            entryTitle = ""
            if _word_title:
                words = [html.escape(word) for word in entry.l_word]
                entryTitle = glos.wordTitleStr(
                    wordSep.join(words),
                    sample=entry.l_word[0],
                    _class="headword",
                )
            if not entryTitle:
                entryTitle = f'Entry {entryIndex}'

            # entry_link_sym = "¶"
            entry_link_sym = "🔗"
            text = (f'<div id="{entryId}">{entryTitle}{nbsp}{nbsp}'
                    f'<a class="no_ul" class="entry_link" href="#{entryId}">'
                    f'{entry_link_sym}</a>'
                    f'{getEntryWebLink(entry)}'
                    f"<br>\n{defi}"
                    '</div>\n'
                    '<hr>\n')

            pos = fileObj.tell()
            if (
                pos > initFileSizeMax
                and pos > max_file_size - len(text.encode(encoding))
            ):
                # the entry does not fit any more: start the next page
                fileObj = self.nextFile()
                fileObj.write(pageHeader(len(self._filenameList) - 1))
                fileObj.write(navBar())
                pos = fileObj.tell()

            pageFilename = escapeNTB(self._filenameList[-1])
            for word in entry.l_word:
                indexTxtFileObj.write(f"{entryIndex}\t"
                                      f"{escapeNTB(word)}\t"
                                      f"{pageFilename}\t"
                                      f"{pos}\n")

            text = replaceBword(text)
            addLinks(text, pos)
            fileObj.write(text)

        fileObj.close()
        self._fileObj = None

    if linkTargetSet:
        log.info(f"{len(linkTargetSet)} link targets found")
        log.info("Fixing links, please wait...")
        self.fixLinks(linkTargetSet)

    os.remove(join(filename, "links.txt"))
def write(self) -> Generator[None, "BaseEntry", None]:
    """
    Write the glossary as a directory of HTML pages.

    This is a generator used as a coroutine: every entry is received through
    ``yield`` and receiving ``None`` ends the input.

    Side effects, all inside the directory ``self._filename``:

    - HTML page files obtained from ``self.nextFile()``; a new page is
      started when the current one would grow past ``max_file_size``;
    - ``index.txt`` with one line per headword:
      ``entryIndex<TAB>word<TAB>page filename<TAB>position in page``;
    - ``links.txt``, a temporary list of in-glossary links
      (``target<TAB>page index<TAB>hex position<TAB>hex size``) that is
      handed to ``self.fixLinks`` and removed afterwards;
    - data entries are saved into ``self._resDir`` when resources are enabled.

    Raises:
        ValueError: if ``max_file_size`` cannot hold the page header and tail.
    """
    encoding = self._encoding
    resources = self._resources
    max_file_size = self._max_file_size
    escape_defi = self._escape_defi
    resDir = self._resDir
    glos = self._glos
    filename = self._filename
    wordSep = ' <font color="red">|</font> '
    # Pages at or below this position are never split.
    initFileSizeMax = 100

    entry_url_fmt = glos.getInfo("entry_url")
    title = glos.getInfo("name")

    def getEntryWebLink(entry) -> str:
        """Return an HTML link to the entry's web page, or "" if no URL format."""
        if not entry_url_fmt:
            return ""
        url = entry_url_fmt.format(word=html.escape(entry.l_word[0]))
        return f' <a class="no_ul" href="{url}">🌏</a>'

    style = ""
    if self._dark:
        style = darkStyle

    # {pageTitle} and {customStyle} stay as placeholders for str.format().
    header = ('<!DOCTYPE html>\n'
              '<html><head>'
              f'<title>{{pageTitle}}</title>'
              f'<meta charset="{encoding}">'
              f'<style type="text/css">{style}{{customStyle}}</style>'
              '</meta></head><body>\n')

    def pageHeader(n: int):
        """Return the HTML header of page number ``n``."""
        return header.format(
            pageTitle=f"Page {n} of {title}",
            customStyle="",
        )

    def stripEntryFullHtml(entry, defi: str) -> str:
        """Return the content between ``<body ...>`` and ``</body`` of ``defi``.

        When one of the markers is missing, an error is logged and whatever
        has been cut out so far is returned.
        """
        word = entry.s_word
        i = defi.find('<body')
        if i == -1:
            log.error(f"<body not found: word={word}")
            return defi
        defi = defi[i + 5:]
        i = defi.find('>')
        if i == -1:
            log.error(f"'>' after <body not found: word={word}")
            return defi
        defi = defi[i + 1:]
        i = defi.find('</body')
        if i == -1:
            log.error(f"</body close not found: word={word}")
            return defi
        return defi[:i]

    def navBar() -> str:
        """Return the navigation bar: previous page (if any), next page, info."""
        links = []
        if len(self._filenameList) > 1:
            links.append(
                f'<a href="./{self._filenameList[-2]}">◀</a>')
        links.append(f'<a href="./{self.getNextFilename()}">▶</a>')
        # NOTE(review): this link carries a stray '</div>' in addition to the
        # closing one below; left untouched here, confirm before changing.
        links.append(f'<a href="./info.html">ℹ️</a></div>')
        return ('<div style="text-align: center; font-size: 2.5em;">' +
                ' '.join(links) +
                '</div>')

    # Validate and prepare the directory before any file is opened, so a bad
    # configuration does not leave open handles or empty files behind.
    tailSize = len(self._tail.encode(encoding))
    if max_file_size < len(header) + tailSize:
        raise ValueError(f"max_file_size={max_file_size} is too small")
    max_file_size -= tailSize

    if not isdir(self._filename):
        os.mkdir(self._filename)

    re_fixed_link = re.compile(
        r'<a (?:[^<>]*? )?href="#([^<>"]+?)">[^<>]+?</a>',
        re.I,
    )
    linkTargetSet = set()

    def replaceBword(text) -> str:
        """Turn ``bword://`` hrefs into in-page ``#`` anchors."""
        return text.replace(
            ' href="bword://',
            ' href="#',
        )

    def addLinks(text: str, pos: int) -> None:
        """Record every ``#`` link found in ``text`` into links.txt.

        ``pos`` is the position of ``text`` inside the current page; the
        recorded offset and size are measured in encoded bytes.
        """
        for m in re_fixed_link.finditer(text):
            if ' class="entry_link"' in m.group(0):
                # the entry's own permalink, not a cross reference
                continue
            if m.group(0).count("href=") != 1:
                log.error(f"unexpected match: {m.group(0)}")
            target = html.unescape(m.group(1))
            linkTargetSet.add(target)
            start = m.start()
            b_start = len(text[:start].encode(encoding))
            b_size = len(text[start:m.end()].encode(encoding))
            linksTxtFileObj.write(f"{escapeNTB(target)}\t"
                                  f"{len(self._filenameList)-1}\t"
                                  f"{hex(pos+b_start)[2:]}\t"
                                  f"{hex(b_size)[2:]}\n")

    # Both text files are closed when this block is left, including on errors
    # and when the generator is closed early. links.txt in particular has to
    # be closed (and thereby flushed) before fixLinks() and os.remove().
    with open(
        join(filename, "index.txt"),
        mode="w",
        encoding="utf-8",
    ) as indexTxtFileObj, open(
        join(filename, "links.txt"),
        mode="w",
        encoding="utf-8",
    ) as linksTxtFileObj:
        fileObj = self.nextFile()
        fileObj.write(pageHeader(0))
        fileObj.write(navBar())

        self.writeInfo(filename, header)

        defiHasHeadwords = glos.getInfo("definition_has_headwords") == "True"

        entryIndex = -1
        while True:
            entryIndex += 1
            entry = yield
            if entry is None:
                break
            if entry.isData():
                if resources:
                    entry.save(resDir)
                continue

            defi = entry.defi
            defiFormat = entry.defiFormat

            if defi.startswith('<!DOCTYPE html>'):
                if defiFormat != "h":
                    log.error(f"bad defiFormat={defiFormat}")
                    defiFormat = "h"
                defi = stripEntryFullHtml(entry, defi)

            # Escape first, then insert <br>: the other way round the
            # inserted tags would be escaped too and line breaks were lost.
            if escape_defi:
                defi = html.escape(defi)
            if defiFormat == "m":
                defi = defi.replace("\n", "<br>")

            entryId = f"entry{entryIndex}"

            if defiHasHeadwords:
                headwords = f'Entry {entryIndex}'
            else:
                # headwords are plain text, so they are escaped for HTML
                escapedWords = wordSep.join(
                    html.escape(word) for word in entry.l_word
                )
                headwords = f'<b class="headword">{escapedWords}</b>'

            text = (
                f'<div id="{entryId}">{headwords} '
                f'<a class="no_ul" class="entry_link" href="#{entryId}">🔗</a>'
                f'{getEntryWebLink(entry)}'
                f"<br>\n{defi}"
                '</div>\n'
                '<hr>\n')

            pos = fileObj.tell()
            if (
                pos > initFileSizeMax
                and pos > max_file_size - len(text.encode(encoding))
            ):
                # the entry does not fit any more: start the next page
                fileObj = self.nextFile()
                fileObj.write(pageHeader(len(self._filenameList) - 1))
                fileObj.write(navBar())
                pos = fileObj.tell()

            pageFilename = escapeNTB(self._filenameList[-1])
            for word in entry.l_word:
                indexTxtFileObj.write(f"{entryIndex}\t"
                                      f"{escapeNTB(word)}\t"
                                      f"{pageFilename}\t"
                                      f"{pos}\n")

            text = replaceBword(text)
            addLinks(text, pos)
            fileObj.write(text)

        fileObj.close()
        self._fileObj = None

    if linkTargetSet:
        log.info(f"\n{len(linkTargetSet)} link targets found")
        log.info("Fixing links, please wait...")
        self.fixLinks(linkTargetSet)

    os.remove(join(filename, "links.txt"))