def render_act(self, act):
    """Write one act to ``self.buf`` as an ``<article class="act">`` element.

    Output, in order: a ``<header>`` holding ``act.name`` (with an ``id``
    attribute when ``act.bookmark_id`` is truthy), an ``<ol class="history">``
    with one ``<li>`` per entry of ``act.history``, whatever the parent
    class's ``render_act`` writes, and the closing ``</article>`` tag.

    Each history entry is passed through the abbreviation, date and
    normalization steps below, in that fixed order, before being written.
    An entry that does not already end in ``>``, a closing parenthesis or
    ``。`` gets a trailing ``。`` appended.
    """
    buf = self.buf
    buf.write('<article class="act">\n')
    if act.bookmark_id:
        buf.write('<header id="')
        buf.write(act.bookmark_id)
        buf.write('">')
    else:
        buf.write('<header>')
    buf.write(act.name)
    buf.write('</header>\n'
              '<ol class="history">\n')
    for h in act.history:
        buf.write('<li>')
        for noun, abbr in HIST_ABBREVIATION_LIST:
            h = h.replace(noun, abbr)
        h = RE_NUMERIC_DATE_FORMAT.sub(utils.repl_numeric_date, h)
        h = RE_NUMERIC_DATE_INLINE_FORMAT.sub(utils.repl_numeric_inline_date, h)
        h = RE_REPUBLIC_DATE_FORMAT.sub(utils.repl_cjk_date, h)
        h = RE_SEMESTER_DATE_FORMAT.sub(utils.repl_cjk_semester, h)
        h = h.replace('中華民國', '民國')
        h = RE_HIST_INS_FORMAT.sub(r'\1-\2條', h)
        h = RE_HIST_STATUTE_FORMAT.sub(r'\1', h)
        h = utils.normalize_bracketed_numbers(h)
        h = normalize_spaces(h)
        h = apply_emphasis(h)
        buf.write(h)
        # Guard against an empty entry: h[-1] on '' would raise IndexError.
        # A trailing '>' presumably means the entry ends in a closing tag
        # (e.g. </span>), so no full stop is appended in that case either.
        if h and h[-1] not in '>))。':
            buf.write('。')
        buf.write('</li>\n')
    buf.write('</ol>\n')
    super().render_act(act)
    buf.write('</article>\n')
def clear_text_for_snippets(self, text):
    """Return ``text`` cleaned up for use as a snippet.

    Three steps are applied in order:

    1. every parenthesised span ``(...)`` is removed (non-greedy, so each
       opening parenthesis pairs with the nearest closing one on the same
       line);
    2. a single space directly before any of ``,.;:!?`` is dropped;
    3. the result is passed through ``normalize_spaces``.
    """
    # Raw string: '\(' and '\)' are not valid escapes in a plain literal and
    # trigger an invalid-escape warning; the regex itself is unchanged.
    cleaner = re.compile(r'[\(].*?[\)]')
    punctuations = ',.;:!?'
    text = cleaner.sub('', text)
    for mark in punctuations:
        text = text.replace(' ' + mark, mark)
    return normalize_spaces(text)
def parse_act(buf):
    """Parse the plain-text form of an act into an ``Act`` object.

    Layout consumed by this function:

    * first line: the act name; second line: must be empty;
    * following lines up to the next empty line: history entries, each
      appended (stripped) to ``act.history``;
    * remaining lines: the body, classified by the number of leading
      whitespace characters:

      - 0: chapter title (``Chapter`` appended to ``act.articles``)
      - 2: article heading (``Article``), or foreword text appended as a
        plain string when the heading pattern does not match
      - 4: ``Paragraph`` added to the current article
      - 6: ``Subsection`` added to the current paragraph
      - 8: item string added to the current subsection

      Blank lines are skipped; lines at any other indent are ignored.

    Args:
        buf: either a ``str`` (wrapped in ``StringIO``) or an object with a
            ``readline()`` method.

    Returns:
        The populated ``Act``.

    Raises:
        AssertionError: the input is malformed. The line being processed
            is written to ``sys.stderr`` before the exception propagates.
    """
    if isinstance(buf, str):
        buf = StringIO(buf)
    # Bound up front so the except handler below can always report a line,
    # even when the failure happens before the first body line is read.
    line = ''
    # Current containers; None until the first one of each kind is seen.
    article = paragraph = subsection = None
    try:
        act = Act()
        line = buf.readline()
        act.name = normalize_spaces(line.strip())
        # Explicit raises instead of ``assert`` statements so the checks
        # still run under ``python -O``; the exception type is unchanged.
        if not act.name:
            raise AssertionError('missing act name')
        line = buf.readline()
        if line != '\n':
            raise AssertionError('expected an empty line after the act name')

        # History section: runs until the first empty line.
        while True:
            line = buf.readline()
            if line == '\n':
                break
            if not line:
                # readline() returned '' -> end of input before the body.
                raise AssertionError('unexpected end of input in history')
            act.history.append(line.strip())

        # Body section: runs until end of input.
        while True:
            indented_line = buf.readline()
            if not indented_line:
                break
            indented_line = indented_line.rstrip()
            if not indented_line:
                continue
            line = indented_line.lstrip()
            indent = len(indented_line) - len(line)
            if indent == 0:  # Chapter title
                m = re.match(r'^(第\S+)\s+(\S+)', line)
                if not m:
                    raise AssertionError('malformed chapter title')
                chapter = Chapter(number=m.group(1), caption=m.group(2))
                act.articles.append(chapter)
            elif indent == 2:  # Article
                m = re.match(
                    r'^(?P<number>(第|附件)[^【\s]+)\s*(【(?P<caption>\S+)】)?',
                    line)
                if m:
                    article = Article(number=m.group('number'),
                                      caption=(m.group('caption') or ''))
                    act.articles.append(article)
                else:
                    act.articles.append(line)  # Foreword text
            elif indent == 4:  # Paragraph
                if article is None:
                    raise AssertionError('paragraph before any article')
                paragraph = Paragraph(caption=line)
                article.subitems.append(paragraph)
            elif indent == 6:  # Subsection
                if paragraph is None:
                    raise AssertionError('subsection before any paragraph')
                subsection = Subsection(caption=line)
                paragraph.subitems.append(subsection)
            elif indent == 8:  # Item
                if subsection is None:
                    raise AssertionError('item before any subsection')
                subsection.subitems.append(line)
        return act
    except AssertionError:
        sys.stderr.write(line)
        raise  # parse failed, malformed file
def print_constr(self, sexp_str):
    """Return the printed form of the term given by ``sexp_str``.

    Results are memoized per instance in ``self.constr_cache`` (created on
    first use). On a cache miss a ``Print`` command is sent via
    ``self.send`` and the string is extracted from the second response;
    if that extraction raises ``TypeError`` the same path is read from the
    first response instead.

    Returns ``None`` (without caching) when the command fails with a
    ``CoqExn`` whose ``err_msg`` is ``'Not_found'``; any other ``CoqExn``
    is re-raised.
    """
    if not hasattr(self, 'constr_cache'):
        self.constr_cache = {}

    # Cache hit: nothing to send.
    if sexp_str in self.constr_cache:
        return self.constr_cache[sexp_str]

    try:
        command = '(Print ((pp_format PpStr)) (CoqConstr %s))' % sexp_str
        responses, _ = self.send(command)
        printed = symbol2str(responses[1][2][1][0][1])
        self.constr_cache[sexp_str] = normalize_spaces(printed)
    except CoqExn as ex:
        if ex.err_msg != 'Not_found':
            raise ex
        return None
    except TypeError:
        # Fallback: take the payload from the first response instead.
        printed = symbol2str(responses[0][2][1][0][1])
        self.constr_cache[sexp_str] = normalize_spaces(printed)
    return self.constr_cache[sexp_str]
def render_paragraph(self, paragraph):
    """Write a paragraph as an ``<li>`` element to ``self.buf``.

    The caption is space-normalized and passed through ``apply_emphasis``.
    When the paragraph has subitems, the parent class's
    ``render_paragraph`` is invoked between an opening and closing
    ``<ol class="subsections">``.
    """
    write = self.buf.write
    write('<li>')
    write(apply_emphasis(normalize_spaces(paragraph.caption)))
    write('</li>\n')

    if not paragraph.subitems:
        return

    write('<ol class="subsections">\n')
    super().render_paragraph(paragraph)
    write('</ol>\n')
def render_subsection(self, subsection):
    """Write a subsection as an ``<li>`` element to ``self.buf``.

    Whatever ``RE_SUBSECTION_NUMBERING`` matches is removed from the
    caption (presumably its leading numbering) before the caption is
    space-normalized and passed through ``apply_emphasis``. When the
    subsection has subitems, the parent class's ``render_subsection`` is
    invoked inside an ``<ol class="items">`` wrapper.
    """
    write = self.buf.write
    stripped_caption = RE_SUBSECTION_NUMBERING.sub('', subsection.caption)
    write('<li>')
    write(apply_emphasis(normalize_spaces(stripped_caption)))
    write('</li>\n')

    if not subsection.subitems:
        return

    write('<ol class="items">\n')
    super().render_subsection(subsection)
    write('</ol>\n')
def parse_interpretation(buf):
    """Parse the plain-text form of an interpretation.

    Layout consumed by this function:

    * first line: the name (a leading ``解釋`` is dropped); second line:
      must be empty;
    * following lines up to the next empty line: metadata. A line that
      fully matches ``RE_REPUBLIC_DATE_FORMAT`` is stored as
      ``intp.date`` (year, month, day) and is not added to ``intp.meta``;
      every other line is appended to ``intp.meta``;
    * remaining lines: the body. Each non-blank line becomes either a
      ``Heading`` or a plain string in ``intp.subentries``; in plain text
      ``【``/``】`` are replaced by ``《``/``》``.

    Args:
        buf: either a ``str`` (wrapped in ``StringIO``) or an object with a
            ``readline()`` method.

    Returns:
        The populated ``Interpretation``.

    Raises:
        AssertionError: the input is malformed. The line being processed
            is written to ``sys.stderr`` before the exception propagates.
    """
    if isinstance(buf, str):
        buf = StringIO(buf)
    # Bound up front so the except handler below can always report a line,
    # even when the failure happens while reading the first two lines.
    line = ''
    try:
        intp = Interpretation()
        line = buf.readline()
        intp.name = normalize_spaces(line.strip())
        if intp.name.startswith('解釋'):
            intp.name = intp.name[2:]  # Remove redundant title
        # Explicit raises instead of ``assert`` statements so the checks
        # still run under ``python -O``; the exception type is unchanged.
        if not intp.name:
            raise AssertionError('missing interpretation name')
        line = buf.readline()
        if line != '\n':
            raise AssertionError('expected an empty line after the name')

        # Metadata section: runs until the first empty line.
        while True:
            line = buf.readline()
            if line == '\n':
                break
            line = line.strip()
            if not line:
                # Covers end of input as well as whitespace-only lines.
                raise AssertionError('empty line content in metadata')
            m = RE_REPUBLIC_DATE_FORMAT.fullmatch(line)
            if m:
                intp.date = (m.group('year'), m.group('month'),
                             m.group('day'))
                continue  # The date line is consumed, not kept in meta
            intp.meta.append(line)

        # Body section: runs until end of input.
        while True:
            indented_line = buf.readline()
            if not indented_line:
                break
            indented_line = indented_line.rstrip()
            if not indented_line:
                continue
            line = indented_line.lstrip()
            indent = len(indented_line) - len(line)
            if indent == 0 and RE_CHAPTER_FORMAT.fullmatch(line):
                heading = Heading(line, is_chapter=True)
                intp.subentries.append(heading)
            elif ((indent <= 2 and not line.startswith('邱丞正、'))
                  or RE_HEADING_FORMAT.fullmatch(line)):
                # workaround for bad indent
                heading = Heading(line)
                intp.subentries.append(heading)
            else:
                line = line.replace('【', '《').replace('】', '》')
                intp.subentries.append(line)
        return intp
    except AssertionError:
        sys.stderr.write(line)
        raise  # parse failed, malformed file
def render_interpretation_text(self, text):
    """Write one body line of an interpretation as a ``<p>`` element.

    The text is bracket- and space-normalized, then the jargon, inline
    jargon, citation and remark patterns are wrapped in their markup, in
    that order. If the result fully matches ``RE_INTP_FOOTER_FORMAT`` the
    paragraph gets ``class="footnote"``; otherwise the subheading pattern
    is wrapped in a ``<span class="subheading">`` and a plain ``<p>`` is
    used.
    """
    out = self.buf

    text = normalize_spaces(normalize_brackets(text))

    # (pattern, replacement) passes, applied sequentially.
    inline_markup = (
        (RE_INTP_JARGON_FORMAT, r'<span class="jargon">\1</span>'),
        (RE_INTP_JARGON_INLINE_FORMAT, r'<span class="jargon">\1</span>'),
        (RE_INTP_CITATION_FORMAT, r'<cite>\1</cite>'),
        (RE_INTP_REMARK_FORMAT, r'<span class="note">\1</span>'),
    )
    for pattern, replacement in inline_markup:
        text = pattern.sub(replacement, text)

    # Footnotes keep their text untouched; everything else may carry a
    # subheading that needs wrapping.
    if RE_INTP_FOOTER_FORMAT.fullmatch(text):
        opening_tag = '<p class="footnote">'
    else:
        text = RE_INTP_SUBHEADING_FORMAT.sub(
            r'<span class="subheading">\1\2</span>', text)
        opening_tag = '<p>'

    out.write(opening_tag)
    out.write(text)
    out.write('</p>\n')
def render_interpretation(self, intp):
    """Write an interpretation to ``self.buf`` as an ``<article>`` element.

    Output, in order: a ``<header>`` with the name (plus an ``id`` when
    ``intp.bookmark_id`` is truthy), a ``<div class="meta">`` holding the
    comma-joined metadata, every subentry (headings via
    ``render_heading``, everything else via
    ``render_interpretation_text``), an optional ``<time>`` element when
    ``intp.date`` is set, and the closing tag.
    """
    emit = self.buf.write

    emit('<article class="interpretation">\n')
    if not intp.bookmark_id:
        emit('<header>')
    else:
        emit('<header id="')
        emit(intp.bookmark_id)
        emit('">')
    emit(intp.name)
    emit('</header>\n'
         '<div class="meta">')

    joined_meta = normalize_spaces(','.join(intp.meta))
    emit(RE_INTP_META_REMARK_FORMAT.sub(r'<span class="note">\1</span>',
                                        joined_meta))
    emit('</div>\n')

    for subentry in intp.subentries:
        if not isinstance(subentry, Heading):
            self.render_interpretation_text(subentry)
            continue
        self.render_heading(subentry)

    if intp.date:
        emit('<time>民國 {} 年 {} 月 {} 日</time>\n'.format(*intp.date))
    emit('</article>\n')
def render_heading(self, heading):
    """Write a heading to ``self.buf`` as an ``<h5>`` or ``<h6>`` element.

    Chapter headings (``heading.is_chapter`` truthy) use level 5, all
    others level 6. The caption is bracket-normalized and then
    space-normalized before being written.
    """
    if heading.is_chapter:
        grade = 5
    else:
        grade = 6
    caption = normalize_brackets(heading.caption)
    caption = normalize_spaces(caption)
    markup = '<h{}>{}</h{}>\n'.format(grade, caption, grade)
    self.buf.write(markup)
def render_item(self, item):
    """Write a single item string to ``self.buf`` as an ``<li>`` element.

    Whatever ``RE_ITEM_NUMBERING`` matches is removed first (presumably
    the item's leading numbering); the remainder is space-normalized and
    passed through ``apply_emphasis``.
    """
    write = self.buf.write
    body = RE_ITEM_NUMBERING.sub('', item)
    write('<li>')
    write(apply_emphasis(normalize_spaces(body)))
    write('</li>\n')