def title_and_summary(self):
    """
    Build a (title, summary) pair from the first two blocks.

    The title comes from the first block when it is a Paragraph, or a
    CharacterBlock whose control character is '%' (a comment). The summary
    comes from the second block when it is a CharacterBlock whose control
    character is Config.caption. Either value is '' when its block is
    missing or of another kind.

    Text is shortened to 128 characters with a "..." placeholder and then
    passed through one_line().
    """
    def brief(text):
        # Shared clip-then-flatten step used for both title and summary.
        return one_line(shorten(text, 128, placeholder="..."))

    title = summary = ''
    blocks = self.blocks
    if not blocks:
        return title, summary

    first = blocks[0]
    if isinstance(first, Paragraph):
        title = brief(first.content)
    # Deliberately a separate check, not an elif: a matching comment block
    # overrides whatever the Paragraph check produced.
    if isinstance(first, CharacterBlock) and first.control_character == '%':
        # [2:] skips two leading characters -- presumably the control
        # character and the space after it; confirm against CharacterBlock.
        title = brief(first.content[2:])

    if len(blocks) > 1:
        second = blocks[1]
        if (isinstance(second, CharacterBlock)
                and second.control_character == Config.caption):
            summary = brief(second.content[2:])

    return title, summary
def split_to_array(text, prefixes, capture_characters=True):
    """
    Split text into blocks at every line that begins with a prefix character.

        ? This
        / is
        ?
        / a block

    Note line 3 is 1 char in length; the pattern must match a prefix followed
    by a space OR a line ending. (The space is optional in the pattern, so a
    prefix character directly followed by other text is also treated as a
    prefix.)

    For prefixes = '?/=':

    With capture_characters:
        [('?', 'This'), ('/', 'is'), ('?', ''), ('/', 'a block')]
    Without:
        ['This', 'is', '', 'a block']

    Parameters:
        text: the string to split.
        prefixes: string of characters that may introduce a block. When text
            does not open with one of them, the first block is labelled with
            prefixes[0].
        capture_characters: when true, return (prefix, content) tuples;
            otherwise return only the content strings.

    Every content string is passed through one_line().
    """
    char_class = "[%s]" % re.escape(prefixes)

    # Retain the first prefix if present (match() anchors at the start of
    # the string, unlike search()).
    leading = re.match(char_class + "(?: ?|$)", text)
    if leading:
        first_prefix = text[0]
        # Cut where the match actually ended instead of assuming two
        # characters: a bare prefix on the first line ("?\n/ next") matches
        # only one character, and a fixed [2:] would swallow the newline
        # that the splitter below needs to find the next block.
        content = text[leading.end():]
    else:
        first_prefix, content = prefixes[0], text

    if capture_characters:
        # A capturing group makes split() return the prefix characters too,
        # giving: content, char, content, char, content...
        splitter = re.compile("\n(" + char_class + ")(?: ?|$)")
        pieces = [first_prefix]
        pieces.extend(one_line(piece) for piece in splitter.split(content))
        # Now: char, content, char, content... -> [(char, content), ...]
        return [tuple(pieces[i:i + 2]) for i in range(0, len(pieces), 2)]

    splitter = re.compile("\n" + char_class + "(?: ?|$)")
    return [one_line(piece) for piece in splitter.split(content)]
def tag(self, alias1, tag1, subtag1, punctuation, numbering, count):
    """
    Take #Tags:Tag and return a link to its Index line.

    The returned anchor has id <nav_id> and points at "#ref_<nav_id>"; it
    shows the alias, the punctuation, and count as a superscript. A matching
    back-link (id "ref_<nav_id>", pointing at "#<nav_id>", showing count) is
    appended to self.tags[tag][subtag][number] for when the index is
    generated. Missing levels of that nested mapping are created on demand.

    alias1, tag1 and subtag1 are passed through one_line() before use;
    number and nav_id come from get_number() and get_nav_id().
    """
    alias = one_line(alias1)
    tag_name = one_line(tag1)
    subtag_name = one_line(subtag1)
    number = get_number(numbering)
    nav_id = get_nav_id(tag_name, subtag_name, numbering, count)

    # Find (or create) the back-link list filed under tag -> subtag -> number.
    back_links = (
        self.tags
        .setdefault(tag_name, {})
        .setdefault(subtag_name, {})
        .setdefault(number, [])
    )

    link = trim(""" <a id=\"%s\" href=\"#ref_%s\">%s%s<sup>%s</sup></a> """) % (
        nav_id, nav_id, alias, punctuation, count
    )
    ref_link = "<a id=\"ref_%s\" href=\"#%s\">%s</a>" % (nav_id, nav_id, count)
    back_links.append(ref_link)
    return link
def text(self):
    """
    Render the content with each control-character line left unwrapped.

    The content is split on Config.all_control_chars; every resulting piece
    is emitted as "<char> <content>" on its own line, with the content passed
    through one_line(). A blank line is put in front when this block's
    control character is one of Config.subheads.
    """
    rendered = []
    for char, line in split_to_array(self.content, Config.all_control_chars):
        rendered.append(char + ' ' + one_line(line))
    body = "\n".join(rendered)

    if self.control_character in Config.subheads:
        return "\n" + body
    return "" + body
def test_one_line():
    """Check that one_line() turns the trimmed sample into the expected line."""
    expected = "Sentence. Sentence! Sentence; sentence. Sentence sentence."
    sample = trim(""" Sentence. Sentence! Sentence; sentence. Sentence sentence. """)
    actual = one_line(sample)
    assert actual == expected
def split_to_dictionary(text, prefix='$', delimiter='='):
    """
    Like split_to_array, but with leaders that become keys.

        $ key1 = value1
        $ key2 = value2

    {'key1': 'value1', 'key2': 'value2'}

    Parameters:
        text: the string to parse.
        prefix: prefix character(s) handed to split_to_array.
        delimiter: separates key from value; it must be surrounded by a
            single space on each side to be recognised.

    Returns a dict of stripped keys to values, the values passed through
    one_line(). Entries without a delimiter are skipped. A later entry with
    the same key replaces an earlier one.
    """
    separator = ' %s ' % delimiter
    out = {}
    for _char, tail in split_to_array(text, prefix, capture_characters=True):
        # Split on the FIRST delimiter only, so a value that itself contains
        # the delimiter ("key = a = b") is kept whole instead of the entry
        # being silently dropped.
        key, found, value = tail.partition(separator)
        if found:
            out[key.strip()] = one_line(value.strip())
    return out
def __init__(self, text: str):
    """Store the text as self.content after normalizing it with one_line()."""
    normalised = one_line(text)
    self.content = normalised
def extract_title(text):
    """
    Return the first paragraph of a block of text.

    The first paragraph is everything before the first blank line ("\\n\\n"),
    or the whole text when there is none; it is passed through one_line().
    """
    first_paragraph, _separator, _rest = text.partition("\n\n")
    return one_line(first_paragraph)