def run(self, parent, blocks):
    accumulated_tex_blocks = []

    # Get all the tex from the document
    while blocks:
        block = blocks.pop(0)
        accumulated_tex_blocks.append(block)
        # Does this block also end the latex environment?
        if ("\n%s" % block).rstrip().endswith(LaTeXBlockProcessor.ENV_END):
            break

    latex_raw = "\n\n".join(accumulated_tex_blocks)
    latex_match = LaTeXBlockProcessor.latex_re.match(latex_raw)

    # Check the latex environment conforms to spec!
    if latex_match is None:
        raise Exception("Invalid latex environment:\n%s" % latex_raw)

    alt = latex_match.group(1)
    src = latex_match.group(2)

    # Check to see if this is a block defining the preamble for latex blocks in
    # this document.
    if alt == "<preamble>":
        # For the preamble section
        self.preamble += src
    else:
        # A normal LaTeX file to render

        # Make the image a link to the PDF?
        link_pdf = False
        if "--pdf" in alt:
            alt = alt.replace("--pdf", "").strip()
            link_pdf = True

        img = os.path.join(self.configs["latex_img_dir"],
                           "%s.png" % (slugify(alt, "_")))
        if link_pdf:
            pdf = os.path.join(self.configs["latex_img_dir"],
                               "%s.pdf" % (slugify(alt, "_")))
        else:
            pdf = None

        self.render_latex(src, img, pdf)

        # Add the image of the latex supplied
        if link_pdf:
            blocks.insert(0, "[![%s](file://%s)](file://%s)" % (alt, img, pdf))
        else:
            blocks.insert(0, "![%s](file://%s)" % (alt, img))

def parseHeaders(source):
    """
    Parse headers to construct Table Of Contents
    return: [(level, text, position, anchor)]
    position/anchor is the header position in notesEdit/notesView
    """
    hdrs = []
    headers = []
    used_ids = set()  # In case there are headers with the same name.

    # hash headers
    RE = re.compile(r'^(#+)(.+)', re.MULTILINE)
    for m in RE.finditer(source):
        level = len(m.group(1))
        hdr = m.group(2)
        pos = m.start()
        hdrs.append((pos, level, hdr))

    # setext headers
    RE = re.compile(r'(.+)\n([=-]+[ ]*)(\n|$)', re.MULTILINE)
    for m in RE.finditer(source):
        if m.group(2).startswith('='):
            level = 1
        else:
            level = 2
        hdr = m.group(1)
        pos = m.start()
        hdrs.append((pos, level, hdr))

    hdrs.sort()
    for (p, l, h) in hdrs:
        anchor = unique(slugify(h, '-'), used_ids)
        headers.append((l, h, p, anchor))
    return headers

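# Hedged usage sketch for parseHeaders above. It assumes `re` is imported and
# that `slugify`/`unique` behave like the helpers in markdown.extensions.headerid;
# positions are character offsets into `source`, and hash-header text keeps its
# leading space because the regex does not strip it.
source = "# Intro\nSome text\nDetails\n=======\n"
for level, text, pos, anchor in parseHeaders(source):
    print(level, repr(text), pos, anchor)
# Expected, roughly:
#   1 ' Intro' 0 intro
#   1 'Details' 18 details
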
def get_section_start_end(lines, anchor_name):
    """ THIS NEEDS TESTS """
    # If there's an underscore + a number at the end of the slug
    section_number_match = re.match(r"[^_]*_(\d+)", anchor_name)
    if section_number_match:
        section_number = int(section_number_match.group(1))
        # Strip the underscore + number
        section_name = re.match(r"[^_]*", anchor_name).group()
    else:
        section_number = 0
        section_name = anchor_name

    # Start going through the lines, one by one, looking for perfect headers
    in_code_block = False
    num_found = 0
    last_depth = 5
    start = 0
    end = len(lines)
    looking_for_end = False

    # Set up some regular expressions
    code_block_re = re.compile(r"[~`]{3,}")
    header_re = re.compile(r"(#{1,5}) ?(.+)")

    for line_number, line in enumerate(lines):
        # Ignore headers inside code blocks
        code_block_match = code_block_re.match(line)
        if code_block_match:
            in_code_block = not in_code_block
        if in_code_block:
            continue

        header_match = header_re.match(line)
        if header_match:
            # Figure out the depth (needed to determine the end of the section)
            header_depth = len(header_match.group(1))
            if header_depth <= last_depth and looking_for_end:
                end = line_number
                looking_for_end = False

            # Save the name of the section here
            header = unicode(header_match.group(2))
            if slugify(header, "-") == section_name:
                # This is a potential header!
                if num_found == section_number:
                    # This is the right one. Start looking for the end
                    looking_for_end = True
                    start = line_number
                    last_depth = header_depth
                num_found += 1

    return (start, end)

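# Hedged test sketch for get_section_start_end (its docstring asks for tests).
# It assumes the module-level `slugify` helper and a Python 2 style `unicode`
# builtin, since the function calls unicode() on the header text.
lines = [
    "# Intro",         # start of the "intro" section (line 0)
    "Some text",
    "## Details",      # deeper heading, does not end the section
    "```",
    "# not a header",  # ignored: inside a fenced code block
    "```",
    "# Next",          # same depth as the match, ends the section (line 6)
]
assert get_section_start_end(lines, "intro") == (0, 6)
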
def handleMatch(self, m):
    try:
        ref = m.group(9)
    except IndexError:
        ref = None
    shortref = False
    if not ref:
        # if we got something like "[Google][]" or "[Google]"
        # we'll use "google" as the id
        ref = m.group(2)
        shortref = True

    # Clean up linebreaks in ref
    ref = self.NEWLINE_CLEANUP_RE.sub(' ', ref)

    text = m.group(2)
    id = ref.lower()

    if id in self.markdown.references:
        href, title = self.markdown.references[id]
    else:
        anchor = None
        if '#' in ref:
            ref, anchor = ref.split('#', 1)
        this = self.markdown.this
        if not posixpath.isabs(ref):
            rootrelpath = '/' + '/'.join(this['components'][:-1])
            id = posixpath.normpath(posixpath.join(rootrelpath, ref))
            id = id.lower()
        else:
            id = ref.lower()
        ref = ref.lower()
        if ref in self.markdown.site['reflinks']:
            if (ref != id) and (id in self.markdown.site['reflinks']):
                raise UrubuError(ambig_ref_error.format(ref, this['fn']))
            id = ref
        if id in self.markdown.site['reflinks']:
            item = self.markdown.site['reflinks'][id]
            href, title = item['url'], item['title']
            if shortref:
                text = title
                if anchor is not None:
                    text = anchor
            if anchor is not None:
                href = '%s#%s' % (href, headerid.slugify(anchor, '-'))
        else:
            # ignore undefined refs
            warn(undef_ref_warning.format(ref, this['fn']), UrubuWarning)
            return None

    return self.makeTag(href, title, text)

def group_by_year_month(cls):
    """
    Returns all posts grouped by year and month
    :return dict: { (year, month): [Posts] }
    """
    order_group = collections.OrderedDict()
    posts = Post.query.with_entities(
        Post.id, Post.title, Post.created).order_by(desc(Post.created))
    for ((year, month), grouped_posts) in groupby(
            posts, lambda x: (x.created.year, x.created.month)):
        grouped = []
        for post in grouped_posts:
            slugged_post = post._asdict()
            slugged_post['slug'] = slugify(post.title, '-')
            grouped.append(slugged_post)
        order_group[(year, calendar.month_name[month])] = grouped
    return order_group

def parseHeaders(source):
    '''
    Quite basic header parser
    Headers are used to construct Table Of Contents
    return: [(hdrLevel, hdrText, hdrPosition, hdrAnchor)]
    '''
    # RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)')
    hdrs = []
    used_ids = set()  # In case there are headers with the same name.
    RE = re.compile(r'^(#+)(.+)', re.MULTILINE)
    for m in RE.finditer(source):
        hdrLevel = m.group(1)
        hdr = m.group(2)
        pos = m.start()
        anchor = unique(slugify(hdr, '-'), used_ids)
        hdrs.append((hdrLevel, hdr, pos, anchor))
    return hdrs

def allocate_anchors(self, headings):
    ids = []
    labels = []
    levels = []
    for heading in headings:
        # Pick an ID
        id = unique(slugify(heading.text, "-"), ids)
        # Assign the ID to the heading
        heading.attrib["id"] = id
        # Record it
        ids.append(id)
        labels.append(heading.text)
        levels.append(int(heading.tag[1]))
    return zip(levels, labels, ids)

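# Hedged usage sketch for allocate_anchors, assuming it is a method on a
# treeprocessor-like object (`proc` below is hypothetical) and that the
# module's `slugify`/`unique` helpers are available. Headings are plain
# ElementTree h1-h6 elements.
import xml.etree.ElementTree as etree

headings = []
for tag, text in [("h1", "Overview"), ("h2", "Installation")]:
    el = etree.Element(tag)
    el.text = text
    headings.append(el)

toc = list(proc.allocate_anchors(headings))
# roughly: [(1, 'Overview', 'overview'), (2, 'Installation', 'installation')]
# and each heading element now carries a matching id attribute.
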
def prefixed_slugify(*args, **kwargs):
    return "plm-" + slugify(*args, **kwargs)

def make_slug(string):
    return headerid.slugify(string, '-')

def parseHeaders(source, strip_fenced_block=False, strip_ascii_math=False):
    """
    Parse headers to construct Table Of Contents
    return: [(level, text, position, anchor)]
    position/anchor is the header position in notesEdit/notesView
    """
    hdrs = []
    headers = []
    used_ids = set()  # In case there are headers with the same name.

    # copied from asciimathml so we don't have a hard dependency just to strip it
    ASCIIMATHML_RE = re.compile(r'^(.*)\$\$([^\$]*)\$\$(.*)$', re.M)
    # copied from the fenced block code
    FENCED_BLOCK_RE = re.compile(
        r'''
        (?P<fence>^(?:~{3,}|`{3,}))[ ]*              # Opening ``` or ~~~
        (\{?\.?(?P<lang>[a-zA-Z0-9_+-]*))?[ ]*       # Optional {, and lang
        # Optional highlight lines, single- or double-quote-delimited
        (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?[ ]*
        }?[ ]*\n                                     # Optional closing }
        (?P<code>.*?)(?<=\n)
        (?P=fence)[ ]*$''',
        re.MULTILINE | re.DOTALL | re.VERBOSE)

    filtered_source = source

    # if applicable, strip out any trouble text before we start parsing the headers
    if strip_fenced_block:
        m = FENCED_BLOCK_RE.search(filtered_source)
        while m is not None:
            nfillers = (m.end() - m.start())
            filtered_source = FENCED_BLOCK_RE.sub("\n" * nfillers,
                                                  filtered_source, count=1)
            m = FENCED_BLOCK_RE.search(filtered_source)
    if strip_ascii_math:
        m = ASCIIMATHML_RE.search(filtered_source)
        while m is not None:
            nfillers = (m.end() - m.start())
            filtered_source = ASCIIMATHML_RE.sub("\n" * nfillers,
                                                 filtered_source, count=1)
            m = ASCIIMATHML_RE.search(filtered_source)

    # hash headers
    RE = re.compile(r'^(#+)(.+)', re.MULTILINE)
    for m in RE.finditer(filtered_source):
        level = len(m.group(1))
        hdr = m.group(2)
        pos = m.start()
        hdrs.append((pos, level, hdr))

    # setext headers
    RE = re.compile(r'(.+)\n([=-]+[ ]*)(\n|$)', re.MULTILINE)
    for m in RE.finditer(filtered_source):
        if m.group(2).startswith('='):
            level = 1
        else:
            level = 2
        hdr = m.group(1)
        pos = m.start()
        hdrs.append((pos, level, hdr))

    hdrs.sort()
    for (p, l, h) in hdrs:
        anchor = unique(slugify(h, '-'), used_ids)
        headers.append((l, h, p, anchor))
    return headers

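# Hedged usage sketch for the filtering variant of parseHeaders above (same
# `slugify`/`unique` assumptions as before). With strip_fenced_block=True, a
# "# heading" inside a fenced code block is not reported, while real headers
# keep their original character positions because stripped regions are
# replaced by an equal number of newlines.
source = (
    "# Real header\n"
    "```\n"
    "# not a header, just a comment in code\n"
    "```\n"
    "## Another header\n"
)
print(parseHeaders(source, strip_fenced_block=True))
# roughly: [(1, ' Real header', 0, 'real-header'),
#           (2, ' Another header', 61, 'another-header')]
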
def slug(self):
    """
    Returns 'sluggified' version of the title
    :return str:
    """
    return slugify(self.title, '-')
