def parse(self, source):
    """
    Like super.convert() but returns the parse tree instead of doing
    postprocessing (no serialization, no postprocessors).
    """
    # Blank input short-circuits the whole pipeline.
    if not source.strip():
        return ''  # a blank unicode string

    try:
        source = util.text_type(source)
    except UnicodeDecodeError as e:  # pragma: no cover
        # Customise error message while maintaining original traceback
        e.reason += '. -- Note: Markdown only accepts unicode input!'
        raise

    # Line preprocessors operate on the raw list of source lines.
    self.lines = source.split("\n")
    for preprocessor in self.preprocessors:
        self.lines = preprocessor.run(self.lines)

    # Build the element tree from the preprocessed lines.
    root = self.parser.parseDocument(self.lines).getroot()

    # Each tree-processor may return a replacement root; None keeps it.
    for tp in self.treeprocessors:
        replacement = tp.run(root)
        if replacement is not None:
            root = replacement

    return root
def handleMatch(self, m, data):
    """Build the in-text footnote reference for a matched footnote id.

    Returns ``(sup_element, start, end)`` when the id is a known footnote,
    or ``(None, None, None)`` so other inline patterns may try the match.
    """
    ref = m.group(1)  # don't shadow the builtin `id`
    # Membership test directly on the mapping; `.keys()` was redundant.
    if ref not in self.footnotes.footnotes:
        return None, None, None

    sup = etree.Element("sup")
    a = etree.SubElement(sup, "a")
    sup.set('id', self.footnotes.makeFootnoteRefId(ref, found=True))
    a.set('href', '#' + self.footnotes.makeFootnoteId(ref))
    a.set('class', 'footnote-ref')

    # 1-based position of this footnote within the (ordered) mapping.
    idx = list(self.footnotes.footnotes.keys()).index(ref) + 1
    if self.footnotes.getConfig("USE_LETTERS") and idx <= 26:
        # Index straight into ascii_lowercase instead of building a
        # throwaway 26-entry dict on every match; letters only cover
        # a-z, so positions past 26 fall back to numbers.
        a.text = util.text_type(string.ascii_lowercase[idx - 1])
    else:
        a.text = util.text_type(idx)
    return sup, m.start(0), m.end(0)
def handleMatch(self, m):
    """Return link optionally without protocol."""
    link = m.group(2)
    el = md_util.etree.Element("a")
    el.set('href', self.unescape(link))
    label = md_util.AtomicString(link)
    if self.config['hide_protocol']:
        # Strip everything up to and including '://' from the label.
        label = md_util.AtomicString(label[label.find("://") + 3:])
    el.text = label
    if self.config.get('repo_url_shortener', False):
        el.set('magiclink', md_util.text_type(MAGIC_AUTO_LINK))
    return el
def handleMatch(self, m):
    """Return link optionally without protocol."""
    el = md_util.etree.Element("a")
    el.set('href', self.unescape(m.group(2)))
    el.text = md_util.AtomicString(m.group(2))
    if self.config['hide_protocol']:
        # Drop the scheme prefix from the visible text only; the href
        # attribute keeps the full URL.
        cut = el.text.find("://") + 3
        el.text = md_util.AtomicString(el.text[cut:])
    if self.config.get('repo_url_shortener', False):
        el.set('magiclink', md_util.text_type(MAGIC_AUTO_LINK))
    return el
def _getElemFromSource(source):
    """Parse Markdown *source* and return the root of its element tree.

    Stops after block parsing — tree- and post-processors never run.
    """
    md = Markdown(extensions=['tables'])
    if not source.strip():
        raise ValueError(app.tr("Schema file is empty."))
    try:
        source = util.text_type(source)
    except UnicodeDecodeError as err:
        err.reason += '. -- Note: Markdown only accepts unicode input!'
        raise
    lines = source.split("\n")
    for preprocessor in md.preprocessors.values():
        lines = preprocessor.run(lines)
    # Parse the high-level elements.
    return md.parser.parseDocument(lines).getroot()
def handleMatch(self, m):
    """Build the <sup><a> in-text reference for a matched footnote id.

    Returns None when the id is not a known footnote.
    """
    ref = m.group(2)
    if ref not in self.footnotes.footnotes:
        return None
    sup = etree.Element("sup")
    link = etree.SubElement(sup, "a")
    sup.set('id', self.footnotes.makeFootnoteRefId(ref))
    sup.set('class', 'footnote-sup')
    link.set('href', '#' + self.footnotes.makeFootnoteId(ref))
    if self.footnotes.md.output_format not in ['html5', 'xhtml5']:
        link.set('rel', 'footnote')  # invalid in HTML5
    link.set('class', 'footnote-ref')
    # 1-based ordinal of the footnote within the ordered collection.
    link.text = text_type(self.footnotes.footnotes.index(ref) + 1)
    return sup
def _getElemFromSource(source):
    """Convert Markdown text into its parsed ElementTree root element."""
    markdown = Markdown(extensions=['tables'])
    if not source.strip():
        raise ValueError(app.tr(
            "Schema file is empty."
        ))
    try:
        source = util.text_type(source)
    except UnicodeDecodeError as e:
        e.reason += '. -- Note: Markdown only accepts unicode input!'
        raise
    lines = source.split("\n")
    for prep in markdown.preprocessors.values():
        lines = prep.run(lines)
    # Stop after block parsing; skip tree/post-processing entirely.
    return markdown.parser.parseDocument(lines).getroot()
def handleMatch(self, m, data):
    """Handle URL matches."""
    link = m.group('link')
    el = md_util.etree.Element("a")
    el.text = md_util.AtomicString(link)
    # Bare 'www.' matches get an explicit scheme prepended for the href.
    href = "http://%s" % link if m.group("www") else link
    if self.config['hide_protocol']:
        # Hide the scheme in the visible text only.
        el.text = md_util.AtomicString(el.text[el.text.find("://") + 3:])
    el.set("href", self.unescape(href.strip()))
    if self.config.get('repo_url_shortener', False):
        el.set('magiclink', md_util.text_type(MAGIC_LINK))
    return el, m.start(0), m.end(0)
def handleMatch(self, m):
    """Handle URL matches."""
    target = m.group(2)
    el = md_util.etree.Element("a")
    el.text = md_util.AtomicString(target)
    if m.group("www"):
        # Bare 'www.' form: supply the scheme explicitly.
        href = "http://%s" % target
    else:
        href = target
    if self.config['hide_protocol']:
        # Hide the scheme in the visible text only.
        el.text = md_util.AtomicString(el.text[el.text.find("://") + 3:])
    el.set("href", self.sanitize_url(self.unescape(href.strip())))
    if self.config.get('repo_url_shortener', False):
        el.set('magiclink', md_util.text_type(MAGIC_LINK))
    return el
def _convert_to_elem(self, source):
    """Run the convert pipeline only until block parsing is done.

    Only useful for introspecting; returns '' for blank input,
    otherwise the parsed root element.
    """
    if not source.strip():
        return ''  # a blank unicode string
    try:
        source = util.text_type(source)
    except UnicodeDecodeError as e:  # pragma: no cover
        # Customise error message while maintaining original traceback
        e.reason += '. -- Note: Markdown only accepts unicode input!'
        raise
    # Split into lines and run the line preprocessors (no re-joining).
    self._run_preprocessors(source, concat=False)
    # Parse the high-level elements and hand back the tree root.
    return self.parser.parseDocument(self.lines).getroot()
def parse(self, source): """ Like super.convert() but returns the parse tree """ # Fixup the source text if not source.strip(): return '' # a blank unicode string try: source = util.text_type(source) except UnicodeDecodeError as e: # pragma: no cover # Customise error message while maintaining original trackback e.reason += '. -- Note: Markdown only accepts unicode input!' raise # Split into lines and run the line preprocessors. self.lines = source.split("\n") # newlines = [] # newlines[:] = [n + " " for n in self.lines] # self.lines = newlines for prep in self.preprocessors: self.lines = prep.run(self.lines) # Parse the high-level elements. root = self.parser.parseDocument(self.lines).getroot() # Run the tree-processors for treeprocessor in self.treeprocessors: newRoot = treeprocessor.run(root) if newRoot is not None: root = newRoot # Serialize _properly_. Strip top-level tags. output = self.serializer(root) if self.stripTopLevelTags: try: start = output.index( '<%s>' % self.doc_tag) + len(self.doc_tag) + 2 end = output.rindex('</%s>' % self.doc_tag) output = output[start:end].strip() except ValueError: # pragma: no cover if output.strip().endswith('<%s />' % self.doc_tag): # We have an empty document output = '' else: # We have a serious problem raise ValueError('Markdown failed to strip top-level ' 'tags. Document=%r' % output.strip()) # Run the text post-processors for pp in self.postprocessors: output = pp.run(output) # CONVERT THE HTML BACK TO ROOT parser = etree.HTMLParser() tree = etree.parse(StringIO(output.strip()), parser) root = tree.getroot() return root