def __init__(self, settings_spec=None, settings_overrides=None,
             config_section='general'):
    """
    Create the reusable reST -> HTML publisher stored on ``self.pub``.

    Parameters:

    - `settings_spec`: forwarded to
      ``Publisher.process_programmatic_settings``.
    - `settings_overrides`: dict of docutils setting overrides. When left
      as ``None``, ``report_level`` and ``halt_level`` are both set to 5.
    - `config_section`: forwarded unchanged.
    """
    if settings_overrides is None:
        # Build a fresh dict per call rather than sharing one mutable
        # default object between all callers.
        settings_overrides = {'report_level': 5, 'halt_level': 5}

    self.pub = Publisher(reader=None, parser=None, writer=None,
                         settings=None,
                         source_class=io.StringInput,
                         destination_class=io.StringOutput)
    self.pub.set_components(reader_name='standalone',
                            parser_name='restructuredtext',
                            writer_name='html')

    # hack: JEP-0071 does not allow HTML char entities, so we hack our way
    # out of it.
    # &mdash; == u"\u2014"
    # a setting to only emit character entities in the writer would be nice
    # FIXME: several &nbsp; are emitted, and they are explicitly forbidden
    # in the JEP
    # &nbsp; == u"\u00a0"
    self.pub.writer.translator_class.attribution_formats['dash'] = (
        '\u2014', '')

    self.pub.process_programmatic_settings(settings_spec,
                                           settings_overrides,
                                           config_section)
def publish_cmdline_to_binary(reader=None, reader_name='standalone',
                              parser=None, parser_name='restructuredtext',
                              writer=None, writer_name='pseudoxml',
                              settings=None, settings_spec=None,
                              settings_overrides=None, config_section=None,
                              enable_exit_status=1, argv=None,
                              usage=default_usage,
                              description=default_description,
                              destination=None,
                              destination_class=BinaryFileOutput):
    """
    Run a `Publisher` for command-line file I/O and return its output.

    Input and output paths come from the command line.  The publisher is
    created with `destination_class` (``BinaryFileOutput`` by default)
    as its destination class; otherwise this mirrors ``publish_cmdline``.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of
      ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the
      command line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).

    The `destination` parameter is accepted but not used by this function.
    """
    publisher = Publisher(reader, parser, writer,
                          settings=settings,
                          destination_class=destination_class)
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
def publish(writer_name):
    """
    Publish ``source_path`` with the writer called `writer_name`.

    The settings overrides are looked up in the module-level ``options``
    mapping and the output goes to ``pg1.<extension>``, where the
    extension comes from the module-level ``extensions`` mapping.
    """
    overrides = options[writer_name]
    usage = default_usage

    pub = Publisher(None, None, None, settings=None)
    pub.set_components('standalone', 'restructuredtext', writer_name)

    settings = pub.get_settings(settings_spec=None, config_section=None)
    if overrides:
        settings._update(overrides, 'loose')

    source = file(source_path)
    pub.set_source(source, source_path)

    destination_path = 'pg1.' + extensions[writer_name]
    destination = file(destination_path, 'w')
    pub.set_destination(destination, destination_path)

    pub.publish([], usage, description, None, overrides,
                config_section=None, enable_exit=1)
def _to_odp_content(self, rst, xml_filename, odp_name='/tmp/out'):
    """
    Convert `rst` to an ODP file and return its pretty-printed content.xml.

    Parameters:

    - `rst`: reStructuredText source; written to ``/tmp/in.rst`` first.
    - `xml_filename`: path that receives the pretty-printed ``content.xml``.
    - `odp_name`: path of the generated presentation archive.

    Returns the same pretty-printed XML that is written to `xml_filename`.
    """
    reader = standalone.Reader()
    writer = rst2odp.Writer()
    publisher = Publisher(reader, None, writer,
                          settings=None,
                          destination_class=rst2odp.BinaryFileOutput)
    publisher.set_components('standalone', 'restructuredtext', 'pseudoxml')

    description = ('Generates OpenDocument/OpenOffice/ODF slides from '
                   'standalone reStructuredText sources.  '
                   + default_description)

    # The publisher is driven through its command-line interface, so the
    # source has to exist on disk.  ``with`` closes the handle even if
    # the write fails.
    with open('/tmp/in.rst', 'w') as fin:
        fin.write(rst)

    argv = ['--traceback', '/tmp/in.rst', odp_name]
    publisher.publish(argv, default_usage, description, None, None,
                      config_section=None, enable_exit_status=1)

    # pull content.xml out of the generated archive
    z = zipwrap.Zippier(odp_name)
    content = preso.pretty_xml(z.cat('content.xml'))

    # Open the output only once the content exists, so a failure above
    # does not leave a truncated file behind.
    with open(xml_filename, 'w') as fout:
        fout.write(content)
    return content
def create_publisher(app: "Sphinx", filetype: str) -> Publisher:
    """Build a Publisher for `filetype` sources with default settings set.

    The reader is a ``SphinxStandaloneReader`` set up with `app`; the
    parser comes from the application's registry.
    """
    source_reader = SphinxStandaloneReader()
    source_reader.setup(app)

    source_parser = app.registry.create_source_parser(app, filetype)
    is_commonmark = source_parser.__class__.__name__ == 'CommonMarkParser'
    if is_commonmark and source_parser.settings_spec == ():
        # a workaround for recommonmark
        #   If recommonmark.AutoStrictify is enabled, the parser invokes
        #   reST parser internally.  But recommonmark-0.4.0 does not provide
        #   settings_spec for reST parser.  As a workaround, this copies
        #   settings_spec for RSTParser to the CommonMarkParser.
        from docutils.parsers.rst import Parser as RSTParser

        source_parser.settings_spec = RSTParser.settings_spec

    publisher = Publisher(
        reader=source_reader,
        parser=source_parser,
        writer=SphinxDummyWriter(),
        source_class=SphinxFileInput,
        destination=NullOutput(),
    )

    # Propagate exceptions by default when used programmatically:
    defaults = {"traceback": True, **app.env.settings}

    # Set default settings
    option_parser = publisher.setup_option_parser(**defaults)  # type: ignore
    publisher.settings = option_parser.get_default_values()
    return publisher
def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description):
    """
    See docutils.core.publish_cmdline.

    This variant additionally returns the source and destination recorded
    on the publisher's settings, i.e. a 3-tuple
    ``(output, settings._source, settings._destination)``.
    """
    publisher = Publisher(reader, parser, writer, settings=settings)
    publisher.set_components(reader_name, parser_name, writer_name)
    result = publisher.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    final_settings = publisher.settings
    return result, final_settings._source, final_settings._destination
def main(args=sys.argv):
    """Run the epub publisher on the process command line.

    `args` is accepted but not consulted: ``None`` is passed as ``argv``
    to ``Publisher.publish``.
    """
    epub_reader = standalone.Reader()
    epub_writer = EpubWriter()
    rst_parser = Parser()

    publisher = Publisher(
        epub_reader, rst_parser, epub_writer, None,
        destination_class=EpubFileOutput,
    )
    publisher.set_components("standalone", "restructuredtext", "epub2")

    description = (
        "Generates epub books from reStructuredText sources.  "
        + default_description
    )
    publisher.publish(
        None,
        default_usage,
        description,
        None,
        None,
        config_section=None,
        enable_exit_status=1,
    )
def main(args):
    """Run the epub publisher on the process command line.

    `args` is accepted but not consulted: ``None`` is passed as ``argv``
    to ``Publisher.publish``.  The publish result is discarded.
    """
    epub_reader = standalone.Reader()
    epub_writer = EpubWriter()
    rst_parser = Parser()

    publisher = Publisher(epub_reader, rst_parser, epub_writer, None,
                          destination_class=EpubFileOutput)
    publisher.set_components('standalone', 'restructuredtext', 'epub2')

    description = ('Generates epub books from reStructuredText sources.  '
                   + default_description)
    publisher.publish(None, default_usage, description, None, None,
                      config_section=None, enable_exit_status=1)
def read_doc(app, env, filename):
    # type: (Sphinx, BuildEnvironment, unicode) -> nodes.document
    """Parse the document at `filename` and return its doctree."""
    filetype = get_filetype(app.config.source_suffix, filename)
    input_class = app.registry.get_source_input(filetype)
    doc_reader = SphinxStandaloneReader(app)
    doc_source = input_class(app, env, source=None, source_path=filename,
                             encoding=env.config.source_encoding)
    doc_parser = app.registry.create_source_parser(app, filetype)

    is_commonmark = doc_parser.__class__.__name__ == 'CommonMarkParser'
    if is_commonmark and doc_parser.settings_spec == ():
        # a workaround for recommonmark
        #   If recommonmark.AutoStrictify is enabled, the parser invokes
        #   reST parser internally.  But recommonmark-0.4.0 does not provide
        #   settings_spec for reST parser.  As a workaround, this copies
        #   settings_spec for RSTParser to the CommonMarkParser.
        doc_parser.settings_spec = RSTParser.settings_spec

    publisher = Publisher(reader=doc_reader,
                          parser=doc_parser,
                          writer=SphinxDummyWriter(),
                          source_class=SphinxDummySourceClass,
                          destination=NullOutput())
    publisher.set_components(None, 'restructuredtext', None)
    publisher.process_programmatic_settings(None, env.settings, None)
    publisher.set_source(doc_source, filename)
    publisher.publish()
    return publisher.document
def parse_docstring(doc):
    """Publish the string `doc` as reStructuredText and return the result.

    Uses the standalone reader, a ``Writer()`` instance from this module
    and default programmatic settings.
    """
    publisher = Publisher(source=doc, source_class=io.StringInput)
    publisher.set_reader('standalone', publisher.parser, 'restructuredtext')
    publisher.writer = Writer()
    publisher.process_programmatic_settings(None, None, None)
    publisher.set_source(doc, None)
    published = publisher.publish()
    return published
def main(prog_args):
    """Run the ODF slide publisher on the process command line.

    `prog_args` is accepted but not consulted: ``None`` is passed as
    ``argv`` to ``Publisher.publish``.  The publish result is discarded.
    """
    slide_reader = standalone.Reader()
    slide_writer = Writer()

    publisher = Publisher(slide_reader, None, slide_writer, None,
                          destination_class=BinaryFileOutput)
    publisher.set_components('standalone', 'restructuredtext', 'pseudoxml')

    description = ('Generates OpenDocument/OpenOffice/ODF slides from '
                   'standalone reStructuredText sources.  '
                   + default_description)
    publisher.publish(None, default_usage, description, None, None,
                      config_section=None, enable_exit_status=1)
def from_file(cls, filename):
    """
    Load `filename` from disk, parse it and construct a new BlogPost.

    This is essentially docutils.core.publish_doctree, adapted to use an
    HTML writer and to read from a file instead of a string.  The
    returned post also gets its ``filename`` attribute set.
    """
    publisher = Publisher(
        destination_class=NullOutput,
        source=FileInput(source_path=filename),
        reader=BlogPostReader(),
        writer=HTMLWriter(),
        parser=RSTParser(),
    )

    # Settings must exist before the traceback flag can be switched on.
    publisher.get_settings()
    publisher.settings.traceback = True
    publisher.publish()

    metadata = publisher.document.blog_meta
    title = metadata['title']
    post_date = metadata['post_date']
    author = metadata['author']
    tags = metadata['tags']
    body = publisher.writer.parts['html_body']

    blog_post = cls(title, post_date, author, tags, body)
    blog_post.filename = filename
    return blog_post
def render_partial(self, node):
    """Utility: Render a lone doctree node.

    Returns ``{'fragment': ''}`` for a ``None`` node, otherwise the
    ``parts`` of the writer after publishing a document that contains
    only `node`.  The publisher is created on first use and cached on
    ``self._publisher``.
    """
    if node is None:
        return {'fragment': ''}

    partial = new_document(b'<partial node>')
    partial.append(node)

    if self._publisher is None:
        self._publisher = Publisher(source_class=DocTreeInput,
                                    destination_class=StringOutput)
        self._publisher.set_components('standalone', 'restructuredtext',
                                       'pseudoxml')

    publisher = self._publisher
    publisher.reader = DoctreeReader()
    publisher.writer = MarkdownWriter(self)
    # An alternative once tried here was {'output_encoding': 'unicode'}.
    publisher.process_programmatic_settings(None, {}, None)
    publisher.set_source(partial, None)
    publisher.set_destination(None, None)
    publisher.publish()
    return publisher.writer.parts
def zotero_odf_scan_publish_cmdline_to_binary(
        reader=None, reader_name='standalone',
        parser=None, parser_name='restructuredtext',
        writer=None, writer_name='pseudoxml',
        settings=None, settings_spec=None,
        settings_overrides=None, config_section=None,
        enable_exit_status=True, argv=None,
        usage=default_usage, description=description,
        destination=None, destination_class=io.BinaryFileOutput):
    """
    Publish the file named in ``sys.argv[1]`` after escaping scan markers.

    Every five-field ``{a|b|c|d|e}`` marker in the input has its pipes
    backslash-escaped.  The rewritten text goes to a temporary file,
    ``sys.argv[1]`` is pointed at that file, and a `Publisher` is run over
    the command line.  The temporary file is removed afterwards, including
    when publishing raises or exits.

    The ``*_name`` parameters and `destination` are accepted but not used
    here; only the component instances are handed to `Publisher`.
    """
    # Tried to get internal conversion working, but using the big hammer
    # is so much simpler.
    ofh = NamedTemporaryFile(mode='w+', delete=False)
    try:
        try:
            if len(sys.argv) > 1:
                with open(sys.argv[1]) as source_file:
                    txt = source_file.read()
                # The flags must be passed by keyword: the fourth positional
                # argument of re.sub is ``count``, which would silently cap
                # the number of substitutions.
                txt = re.sub(
                    r"{([^|}]*)\|([^|}]*)\|([^|}]*)\|([^|}]*)\|([^|}]*)}",
                    r"{\1\|\2\|\3\|\4\|\5}",
                    txt, flags=re.S | re.M)
                ofh.write(txt)
                sys.argv[1] = ofh.name
        finally:
            ofh.close()

        pub = Publisher(reader=reader, parser=parser, writer=writer,
                        settings=settings,
                        destination_class=destination_class)
        pub.publish(
            argv, usage, description, settings_spec, settings_overrides,
            config_section=config_section,
            enable_exit_status=enable_exit_status)
    finally:
        # See above.  Runs even if publish() raises or calls sys.exit().
        os.unlink(ofh.name)
def __init__(self):
    """Create the string-to-string HTML publisher kept on ``self._publisher``.

    The writer's translator class is replaced by ``MyHTMLTranslator`` and
    the settings come from the module-level ``extra_params``.
    """
    publisher = Publisher(source_class=StringInput,
                          destination_class=StringOutput)
    self._publisher = publisher
    publisher.set_components('standalone', 'restructuredtext', 'html')
    publisher.writer.translator_class = MyHTMLTranslator
    publisher.process_programmatic_settings(None, extra_params, None)
def process_description(source, output_encoding='unicode'):
    """Given a source string, return an HTML fragment as a string.

    The return value is the contents of the <body> tag.

    Parameters:

    - `source`: A multi-line text string; required.
    - `output_encoding`: The desired encoding of the output.  If a Unicode
      string is desired, use the default value of "unicode".

    Raises ``TransformError`` when a ``refuri``/``uri`` attribute uses a
    scheme that is not in ``ALLOWED_SCHEMES``.
    """
    # Dedent all lines of `source`.
    source = trim_docstring(source)

    settings_overrides = {
        'raw_enabled': 0,  # no raw HTML code
        'file_insertion_enabled': 0,  # no file/URL access
        'halt_level': 2,  # at warnings or errors, raise an exception
        'report_level': 5,  # never report problems with the reST code
    }

    # Convert reStructuredText to a doctree using Docutils.
    document = publish_doctree(source=source,
                               settings_overrides=settings_overrides)

    # Reject links whose scheme is not explicitly allowed.
    for node in document.traverse():
        if node.tagname == '#text':
            continue
        for attribute in ('refuri', 'uri'):
            if node.hasattr(attribute):
                uri = node[attribute]
                break
        else:
            continue
        if urlparse(uri).scheme not in ALLOWED_SCHEMES:
            raise TransformError('link scheme not allowed: {0}'.format(uri))

    # now turn the transformed document into HTML
    doctree_reader = readers.doctree.Reader(parser_name='null')
    publisher = Publisher(doctree_reader,
                          source=io.DocTreeInput(document),
                          destination_class=io.StringOutput)
    publisher.set_writer('html')
    publisher.process_programmatic_settings(None, settings_overrides, None)
    publisher.set_destination(None, None)
    publisher.publish()

    body = publisher.writer.parts['body']
    if output_encoding == 'unicode':
        return body
    return body.encode(output_encoding)
def publish(self):
    """Publish, then register every section under its normalized title.

    After ``Publisher.publish`` has run, each section node gets the fully
    normalized text of its first child appended to ``names`` and is noted
    on the document as an implicit target.
    """
    Publisher.publish(self)

    # set names and ids attribute to section node
    from docutils import nodes
    for section_node in self.document.traverse(nodes.section):
        title_text = section_node[0].astext()
        section_node['names'].append(nodes.fully_normalize_name(title_text))
        self.document.note_implicit_target(section_node, section_node)
def pypi_render(source):
    """
    Render `source` (reST) to an HTML body, or return ``None`` on problems.

    Copied (and slightly adapted) from pypi.description_tools

    ``None`` is returned when publishing fails, when a link uses a scheme
    outside the allowed list, or when anything was written to stderr while
    publishing.  Otherwise the ``body`` part of the HTML writer is returned.
    """
    ALLOWED_SCHEMES = '''file ftp gopher hdl http https imap mailto mms news
        nntp prospero rsync rtsp rtspu sftp shttp sip sips snews svn svn+ssh
        telnet wais irc'''.split()

    settings_overrides = {
        "raw_enabled": 0,  # no raw HTML code
        "file_insertion_enabled": 0,  # no file/URL access
        "halt_level": 2,  # at warnings or errors, raise an exception
        "report_level": 5,  # never report problems with the reST code
    }

    # capture publishing errors, they go to stderr
    old_stderr = sys.stderr
    sys.stderr = s = StringIO.StringIO()
    parts = None

    try:
        # Convert reStructuredText to HTML using Docutils.
        document = publish_doctree(source=source,
                                   settings_overrides=settings_overrides)

        for node in document.traverse():
            if node.tagname == '#text':
                continue
            if node.hasattr('refuri'):
                uri = node['refuri']
            elif node.hasattr('uri'):
                uri = node['uri']
            else:
                continue
            o = urlparse.urlparse(uri)
            if o.scheme not in ALLOWED_SCHEMES:
                raise TransformError('link scheme not allowed')

        # now turn the transformed document into HTML
        reader = readers.doctree.Reader(parser_name='null')
        pub = Publisher(reader, source=io.DocTreeInput(document),
                        destination_class=io.StringOutput)
        pub.set_writer('html')
        pub.process_programmatic_settings(None, settings_overrides, None)
        pub.set_destination(None, None)
        pub.publish()
        parts = pub.writer.parts
    except (Exception, SystemExit):
        # Deliberately best-effort: any rendering failure (including an
        # exit requested by the publisher) yields None below.  Unlike a
        # bare ``except``, this lets KeyboardInterrupt propagate.
        pass
    finally:
        # Always undo the stderr redirection, whatever happened above.
        sys.stderr = old_stderr

    # no result if publishing errors occurred
    if parts is None or len(s.getvalue()) > 0:
        return None
    return parts['body']
def __init__(self):
    """Initialise via ``Publisher.__init__`` and reset the extractor state."""
    Publisher.__init__(self)

    # List of (transform, storage) type pairs: the transforms that are run
    # during `process`.  See Nabu for their interface.  The stores may be
    # left uninitialized until `prepare_extractors`.
    self.extractors = []

    self.source_class = None
def inspect(
    filename,
    source_path=None,
):
    """Return the document object before any transforms are applied.

    NOTE(review): only `source_path` is handed to the publisher;
    `filename` is accepted but never read here -- confirm with callers.
    """
    from docutils.core import Publisher

    publisher = Publisher(source_class=io.FileInput)
    publisher.set_reader('standalone', None, "restructuredtext")
    publisher.process_programmatic_settings(None, None, None)
    publisher.set_source(source_path=source_path)
    publisher.set_io()

    # Call the reader directly instead of publish() so that only reading
    # and parsing happen.
    document = publisher.reader.read(
        publisher.source, publisher.parser, publisher.settings)
    return document
def get_html(self, body_only=True, content_only=False, noclasses=False):
    """Render this post's reST to HTML.

    - `body_only`: return only the writer's ``html_body`` instead of the
      full published output.
    - `content_only`: render ``self.get_rst(...)`` rather than the
      ``post_single.rst`` template.
    - `noclasses`: forwarded to whichever of the two produces the reST.

    Anything written to stderr while publishing is collected and handed
    to ``self._process_rest_errors``.
    """
    import sys
    import pygments_rest
    from docutils.core import Publisher
    from docutils.io import StringInput, StringOutput
    from cStringIO import StringIO

    overrides = {
        'doctitle_xform': 1,
        'pep_references': 1,
        'rfc_references': 1,
        'footnote_references': 'superscript',
        'output_encoding': 'unicode',
        # 2=show warnings, 3=show only errors, 5=off (docutils.utils
        'report_level': 2,
    }

    if content_only:
        rst_source = self.get_rst(noclasses=noclasses)
    else:
        rst_source = render_to('post_single.rst', post=self,
                               noclasses=noclasses)

    publisher = Publisher(reader=None, parser=None, writer=None,
                          settings=None,
                          source_class=StringInput,
                          destination_class=StringOutput)
    publisher.set_components(reader_name='standalone',
                             parser_name='restructuredtext',
                             writer_name='html')
    publisher.process_programmatic_settings(settings_spec=None,
                                            settings_overrides=overrides,
                                            config_section=None)
    publisher.set_source(rst_source, source_path=self.module_path)
    publisher.set_destination(None, None)

    # Redirect stderr for the duration of the publish call so docutils
    # messages can be inspected afterwards.
    captured = StringIO()
    saved_stderr = sys.stderr
    sys.stderr = captured
    try:
        html_full = publisher.publish(enable_exit_status=False)
        html_body = ''.join(publisher.writer.html_body)
    finally:
        sys.stderr = saved_stderr

    self._process_rest_errors(captured.getvalue())
    captured.close()

    if body_only:
        return html_body
    return html_full
def process_labels(site, logger, source, post):
    """Collect explicit reST labels of `post` into the site's label tables.

    The file at `source` is published once with all reporting disabled.
    Every explicit name in the resulting document is recorded in
    ``site.anon_ref_labels``; names that point at a section or rubric are
    additionally recorded, together with the title text, in
    ``site.ref_labels``.  A duplicate label is reported through `logger`.

    ``site.processing_labels`` is ``True`` while the document is being
    published and is reset to ``False`` afterwards, even if publishing
    raises.
    """

    def clean_astext(node):
        """Like node.astext(), but ignore images.

        Taken from sphinx.util.nodes
        """
        node = node.deepcopy()
        for img in node.traverse(nodes.image):
            img['alt'] = ''
        for raw in node.traverse(nodes.raw):
            raw.parent.remove(raw)
        return node.astext()

    site.processing_labels = True
    try:
        pub = Publisher(reader=Reader(), parser=None, writer=None)
        pub.set_components(None, 'restructuredtext', 'html')
        # Reading the file will generate output/errors that we don't care
        # about at this stage. The report_level = 5 means no output
        pub.process_programmatic_settings(
            settings_spec=None,
            settings_overrides={'report_level': 5},
            config_section=None,
        )
        pub.set_source(None, source)
        pub.publish()
        document = pub.document
    finally:
        # Never leave the flag stuck on True when publishing fails.
        site.processing_labels = False

    # Code based on Sphinx std domain
    for name, is_explicit in document.nametypes.items():
        if not is_explicit:
            continue
        labelid = document.nameids[name]
        if labelid is None:
            continue
        node = document.ids[labelid]
        if node.tagname == 'target' and 'refid' in node:
            # indirect target: follow it to the node it refers to
            node = document.ids.get(node['refid'])
            labelid = node['names'][0]
        if (node.tagname == 'footnote' or 'refuri' in node
                or node.tagname.startswith('desc_')):
            continue
        if name in site.ref_labels:
            logger.warn(
                'Duplicate label {dup}, other instance in {other}'.format(
                    dup=name, other=site.ref_labels[name][0]))
        site.anon_ref_labels[name] = post.permalink(), labelid

        if node.tagname in ('section', 'rubric'):
            sectname = clean_astext(node[0])
        else:
            continue
        site.ref_labels[name] = post.permalink(), labelid, sectname
class HTMLGenerator:
    """
    Really simple HTMLGenerator starting from publish_parts

    It reuses the docutils.core.Publisher class, which means it is *not*
    threadsafe.
    """

    def __init__(self, settings_spec=None, settings_overrides=None,
                 config_section='general'):
        """
        Create the reusable reST -> HTML publisher stored on ``self.pub``.

        When `settings_overrides` is ``None``, ``report_level`` and
        ``halt_level`` are both set to 5.  `settings_spec` and
        `config_section` are forwarded to
        ``Publisher.process_programmatic_settings``.
        """
        if settings_overrides is None:
            settings_overrides = {'report_level': 5, 'halt_level': 5}
        self.pub = Publisher(reader=None, parser=None, writer=None,
                             settings=None,
                             source_class=io.StringInput,
                             destination_class=io.StringOutput)
        self.pub.set_components(reader_name='standalone',
                                parser_name='restructuredtext',
                                writer_name='html')

        # hack: JEP-0071 does not allow HTML char entities, so we hack our
        # way out of it.
        # &mdash; == u"\u2014"
        # a setting to only emit character entities in the writer would be
        # nice
        # FIXME: several &nbsp; are emitted, and they are explicitly
        # forbidden in the JEP
        # &nbsp; == u"\u00a0"
        self.pub.writer.translator_class.attribution_formats['dash'] = (
            '\u2014', '')

        self.pub.process_programmatic_settings(settings_spec,
                                               settings_overrides,
                                               config_section)

    def create_xhtml(self, text, destination=None, destination_path=None,
                     enable_exit_status=None):
        """
        Create xhtml for a fragment of IM dialog.
        We can use the source_name to store info about the message

        Returns the writer's stripped ``fragment`` part with every
        ``&nbsp;`` entity replaced by the U+00A0 character.
        """
        self.pub.set_source(text, None)
        self.pub.set_destination(destination, destination_path)
        self.pub.publish(enable_exit_status=enable_exit_status)
        # kludge until we can get docutils to stop generating (rare) &nbsp;
        # entities: swap the entity for the character it stands for.
        fragment = self.pub.writer.parts['fragment'].strip()
        return fragment.replace('&nbsp;', '\u00a0')
def inspect(filename, source_path=None):
    """Return the document object before any transforms are applied.

    NOTE(review): only `source_path` is handed to the publisher;
    `filename` is accepted but never read here -- confirm with callers.
    """
    from docutils.core import Publisher

    doc_publisher = Publisher(source_class=io.FileInput)
    doc_publisher.set_reader("standalone", None, "restructuredtext")
    doc_publisher.process_programmatic_settings(None, None, None)
    doc_publisher.set_source(source_path=source_path)
    doc_publisher.set_io()

    # Read and parse only; publish() is intentionally not called.
    reader = doc_publisher.reader
    return reader.read(doc_publisher.source,
                       doc_publisher.parser,
                       doc_publisher.settings)
def get_sphinx():
    """Return the ``(sphinx, publisher)`` pair cached on ``local_data``.

    On first use a Sphinx application and a matching docutils publisher
    are created, the publisher is stored as ``sphinx.publisher`` and the
    application as ``local_data.sphinx``.
    """
    cached = getattr(local_data, 'sphinx', None)
    if cached is not None:
        return cached, cached.publisher

    sphinx = Sphinx(tempdir, tempdir, tempdir, tempdir, 'json',
                    status=None, warning=None)
    sphinx.builder.translator_class = CustomHTMLTranslator

    env = sphinx.env
    env.patch_lookup_functions()
    env.temp_data['docname'] = 'text'
    env.temp_data['default_domain'] = 'py'

    publisher = Publisher(reader=None,
                          parser=None,
                          writer=HTMLWriter(sphinx.builder),
                          source_class=io.StringInput,
                          destination_class=io.NullOutput)
    publisher.set_components('standalone', 'restructuredtext', None)
    publisher.process_programmatic_settings(None, sphinx.env.settings, None)
    publisher.set_destination(None, None)

    sphinx.publisher = publisher
    local_data.sphinx = sphinx
    return sphinx, sphinx.publisher
def processHTML(src, depth):
    """Build the ``.html`` target for `src` when it is out of date.

    Sources whose extension is not ``DEFAULTLANG`` are ignored.  `depth`
    is the number of ``../`` steps from the target to ``aros.css``.
    """
    src = os.path.normpath(src)
    prefix, extension = os.path.splitext(src)
    suffix = extension[1:]
    if suffix != DEFAULTLANG:
        return

    dst = prefix + '.html'
    dst_abs = os.path.normpath(os.path.join(TRGROOT, dst))
    src_abs = os.path.normpath(os.path.join(SRCROOT, src))
    makedir(os.path.dirname(dst_abs))

    if not newer([src_abs], dst_abs):
        reportSkipping(dst)
        return

    reportBuilding(src)
    stylesheet = '../' * depth + 'aros.css'
    arguments = [
        '--no-generator',
        '--language=' + suffix,
        '--no-source-link',
        '--no-datestamp',
        '--output-encoding=iso-8859-15',
        '--target-suffix=html',
        '--stylesheet=' + stylesheet,
        '--link-stylesheet',
        src_abs,
        dst_abs,
    ]

    html_publisher = Publisher()
    html_publisher.set_reader('standalone', None, 'restructuredtext')
    html_publisher.set_writer('html')
    html_publisher.publish(argv=arguments)
def convertWWW(src, language, options=None):
    """Render `src` to HTML and return the encoded body.

    The input/output encoding is chosen from `language`; anything not
    listed below uses ``iso-8859-15``.  Each entry of `options` is
    inserted at the front of the argument list in turn.  The result is
    the writer's ``body_pre_docinfo`` plus ``body``, joined and encoded.
    """
    language_encodings = {
        'el': 'iso-8859-7',
        'pl': 'iso-8859-2',
        'ru': 'windows-1251',
        'cs': 'iso-8859-2',
    }
    encoding = language_encodings.get(language, 'iso-8859-15')

    arguments = [
        '--no-generator',
        '--language=' + language,
        '--no-source-link',
        '--no-datestamp',
        '--input-encoding=' + encoding,
        '--output-encoding=' + encoding,
        '--target-suffix=php',
        src,
        '',
    ]

    if options:
        for extra in options:
            arguments.insert(0, extra)

    www_publisher = Publisher(destination_class=NullOutput)
    www_publisher.set_reader('standalone', None, 'restructuredtext')
    www_publisher.set_writer('html')
    www_publisher.publish(argv=arguments)

    html_writer = www_publisher.writer
    pieces = html_writer.body_pre_docinfo + html_writer.body
    return ''.join(pieces).encode(encoding)
def render(self):
    """Return the HTML writer parts for this entry, using the cache.

    A cached value under ``cia.apps.blog.<id>`` is returned as-is when it
    is truthy; otherwise the content is rendered and stored in the cache.
    """
    cache_key = 'cia.apps.blog.%d' % self.id
    parts = cache.get(cache_key)
    if parts:
        return parts

    # Convert the reST markup to a document tree
    doctree_document = publish_doctree(source=self.content)
    doctree_document.walkabout(ImageTranslator(doctree_document))

    # Publish that document tree as HTML. We can't use any of
    # the simpler methods in docutils.core, since we need
    # access to writer.parts
    doctree_reader = doctree.Reader(parser_name='null')
    publisher = Publisher(doctree_reader, None, None,
                          source=DocTreeInput(doctree_document),
                          destination_class=StringOutput)
    publisher.set_writer('html4css1')
    overrides = {
        'cloak_email_addresses': True,
        'initial_header_level': 2,
    }
    publisher.process_programmatic_settings(None, overrides, None)
    publisher.publish()

    parts = publisher.writer.parts
    cache.set(cache_key, parts)
    return parts
def read_doc(app: "Sphinx", env: BuildEnvironment, filename: str) -> nodes.document:
    """Parse the document at `filename` and return its doctree."""
    # set up error_handler for the target document
    codecs.register_error(
        'sphinx', UnicodeDecodeErrorHandler(env.docname))  # type: ignore

    doc_reader = SphinxStandaloneReader()
    doc_reader.setup(app)

    filetype = get_filetype(app.config.source_suffix, filename)
    doc_parser = app.registry.create_source_parser(app, filetype)
    is_commonmark = doc_parser.__class__.__name__ == 'CommonMarkParser'
    if is_commonmark and doc_parser.settings_spec == ():
        # a workaround for recommonmark
        #   If recommonmark.AutoStrictify is enabled, the parser invokes
        #   reST parser internally.  But recommonmark-0.4.0 does not provide
        #   settings_spec for reST parser.  As a workaround, this copies
        #   settings_spec for RSTParser to the CommonMarkParser.
        doc_parser.settings_spec = RSTParser.settings_spec

    publisher = Publisher(
        reader=doc_reader,
        parser=doc_parser,
        writer=SphinxDummyWriter(),
        source_class=SphinxFileInput,
        destination=NullOutput(),
    )
    publisher.process_programmatic_settings(None, env.settings, None)
    publisher.set_source(source_path=filename)
    publisher.publish()
    return publisher.document
def render_readme_like_pypi(source, output_encoding='unicode'):
    """
    Render a ReST document just like PyPI does.

    Returns the ``body`` part of the HTML writer, encoded with
    `output_encoding` unless that is ``'unicode'``.  Raises
    ``TransformError`` for a link whose scheme is not in
    ``ALLOWED_SCHEMES``.
    """
    # Dedent all lines of `source`.
    source = trim_docstring(source)

    settings_overrides = {
        'raw_enabled': 0,  # no raw HTML code
        'file_insertion_enabled': 0,  # no file/URL access
        'halt_level': 2,  # at warnings or errors, raise an exception
        'report_level': 5,  # never report problems with the reST code
    }

    # Convert reStructuredText to a doctree using Docutils.
    document = publish_doctree(source=source,
                               settings_overrides=settings_overrides)

    # Reject links whose scheme is not explicitly allowed.
    for node in document.traverse():
        if node.tagname == '#text':
            continue
        for attribute in ('refuri', 'uri'):
            if node.hasattr(attribute):
                uri = node[attribute]
                break
        else:
            continue
        if urlparse(uri).scheme not in ALLOWED_SCHEMES:
            raise TransformError('link scheme not allowed: {0}'.format(uri))

    # now turn the transformed document into HTML
    doctree_reader = readers.doctree.Reader(parser_name='null')
    publisher = Publisher(doctree_reader,
                          source=io.DocTreeInput(document),
                          destination_class=io.StringOutput)
    publisher.set_writer('html')
    publisher.process_programmatic_settings(None, settings_overrides, None)
    publisher.set_destination(None, None)
    publisher.publish()

    body = publisher.writer.parts['body']
    if output_encoding == 'unicode':
        return body
    return body.encode(output_encoding)
def check_rst(self, rst):
    """Publish `rst` as pseudo-XML and assert on the reporter's max level.

    Returns ``(output, publisher)`` so callers can inspect the result
    further.
    """
    publisher = Publisher(reader=None, parser=None, writer=None,
                          settings=None,
                          source_class=io.StringInput,
                          destination_class=io.StringOutput)
    publisher.set_components(reader_name='standalone',
                             parser_name='restructuredtext',
                             writer_name='pseudoxml')
    publisher.process_programmatic_settings(
        settings_spec=None,
        settings_overrides={'output_encoding': 'unicode'},
        config_section=None,
    )
    publisher.set_source(rst, source_path=None)
    publisher.set_destination(destination=None, destination_path=None)

    rendered = publisher.publish(enable_exit_status=False)

    highest_level = publisher.document.reporter.max_level
    self.assertLess(highest_level, 0)
    return rendered, publisher
def _get_publisher(self, source_path):
    """Publish `source_path` as HTML and return the used Publisher.

    The built-in settings below are updated with the ``DOCUTILS_SETTINGS``
    entry of ``self.settings`` when that entry is truthy.
    """
    docutils_settings = {
        "initial_header_level": "2",
        "syntax_highlight": "short",
        "input_encoding": "utf-8",
    }
    overrides = self.settings.get("DOCUTILS_SETTINGS")
    if overrides:
        docutils_settings.update(overrides)

    publisher = Publisher(destination_class=StringOutput)
    publisher.set_components("standalone", "restructuredtext", "html")
    publisher.writer.translator_class = HTMLTranslator
    publisher.process_programmatic_settings(None, docutils_settings, None)
    publisher.set_source(source_path=source_path)
    publisher.publish()
    return publisher
def _get_publisher(self, source_path):
    """Publish `source_path` as HTML and return the used Publisher.

    The built-in settings below are updated with the ``DOCUTILS_SETTINGS``
    entry of ``self.settings`` when that entry is truthy.
    """
    docutils_settings = {
        'initial_header_level': '2',
        'syntax_highlight': 'short',
        'input_encoding': 'utf-8',
    }
    overrides = self.settings.get('DOCUTILS_SETTINGS')
    if overrides:
        docutils_settings.update(overrides)

    publisher = Publisher(destination_class=StringOutput)
    publisher.set_components('standalone', 'restructuredtext', 'html')
    publisher.writer.translator_class = HTMLTranslator
    publisher.process_programmatic_settings(None, docutils_settings, None)
    publisher.set_source(source_path=source_path)
    publisher.publish()
    return publisher
def pypi_rest2html(source, output_encoding="unicode"):
    r"""
    Render reST `source` to the HTML ``body`` part.

    >>> pypi_rest2html("test!")
    u'<p>test!</p>\n'

    The result is encoded with `output_encoding` unless that is
    ``"unicode"``.  Raises ``TransformError`` for a link whose scheme is
    not in ``ALLOWED_SCHEMES``.
    """
    settings_overrides = {
        "raw_enabled": 0,  # no raw HTML code
        "file_insertion_enabled": 0,  # no file/URL access
        "halt_level": 2,  # at warnings or errors, raise an exception
        "report_level": 5,  # never report problems with the reST code
    }

    # Convert reStructuredText to a doctree using Docutils.
    document = publish_doctree(source=source,
                               settings_overrides=settings_overrides)

    # Reject links whose scheme is not explicitly allowed.
    for node in document.traverse():
        if node.tagname == "#text":
            continue
        for attribute in ("refuri", "uri"):
            if node.hasattr(attribute):
                uri = node[attribute]
                break
        else:
            continue
        if urlparse(uri).scheme not in ALLOWED_SCHEMES:
            raise TransformError("link scheme not allowed")

    # now turn the transformed document into HTML
    doctree_reader = readers.doctree.Reader(parser_name="null")
    publisher = Publisher(doctree_reader,
                          source=io.DocTreeInput(document),
                          destination_class=io.StringOutput)
    publisher.set_writer("html")
    publisher.process_programmatic_settings(None, settings_overrides, None)
    publisher.set_destination(None, None)
    publisher.publish()

    body = publisher.writer.parts["body"]
    if output_encoding == "unicode":
        return body
    return body.encode(output_encoding)
def render(self):
    """Return the HTML writer parts for this entry, using the cache.

    A cached value under ``cia.apps.blog.<id>`` is returned as-is when it
    is truthy; otherwise the content is rendered and stored in the cache.
    """
    cache_key = 'cia.apps.blog.%d' % self.id
    parts = cache.get(cache_key)
    if parts:
        return parts

    # Convert the reST markup to a document tree
    tree = publish_doctree(source=self.content)
    image_visitor = ImageTranslator(tree)
    tree.walkabout(image_visitor)

    # Publish that document tree as HTML. We can't use any of
    # the simpler methods in docutils.core, since we need
    # access to writer.parts
    tree_reader = doctree.Reader(parser_name='null')
    html_publisher = Publisher(tree_reader, None, None,
                               source=DocTreeInput(tree),
                               destination_class=StringOutput)
    html_publisher.set_writer('html4css1')
    html_publisher.process_programmatic_settings(
        None,
        {'cloak_email_addresses': True, 'initial_header_level': 2},
        None)
    html_publisher.publish()

    parts = html_publisher.writer.parts
    cache.set(cache_key, parts)
    return parts
def test_syntax_extensions(file_params):
    """The description is parsed as a docutils commandline"""
    command_line = file_params.description
    option_parser = Publisher(parser=Parser()).setup_option_parser()
    try:
        settings = option_parser.parse_args(shlex.split(command_line)).__dict__
    except Exception as err:
        raise AssertionError(
            f"Failed to parse commandline: {file_params.description}\n{err}")

    # Collect docutils reports in memory.
    settings["warning_stream"] = StringIO()

    doctree = publish_doctree(
        file_params.content,
        parser=Parser(),
        settings_overrides=settings,
    )
    actual = doctree.pformat()
    file_params.assert_expected(actual, rstrip_lines=True)
def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description):
    """
    See docutils.core.publish_cmdline.

    Unlike the docutils function this returns a 3-tuple: the publish
    output followed by the ``_source`` and ``_destination`` values found
    on the publisher's settings.
    """
    cmdline_publisher = Publisher(reader, parser, writer, settings=settings)
    cmdline_publisher.set_components(reader_name, parser_name, writer_name)
    published = cmdline_publisher.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return (published,
            cmdline_publisher.settings._source,
            cmdline_publisher.settings._destination)
def init_publisher():
    """Create a string-output Publisher around the module-level ``g_writer``.

    Settings are fetched before the remaining components are filled in;
    the destination is set with no explicit target or path.
    """
    from docutils.core import Publisher
    from docutils.io import StringOutput

    publisher = Publisher(destination_class=StringOutput, writer=g_writer)
    publisher.get_settings()
    publisher.set_components('standalone', 'restructuredtext', 'html')
    publisher.set_destination(None, None)
    return publisher
def _get_publisher(self, source_path):
    """Publish the reST file at *source_path* to HTML and return the publisher.

    Built-in setting overrides are merged with the optional
    ``DOCUTILS_SETTINGS`` entry of ``self.settings`` (user values win),
    and ``HTMLTranslator`` is installed as the writer's translator class
    before publishing.  The caller gets the ``Publisher`` back after
    ``publish()`` has run.
    """
    overrides = {
        'initial_header_level': '2',
        'syntax_highlight': 'short',
        'input_encoding': 'utf-8',
    }
    # A missing or empty user mapping leaves the built-in overrides as is.
    overrides.update(self.settings.get('DOCUTILS_SETTINGS') or {})

    publisher = Publisher(destination_class=StringOutput)
    publisher.set_components('standalone', 'restructuredtext', 'html')
    publisher.writer.translator_class = HTMLTranslator
    publisher.process_programmatic_settings(None, overrides, None)
    publisher.set_source(source_path=source_path)
    publisher.publish()
    return publisher
def read_doc(app, env, filename):
    # type: (Sphinx, BuildEnvironment, unicode) -> nodes.document
    """Read *filename* through a docutils Publisher and return its doctree.

    The source input class and the parser are both obtained from
    ``app.registry`` for this particular file; output is discarded via
    ``NullOutput`` and only ``pub.document`` is returned.
    """
    registry = app.registry
    source_input_class = registry.get_source_input(filename)
    doc_reader = SphinxStandaloneReader(app)
    doc_source = source_input_class(
        app, env,
        source=None,
        source_path=filename,
        encoding=env.config.source_encoding,
    )
    doc_parser = registry.create_source_parser(app, filename)

    publisher = Publisher(
        reader=doc_reader,
        parser=doc_parser,
        writer=SphinxDummyWriter(),
        source_class=SphinxDummySourceClass,
        destination=NullOutput(),
    )
    publisher.set_components(None, 'restructuredtext', None)
    publisher.process_programmatic_settings(None, env.settings, None)
    publisher.set_source(doc_source, filename)
    publisher.publish()
    return publisher.document
def aoeuaoeuaoeu_docs(f):
    '''
    Placeholder for parsing ``f.__doc__`` with docutils; always raises.

    I couldn't figure out how to use the docutils docstring parser, so I
    wrote my own. Can somebody show me the right way to do this?

    Two abandoned attempts used to follow the ``raise`` below and could
    never execute: one drove a ``docutils.core.Publisher`` by hand with
    ``StringInput`` and the 'pseudoxml' writer (using names such as
    ``settings`` and ``destination_class`` that were never defined in
    this function), the other simply called
    ``publish_parts(f.__doc__, source_class=StringInput,
    source_path=f.__name__)``.  They were removed as unreachable code.

    Raises:
        NotImplementedError: unconditionally.
    '''
    raise NotImplementedError(
        'docutils-based docstring parsing is not implemented')
def read_doc(app, env, filename):
    # type: (Sphinx, BuildEnvironment, unicode) -> nodes.document
    """Read *filename* through a docutils Publisher and return its doctree.

    The reader is given every source parser known to ``app.registry`` and
    the file is wrapped in a ``SphinxFileInput``; output is discarded via
    ``NullOutput`` and only ``pub.document`` is returned.
    """
    known_parsers = app.registry.get_source_parsers()
    doc_reader = SphinxStandaloneReader(app, parsers=known_parsers)
    doc_source = SphinxFileInput(
        app, env,
        source=None,
        source_path=filename,
        encoding=env.config.source_encoding,
    )

    publisher = Publisher(
        reader=doc_reader,
        writer=SphinxDummyWriter(),
        source_class=SphinxDummySourceClass,
        destination=NullOutput(),
    )
    publisher.set_components(None, 'restructuredtext', None)
    publisher.process_programmatic_settings(None, env.settings, None)
    publisher.set_source(doc_source, filename)
    publisher.publish()
    return publisher.document
# Five "|"-separated fields wrapped in braces.  The character classes are
# negated sets, so they already span newlines and no regex flags are needed.
_SCAN_MARKER_RE = re.compile(
    r"{([^|}]*)\|([^|}]*)\|([^|}]*)\|([^|}]*)\|([^|}]*)}")


def _escape_scan_marker_pipes(text):
    """Backslash-escape the four ``|`` separators of every ``{a|b|c|d|e}``."""
    return _SCAN_MARKER_RE.sub(r"{\1\|\2\|\3\|\4\|\5}", text)


def zotero_odf_scan_publish_cmdline_to_binary(
        reader=None, reader_name='standalone',
        parser=None, parser_name='restructuredtext',
        writer=None, writer_name='pseudoxml',
        settings=None, settings_spec=None,
        settings_overrides=None, config_section=None,
        enable_exit_status=True, argv=None,
        usage=default_usage, description=description,
        destination=None, destination_class=io.BinaryFileOutput):
    """Run a command-line `Publisher` on a pre-processed copy of the input.

    When ``sys.argv`` names an input file, its text is copied to a
    temporary file with the ``|`` separators of every five-field
    ``{...|...|...|...|...}`` marker backslash-escaped, and
    ``sys.argv[1]`` is pointed at that copy before publishing.  The
    temporary file is removed afterwards, also when publishing raises or
    exits.

    Parameters mirror `publish_cmdline_to_binary`; ``destination`` is
    accepted but not used.  Returns ``None``.

    Fixes relative to the previous version:

    - the regex flags were passed positionally to ``re.sub``, where the
      fourth argument is ``count``, so at most 24 markers were escaped;
    - the input file handle was never closed;
    - the temporary file leaked whenever ``publish`` raised or called
      ``sys.exit`` (which ``enable_exit_status`` can trigger);
    - ``reader_name`` / ``parser_name`` / ``writer_name`` were ignored
      because ``set_components`` was never called.
    """
    # Tried to get internal conversion working, but using the big hammer
    # is so much simpler.
    ofh = NamedTemporaryFile(mode='w+', delete=False)
    try:
        with ofh:
            if len(sys.argv) > 1:
                with open(sys.argv[1]) as source_file:
                    txt = source_file.read()
                ofh.write(_escape_scan_marker_pipes(txt))
                # The publisher reads its input path from sys.argv (argv is
                # normally None), so redirect it to the escaped copy.
                sys.argv[1] = ofh.name

        pub = Publisher(reader=reader, parser=parser, writer=writer,
                        settings=settings,
                        destination_class=destination_class)
        # Only fills in the components that were not passed as instances.
        pub.set_components(reader_name, parser_name, writer_name)
        pub.publish(
            argv, usage, description, settings_spec, settings_overrides,
            config_section=config_section,
            enable_exit_status=enable_exit_status)
    finally:
        # See above.
        os.unlink(ofh.name)
def _to_odp_content(self, rst, xml_filename, odp_name='/tmp/out'):
    """Render *rst* to an ODP file and return its pretty-printed content.xml.

    The reST text is written to ``/tmp/in.rst`` and published through the
    ``rst2odp`` writer into *odp_name*.  ``content.xml`` is then read from
    that archive, pretty-printed, written to *xml_filename* and returned.

    Both files are now written inside ``with`` blocks so the handles are
    closed even if a write fails, and *xml_filename* is only opened once
    its content has been produced.

    NOTE(review): the fixed ``/tmp/in.rst`` path is kept as-is; it is
    shared by every caller, so concurrent use would clash -- confirm
    whether that matters here.
    """
    reader = standalone.Reader()
    writer = rst2odp.Writer()
    publisher = Publisher(
        reader, None, writer,
        settings=None,
        destination_class=rst2odp.BinaryFileOutput)
    publisher.set_components('standalone', 'restructuredtext', 'pseudoxml')

    description = ('Generates OpenDocument/OpenOffice/ODF slides from '
                   'standalone reStructuredText sources. '
                   + default_description)

    with open('/tmp/in.rst', 'w') as fin:
        fin.write(rst)

    argv = ['--traceback', '/tmp/in.rst', odp_name]
    # Positional Nones are settings_spec and settings_overrides.
    publisher.publish(
        argv, default_usage, description, None, None,
        config_section=None, enable_exit_status=1)

    # pull content.xml out of the generated archive
    z = zipwrap.Zippier(odp_name)
    content = preso.pretty_xml(z.cat('content.xml'))
    with open(xml_filename, 'w') as fout:
        fout.write(content)
    return content
def __init__(self):
    """Prepare the reusable publisher used for HTML generation.

    The publisher reads from ``StringInput``, writes to ``StringOutput``,
    uses the 'standalone' reader, 'restructuredtext' parser and 'html'
    writer, and is stored on ``self.pub`` once fully configured.
    """
    html_publisher = Publisher(
        None, None, None,
        settings=None,
        source_class=StringInput,
        destination_class=StringOutput,
    )
    html_publisher.set_components('standalone', 'restructuredtext', 'html')
    html_publisher.process_programmatic_settings(None, None, None)
    html_publisher.set_destination(None, None)
    self.pub = html_publisher
class HTMLGenerator:
    '''Really simple HTMLGenerator starting from publish_parts.

    It reuses the docutils.core.Publisher class, which means it is *not*
    threadsafe.
    '''

    def __init__(self, settings_spec=None,
                 settings_overrides=dict(report_level=5, halt_level=5),
                 config_section='general'):
        '''Build the shared publisher (string in, string out, html writer).

        NOTE(review): the ``settings_overrides`` default is a single dict
        shared by every call; it is only handed on to
        ``process_programmatic_settings`` here -- confirm nothing
        downstream mutates it.
        '''
        self.pub = Publisher(reader=None, parser=None, writer=None,
                             settings=None,
                             source_class=io.StringInput,
                             destination_class=io.StringOutput)
        self.pub.set_components(reader_name='standalone',
                                parser_name='restructuredtext',
                                writer_name='html')
        # hack: JEP-0071 does not allow HTML char entities, so we hack our
        # way out of it.
        # &mdash; == u"\u2014"
        # a setting to only emit character entities in the writer would be
        # nice
        # FIXME: several &nbsp; are emitted, and they are explicitly
        # forbidden in the JEP
        # &nbsp; == u"\u00a0"
        # This rebinds an entry on the translator *class*, so it affects
        # every user of that class, not just this instance.
        self.pub.writer.translator_class.attribution_formats['dash'] = (
            u'\u2014', '')
        self.pub.process_programmatic_settings(settings_spec,
                                               settings_overrides,
                                               config_section)

    def create_xhtml(self, text, destination=None, destination_path=None,
                     enable_exit_status=None):
        '''Create xhtml for a fragment of IM dialog.

        We can use the source_name to store info about the message.

        Returns the stripped ``fragment`` part of the writer output with
        every ``&nbsp;`` entity replaced by the U+00A0 character.  The
        previous code split on a literal whitespace character instead of
        the entity, which never removed the entity it was written for.
        '''
        self.pub.set_source(text, None)
        self.pub.set_destination(destination, destination_path)
        self.pub.publish(enable_exit_status=enable_exit_status)
        # kludge until we can get docutils to stop generating (rare)
        # &nbsp; entities
        fragment = self.pub.writer.parts['fragment'].strip()
        return fragment.replace('&nbsp;', u'\u00a0')
def publish_string_with_traceback(reader=None, reader_name=None,
                                  parser_name=None, writer_name=None,
                                  source=None, source_path=None):
    """A modified version of publish_string, so I can request traceback.

    Parameters:

    - `reader`: an optional reader instance handed to the `Publisher`.
    - `reader_name`, `parser_name`, `writer_name`: component names; when
      left as ``None`` they fall back to "python", "restructuredtext" and
      "pseudoxml", the values this function used to hard-code (the
      arguments were previously accepted but ignored).
    - `source`, `source_path`: the input string and its nominal path.

    Returns whatever ``Publisher.publish`` returns.

    The ``--traceback`` option is fed through the command-line settings
    parser.  ``publish`` is called with ``enable_exit_status=False``; the
    former ``enable_exit`` spelling does not match the keyword used by
    every other ``publish`` call in this file.
    """
    from docutils.core import Publisher
    from docutils import io

    pub = Publisher(reader=reader,
                    source_class=io.StringInput,
                    destination_class=io.StringOutput)
    pub.set_components(reader_name=reader_name or "python",
                       parser_name=parser_name or "restructuredtext",
                       writer_name=writer_name or "pseudoxml")
    pub.process_command_line(argv=["--traceback"])
    pub.set_source(source=source, source_path=source_path)
    return pub.publish(enable_exit_status=False)
def render_rst(directory, name, meta_parser, template):
    """Serve ``<name>.rst`` from *directory* as plain text or templated HTML.

    Aborts with 404 when the file does not exist.  Without a *template*
    the reST-specific markup is stripped, the text is run through Jinja2
    and returned as a ``text/plain`` response.  With a *template* the
    table of contents and the body are rendered to HTML with docutils,
    the ``meta`` part is passed through *meta_parser*, and everything is
    handed to ``render_template``.

    NOTE(review): the whitespace inside the ``' :%s'`` and
    ``' </pre></div>'`` literals is reproduced exactly as found; confirm
    it matches the intended indentation of the source documents.
    """
    # check if that file actually exists
    path = safe_join(directory, name + '.rst')
    if not os.path.exists(path):
        abort(404)

    # read file
    with codecs.open(path, encoding='utf-8') as fd:
        content = fd.read()

    plain_text = not template
    if plain_text:
        # Applied strictly in this order: the ']_' rules overlap.
        substitutions = [
            # Strip out RST
            ('.. meta::\n', ''),
            ('.. contents::\n\n', ''),
            ('.. raw:: html\n\n', ''),
            ('\n.. [', '\n['),
            (']_.', '].'),
            (']_,', '],'),
            (']_', '] '),
            # Change highlight formatter
            ('{% highlight', "{% highlight formatter='textspec'"),
        ]
        # Metatags
        substitutions.extend(
            (' :%s' % metatag, label)
            for (metatag, label) in METATAG_LABELS.items())
        for old, new in substitutions:
            content = content.replace(old, new)

    # render the post with Jinja2 to handle URLs etc.
    rendered_content = render_template_string(content).replace(
        '</pre></div>', ' </pre></div>')

    if plain_text:
        # Send response
        response = make_response(rendered_content)
        response.mimetype = 'text/plain'
        return response

    # Render the ToC: keep only the bullet list found at doctree[1][1]
    # and publish that reduced tree on its own.
    doctree = publish_doctree(source=rendered_content)
    bullet_list = doctree[1][1]
    doctree.clear()
    doctree.append(bullet_list)
    toc_reader = Reader(parser_name='null')
    toc_publisher = Publisher(toc_reader, None, None,
                              source=io.DocTreeInput(doctree),
                              destination_class=io.StringOutput)
    toc_publisher.set_writer('html')
    toc_publisher.publish()
    toc = toc_publisher.writer.parts['fragment']

    # Remove the ToC from the main document
    rendered_content = rendered_content.replace('.. contents::\n', '')

    # publish the spec with docutils
    parts = publish_parts(source=rendered_content,
                          source_path=directory,
                          writer_name="html")
    meta = meta_parser(parts['meta'])

    if directory == PROPOSAL_DIR:
        meta['num'] = int(name[:3])

    return render_template(template,
                           title=parts['title'],
                           toc=toc,
                           body=parts['fragment'],
                           name=name,
                           meta=meta)
class _PydocParser:
    """Docstring parser that keeps one docutils ``Publisher`` for all calls."""

    def __init__(self):
        """Create the reader, writer and publisher shared by every parse."""
        # The reader is handed this very list, so it must only ever be
        # emptied in place, never rebound.
        self.errors = []
        self.writer = _DocumentPseudoWriter()
        self.publisher = Publisher(_EpydocReader(self.errors),
                                   writer=self.writer,
                                   source_class=io.StringInput)
        self.publisher.set_components('standalone', 'restructuredtext',
                                      'pseudoxml')
        quiet_settings = {
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': None,
        }
        self.publisher.process_programmatic_settings(None, quiet_settings,
                                                     None)
        self.publisher.set_destination()

    def parse_docstring(self, docstring, errors):
        """Parse a docstring for eventual transformation into HTML

        Drop-in replacement for
        epydoc.markup.restructuredtext.parse_docstring that reuses the
        Publisher instance (the original did not), which yields
        significantly faster WADL generation for complex systems.

        *errors* is a caller-provided list; its contents are replaced
        with the errors collected during this parse.  Returns a
        ``ParsedRstDocstring`` wrapping the writer's document.
        """
        # Forget whatever the previous call collected (same list object).
        self.errors[:] = []

        publisher = self.publisher
        publisher.set_source(docstring, None)
        publisher.publish()

        # Copy the collected errors into the caller's list.
        errors[:] = list(self.errors)
        return ParsedRstDocstring(self.writer.document)
def main(args):
    """Publish reStructuredText to an epub using the command line.

    *args* is only echoed for debugging; the publisher is called with
    ``argv=None``, so the actual options and paths are taken from the
    process command line by docutils.  Returns ``None``.

    The echo used a Python 2 ``print`` statement, which is a syntax
    error on Python 3; the replacement prints the same text on both.
    """
    print("ARGS %s" % (args,))

    reader = standalone.Reader()
    writer = EpubWriter()
    parser = Parser()

    publisher = Publisher(reader, parser, writer, None,
                          destination_class=EpubFileOutput)
    publisher.set_components('standalone', 'restructuredtext', 'epub2')

    description = ('Generates epub books from reStructuredText sources. '
                   + default_description)

    # Positional Nones are argv (first) and settings_spec /
    # settings_overrides (after the description).
    publisher.publish(None, default_usage, description, None, None,
                      config_section=None, enable_exit_status=1)
def parts_from_doctree(document, destination_path=None,
                       writer=None, writer_name='pseudoxml',
                       settings=None, settings_spec=None,
                       settings_overrides=None, config_section=None,
                       enable_exit_status=None):
    """
    Set up & run a `Publisher` to render from an existing document tree
    data structure, for programmatic use with string I/O.  Return the
    writer's ``parts`` (``pub.writer.parts``) rather than the published
    output string.

    Note that document.settings is overridden; if you want to use the
    settings of the original `document`, pass settings=document.settings.

    Also, new document.transformer and document.reporter objects are
    generated.

    For encoded string output, be sure to set the 'output_encoding'
    setting to the desired encoding.  Set it to 'unicode' for unencoded
    Unicode string output.  Here's one way::

        parts_from_doctree(
            ..., settings_overrides={'output_encoding': 'unicode'})

    Parameters: `document` is a `docutils.nodes.document` object, an
    existing document tree.

    Other parameters: see `publish_programmatically`.
    """
    tree_reader = docutils.readers.doctree.Reader(parser_name='null')
    tree_source = io.DocTreeInput(document)
    publisher = Publisher(tree_reader, None, writer,
                          source=tree_source,
                          destination_class=io.StringOutput,
                          settings=settings)

    # A writer instance takes precedence over a writer name.
    use_named_writer = not writer and writer_name
    if use_named_writer:
        publisher.set_writer(writer_name)

    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_destination(None, destination_path)
    publisher.publish(enable_exit_status=enable_exit_status)
    return publisher.writer.parts