import copy import json import base64 from collections import namedtuple from queue import Empty from traitlets import HasTraits from jupyter_client.manager import start_new_kernel from nbconvert.utils.base import NbConvertBase from pandocfilters import RawBlock, Div, CodeBlock, Image, Str, Para import pypandoc from .exc import StitchError from . import options as opt DISPLAY_PRIORITY = NbConvertBase().display_data_priority CODE = 'code' CODEBLOCK = 'CodeBlock' OUTPUT_FORMATS = ['html', 'latex'] HERE = os.path.dirname(__file__) KernelPair = namedtuple("KernelPair", "km kc") CODE_CHUNK_XPR = re.compile(r'^```{\w+.*}|^```\w+') # -------- # User API # -------- class Stitch(HasTraits): '''
def wrap_output(self, chunk_name, messages, execution_count, kp, attrs):
    '''
    Wrap the messages of a code-block.

    Parameters
    ----------
    chunk_name : str
        Passed through to ``wrap_image_output`` for image outputs.
    messages : list of dicts
        Kernel messages for the chunk; execute-input messages are ignored.
    execution_count : int or None
        Not used by this method; kept for interface compatibility.
    kp : KernelPair
        Not used by this method; kept for interface compatibility.
    attrs : dict
        options from the source options-line.

    Returns
    -------
    output_blocks : list

    Raises
    ------
    StitchError
        If a message has type ``error`` and the ``error`` option is
        ``'raise'``.

    Notes
    -----
    Messages printed to stdout are wrapped in a CodeBlock. Messages
    publishing mimetypes (e.g. matplotlib figures) reuse Jupyter's
    display priority. See ``NbConvertBase.display_data_priority``.

    Mimetypes that are missing from the priority list rank after every
    listed one, so they are only selected when nothing better is
    available. Messages whose data bundle is empty are skipped.

    The result should be pandoc JSON AST compatible.
    '''
    # Messages can come from stdout or the io stream (maybe others?)
    output_messages = [x for x in messages if not is_execute_input(x)]
    display_messages = [x for x in output_messages
                        if not is_stdout(x) and not is_stderr(x)]

    output_blocks = []

    # The option does not change between messages, so look it up once.
    show_warnings = self.get_option('warning', attrs)

    # Handle all stdout first...
    for message in output_messages:
        if is_stdout(message) or (is_stderr(message) and show_warnings):
            output_blocks.append(plain_output(message['content']['text']))

    order = {
        mime: rank
        for rank, mime in enumerate(NbConvertBase().display_data_priority)
    }
    # Rank for mimetypes absent from the priority list: strictly after
    # every listed mimetype, instead of failing with a KeyError.
    unknown_rank = len(order)

    for message in display_messages:
        if message['header']['msg_type'] == 'error':
            traceback = message['content']['traceback']
            if self.get_option('error', attrs) == 'raise':
                raise StitchError(traceback)
            block = plain_output('\n'.join(traceback))
        else:
            all_data = message['content']['data']
            if not all_data:
                # Nothing to render; min() would raise on an empty bundle.
                continue

            key = min(all_data,
                      key=lambda mime: order.get(mime, unknown_rank))
            # LaTeX-based targets prefer a LaTeX representation when the
            # bundle offers one, regardless of display priority.
            if self.to in ('latex', 'pdf', 'beamer') and \
                    'text/latex' in all_data:
                key = 'text/latex'
            data = all_data[key]

            if key == 'text/plain':
                block = plain_output(data)
            elif key == 'text/latex':
                block = RawBlock('latex', data)
            elif key == 'text/html':
                block = RawBlock('html', data)
            elif key.startswith('image'):
                block = self.wrap_image_output(chunk_name, data, key, attrs)
            else:
                block = tokenize_block(data)

        output_blocks.append(block)
    return output_blocks
def wrap_output(self, chunk_name, messages, attrs):
    """
    Wrap the messages of a code-block.

    Parameters
    ----------
    chunk_name : str
        Passed through to ``wrap_image_output`` for image/PDF outputs.
    messages : list of dicts
        Kernel messages for the chunk; execute-input messages are ignored.
    attrs : dict
        options from the source options-line. The ``results`` option may
        have the form ``pandoc [-r/-f/--read/--from FORMAT] [extra args]``;
        in that case FORMAT (default ``markdown`` when other arguments are
        given) and the remaining arguments replace ``self.pandoc_format``
        and ``self.pandoc_extra_args`` for this chunk.

    Returns
    -------
    output_blocks : list

    Raises
    ------
    KnittyError
        If a message has type ``error`` and the ``error`` option is
        ``'raise'``.

    Notes
    -----
    Messages printed to stdout are wrapped in a CodeBlock. Messages
    publishing mimetypes (e.g. matplotlib figures) reuse Jupyter's
    display priority. See ``NbConvertBase.display_data_priority``.
    ``application/javascript`` is ranked after that list, and any other
    unlisted mimetype ranks after it, so unlisted mimetypes are only
    selected when nothing better is available.

    The result should be pandoc JSON AST compatible.
    """
    # set parser options
    results = self.get_option('results', attrs)
    pandoc_format = self.pandoc_format
    pandoc_extra_args = self.pandoc_extra_args
    pandoc = False
    if re.match(r'^pandoc(\s|$)', results):
        pandoc = True
        # Skip "pandoc" and its separator; split() discards any further
        # whitespace that follows.
        results_args = results[7:].split()
        if results_args:
            parser = argparse.ArgumentParser()
            parser.add_argument('-r', '-f', '--read', '--from')
            read, context = parser.parse_known_args(results_args)
            pandoc_format = read.read or "markdown"
            pandoc_extra_args = context or None

    # ``text/markdown`` output is parsed with a markdown-family reader:
    # the chunk's format if it is one, else the document's, else plain
    # pandoc markdown.
    markdown_family = r'^(markdown|gfm|commonmark)'
    if re.match(markdown_family, pandoc_format):
        md_format, md_extra_args = pandoc_format, pandoc_extra_args
    elif re.match(markdown_family, self.pandoc_format):
        md_format, md_extra_args = self.pandoc_format, self.pandoc_extra_args
    else:
        md_format, md_extra_args = 'markdown', None

    # Messages can come from stdout or the io stream (maybe others?)
    output_messages = [x for x in messages if not is_execute_input(x)]
    display_messages = [
        x for x in output_messages
        if not is_stdout(x) and not is_stderr(x)
    ]

    output_blocks = []

    # The option does not change between messages, so look it up once.
    show_warnings = self.get_option('warning', attrs)

    # Handle all stdout first...
    for message in output_messages:
        if is_stderr(message) and show_warnings:
            # Warnings are never run through the pandoc reader.
            output_blocks += plain_output(message['content']['text'])
        elif is_stdout(message):
            output_blocks += plain_output(message['content']['text'],
                                          pandoc_format, pandoc_extra_args,
                                          pandoc)

    base_priority = list(NbConvertBase().display_data_priority)
    order = {mime: rank for rank, mime in enumerate(base_priority)}
    # JavaScript always ranks right after the nbconvert list.
    order['application/javascript'] = len(base_priority)
    # Any other unlisted mimetype ranks strictly after that, instead of
    # failing with a KeyError.
    unknown_rank = len(base_priority) + 1

    for message in display_messages:
        if message['header']['msg_type'] == 'error':
            traceback = message['content']['traceback']
            if self.get_option('error', attrs) == 'raise':
                raise KnittyError(traceback)
            blocks = plain_output('\n'.join(traceback))
        else:
            all_data = message['content']['data']
            if not all_data:  # some R output
                continue

            key = min(all_data,
                      key=lambda mime: order.get(mime, unknown_rank))
            # LaTeX-based targets prefer a LaTeX representation when the
            # bundle offers one, regardless of display priority.
            if self.filter_to in ('latex', 'beamer') and \
                    'text/latex' in all_data:
                key = 'text/latex'
            data = all_data[key]

            if key == 'text/plain':
                blocks = plain_output(data, pandoc_format,
                                      pandoc_extra_args, pandoc)
            elif key == 'text/latex':
                blocks = [RawBlock('latex', data)]
            elif key == 'text/html':
                blocks = [RawBlock('html', data)]
            elif key == 'application/javascript':
                script = '<script type=text/javascript>{}</script>'.format(
                    data)
                blocks = [RawBlock('html', script)]
            elif key.startswith('image') or key == 'application/pdf':
                blocks = [
                    self.wrap_image_output(chunk_name, data, key, attrs)
                ]
            elif key == 'text/markdown':
                blocks = tokenize_block(data, md_format, md_extra_args)
            else:
                blocks = tokenize_block(data, pandoc_format,
                                        pandoc_extra_args)

        output_blocks += blocks
    return output_blocks