def __init__(self, title, raw_code, globs, locs):
    """Store the snippet and prepare its executable form in ``self.code``.

    Parameters
    ----------
    title
        Name of the snippet; used as the ``DocTest`` name or as the
        filename argument handed to ``compile``.
    raw_code
        Source text of the snippet.
    globs, locs
        Namespaces kept on the instance as ``self.globs`` / ``self.locs``.

    If ``raw_code`` contains doctest examples, ``self.code`` becomes a
    ``DocTest`` built from them; otherwise it is the code object obtained
    by compiling ``raw_code`` in ``"exec"`` mode.
    """
    self.title, self.raw_code = title, raw_code
    self.globs, self.locs = globs, locs

    examples = DocTestParser().get_examples(raw_code)
    if not any(examples):
        # no interactive examples found: treat the text as plain source
        self.code = compile(raw_code, title, "exec")
        return

    self.code = DocTest(
        examples=examples,
        globs=self.globs,
        name=title,
        filename=None,
        lineno=None,
        docstring=None,
    )
def __init__(self, title, raw_code, globs, locs):
    """Store the snippet and prepare its executable form in ``self.code``.

    Parameters
    ----------
    title
        Name of the snippet; used as the ``DocTest`` name or as the
        filename argument handed to ``compile``.
    raw_code
        Source text of the snippet.
    globs, locs
        Namespaces kept on the instance as ``self.globs`` / ``self.locs``.

    If ``raw_code`` contains doctest examples, ``self.code`` becomes a
    ``DocTest`` built from them; otherwise it is the code object obtained
    by compiling ``raw_code`` in ``"exec"`` mode.
    """
    self.title, self.raw_code = title, raw_code
    self.globs, self.locs = globs, locs

    examples = DocTestParser().get_examples(raw_code)
    if not any(examples):
        # no interactive examples found: treat the text as plain source
        self.code = compile(raw_code, title, "exec")
        return

    self.code = DocTest(
        examples=examples,
        globs=self.globs,
        name=title,
        filename=None,
        lineno=None,
        docstring=None,
    )
class ReST2IPyNB(object):
    """Convert a reStructuredText file into an IPython notebook structure.

    Instances are callable: ``converter(filename)`` reads the file, turns it
    into a doctree with ``publish_doctree`` and walks that tree, emitting
    heading, markdown and code cells into a deep copy of ``ipynb_template``
    (``nbformat`` 3).  The notebook is returned as a plain ``dict``.
    """

    # Skeleton of an empty notebook; deep-copied for every conversion.
    ipynb_template = {
        "metadata": {"name": ""},
        "nbformat": 3,
        "nbformat_minor": 0,
        "worksheets": [{"cells": [], "metadata": {}}],
    }

    # Cell skeletons; deep-copied whenever a new cell is opened.
    codecell_template = {
        "cell_type": "code",
        "collapsed": False,
        "input": [],
        "language": "python",
        "metadata": {},
        "outputs": [],
    }

    markdowncell_template = {
        "cell_type": "markdown",
        "metadata": {},
        "source": [],
    }

    headingcell_template = {
        "cell_type": "heading",
        "level": None,
        "metadata": {},
        "source": [],
    }

    def __init__(self, baseurl='', apiref_baseurl='', glossary_baseurl='',
                 doctree_parser_settings=None):
        """Configure link targets and parser settings.

        Parameters
        ----------
        baseurl
            Prefix of the "visit this page" links emitted for figures.
        apiref_baseurl
            Prefix of the links generated by ``_ref2apiref``.
        glossary_baseurl
            Prefix of the links generated for ``:term:`` references.
        doctree_parser_settings
            Passed to ``publish_doctree`` as ``settings_overrides``.  When
            ``None``, ``{'report_level': 5}`` is used.
        """
        if doctree_parser_settings is None:
            # shut up
            doctree_parser_settings = {'report_level': 5}
        # Caller-supplied settings used to be dropped (the attribute stayed
        # None); they are honoured now.
        self._doctree_parser_settings = doctree_parser_settings
        self._baseurl = baseurl
        self._apiref_baseurl = apiref_baseurl
        self._glossary_baseurl = glossary_baseurl
        self._doctest_parser = DocTestParser()
        self._reset_state()

    def _reset_state(self):
        """Reset all per-conversion state to its initial values."""
        self._state = {
            'sec_depth': 1,
            'indent': 0,
            'in_markdowncell': False,
            'need_new_codecell': False,
            'need_hanging_indent': False,
        }
        self._currcell = None
        self._buffer = ''
        self._notebook = None
        self._filename = None

    def __call__(self, filename):
        """Convert the reST file ``filename`` and return the notebook dict."""
        self._filename = filename
        # context manager closes the handle even if reading fails
        with open(filename) as rest_file:
            rest = rest_file.read()
        doc = prep_rest(rest)
        self._notebook = deepcopy(ReST2IPyNB.ipynb_template)
        doctree = publish_doctree(
            doc, settings_overrides=self._doctree_parser_settings)
        self._currcells = self._notebook['worksheets'][0]['cells']
        self._parse(doctree)
        self._store_currcell()
        notebook = self._notebook
        self._reset_state()
        return notebook

    def _ref2apiref(self, reftext):
        """Return a markdown link ``[label](url)`` for an API reference.

        ``reftext`` is either ``:<role>:<target>`` or a bare target.  A bare
        target ending in ``()`` is treated as a function.  A target starting
        with ``~`` gets an abbreviated label (last component, or the last two
        for methods); any other target is labelled with its full dotted name.
        Method and function labels carry a trailing ``()``.
        """
        # try to determine what kind of ref we got
        if reftext.startswith(':'):
            rtype, ref = re.match(r':([a-z]+):(.*)', reftext).groups()
        else:
            rtype, ref = None, reftext
        if rtype is None and ref.endswith('()'):
            # function?
            rtype = 'func'
            ref = ref[:-2]
        refid = ref.lstrip('~').rstrip('()')
        parts = refid.split('.')
        if rtype == 'meth':
            # page is named after everything but the last component
            ref_url = '%s/%s.html#%s' % (self._apiref_baseurl,
                                         '.'.join(parts[:-1]),
                                         refid)
        else:
            anchor = refid.replace('.', '-').replace('_', '-').lower()
            ref_url = '%s/%s.html#%s' % (self._apiref_baseurl, refid, anchor)
        if ref.startswith('~'):
            if rtype == 'meth':
                ref_label = '.'.join(parts[-2:])
            else:
                ref_label = parts[-1]
        else:
            # previously the label stayed None here and the link was
            # rendered as "[None](...)"
            ref_label = refid
        if rtype in ('meth', 'func'):
            ref_label += '()'
        return '[%s](%s)' % (ref_label, ref_url)

    def _parse(self, doctree):
        """Walk the children of ``doctree`` and emit notebook content.

        Dispatches on each child's ``tagname``; container nodes are handled
        by recursing into them.

        Raises
        ------
        RuntimeError
            For an unknown tag, an unhandled ``:role:`` reference, or a
            system message that is not explicitly dealt with.
        """
        for child in doctree.children:
            tag = child.tagname
            if tag == 'title':
                self._add_headingcell(self._state['sec_depth'])
                self._parse(child)
                # the first title encountered names the notebook
                if not self._notebook['metadata']['name']:
                    self._notebook['metadata']['name'] = self._buffer
            elif tag == '#text':
                self._add2buffer(child.astext())
            elif tag in ('paragraph', 'literal_block'):
                # FIXME: literal_block likely needs better handling
                self._add_markdowncell()
                if self._state['need_hanging_indent']:
                    # first paragraph of a list item continues the bullet line
                    self._state['need_hanging_indent'] = False
                else:
                    self._add2buffer('', newline=True, paragraph=True)
                self._flush_buffer()
                self._parse(child)
            elif tag == 'inline':
                print("warning, no idea how to handle ``inline``")
            elif tag == 'raw':
                # FIXME:
                self._add_markdowncell()
                self._flush_buffer()
                self._currcell['source'].insert(0, child.astext())
                self._store_currcell()
            elif tag == 'doctest_block':
                self._add_codecell()
                needs_new_codecell = False
                for ex in self._doctest_parser.get_examples(child.rawsource):
                    if needs_new_codecell:
                        self._add_codecell()
                    self._add2buffer('%s%s' % (' ' * ex.indent, ex.source),
                                     newline=False)
                    self._flush_buffer(startnew=True)
                    # an example with expected output closes the cell
                    needs_new_codecell = len(ex.want) > 0
            elif tag == 'section':
                self._state['sec_depth'] += 1
                self._parse(child)
                self._state['sec_depth'] -= 1
            elif tag == 'note':
                self._add_markdowncell(force=True)
                self._parse(child)
                self._flush_buffer()
                self._currcell['source'].insert(0, '- - -\n*Note*')
                self._currcell['source'].append('- - -\n')
                self._store_currcell()
            elif tag == 'title_reference':
                self._flush_buffer()
                self._parse(child)
                if self._buffer.startswith(':term:'):
                    # link to glossary
                    term = re.match(r'.*<(.*)>', self._buffer)
                    if term is None:
                        term = re.match(r':term:(.*)',
                                        self._buffer).groups()[0]
                        term_text = term
                    else:
                        term = term.groups()[0]
                        term_text = re.match(r':term:(.*) <',
                                             self._buffer).groups()[0]
                    self._buffer = '[%s](%s#term-%s)' % (
                        term_text,
                        self._glossary_baseurl,
                        term.lower().replace(' ', '-'))
                elif self._buffer.startswith(('~mvpa', 'mvpa', ':meth:',
                                              ':mod:', ':class:', ':func:')):
                    # various API reference link variants
                    self._buffer = self._ref2apiref(self._buffer)
                # XXX for the rest I have no idea how to link them without
                # huge effort
                elif self._buffer.startswith(':ref:'):
                    groups = re.match(r':ref:(.*) <|:ref:(.*)',
                                      self._buffer).groups()
                    self._buffer = '*%s*' \
                        % [m for m in groups if m is not None][0]
                elif self._buffer.startswith(':math:'):
                    # NOTE(review): the ':math:' prefix stays inside the
                    # delimiters -- confirm this is intended
                    self._buffer = '$$%s$$' % self._buffer
                elif re.match(r':([a-z]+):', self._buffer):
                    # catch other ref type we should handle, but do not yet
                    raise RuntimeError("unhandled reference type '%s'"
                                       % self._buffer)
                else:
                    # plain refs seems to be mostly used for external API
                    self._buffer = '`%s`' % self._buffer
            elif tag == 'emphasis':
                self._flush_buffer()
                self._parse(child)
                self._buffer = '*%s*' % self._buffer
            elif tag == 'strong':
                self._flush_buffer()
                self._parse(child)
                self._buffer = '**%s**' % self._buffer
            elif tag == 'literal':
                # strip one layer of backticks
                self._add2buffer(child.rawsource[1:-1])
            elif tag == 'problematic':
                # call form works as a statement on Python 2 and as a
                # function on Python 3
                print('PROBLEMATIC: %s' % child)
                self._parse(child)
            elif tag == 'reference':
                self._flush_buffer()
                self._parse(child)
                self._buffer = '[%s][%s]' % (self._buffer,
                                             child.attributes['name'])
            elif tag in ('comment', 'target'):
                pass
            elif tag == 'definition_list':
                self._add_markdowncell()
                for item in child.children:
                    self._flush_buffer()
                    self._parse(item.children[0])
                    term = self._buffer
                    self._buffer = ''
                    self._parse(item.children[1])
                    self._buffer = '\n%s: %s' % (term, self._buffer)
            elif tag in ('enumerated_list', 'bullet_list'):
                self._add_markdowncell()
                for i, item in enumerate(child.children):
                    if tag == 'enumerated_list':
                        prefix = '%i.' % (i + 1,)
                    else:
                        prefix = '*'
                    self._flush_buffer()
                    self._add2buffer('%s ' % prefix,
                                     newline=True, paragraph=True)
                    self._state['indent'] += 4
                    self._state['need_hanging_indent'] = True
                    self._parse(item)
                    self._state['indent'] -= 4
                self._flush_buffer()
            elif tag == 'list_item':
                for c in child.children:
                    self._parse(c)
            elif tag == 'term':
                self._parse(child.children[0])
            elif tag == 'figure':
                # this can't be expressed in markdown
                self._flush_buffer()
                file_url = '%s/%s.html' % (
                    self._baseurl,
                    os.path.splitext(os.path.basename(self._filename))[0])
                # backslashes written explicitly; '\[' is an invalid escape
                self._add2buffer(
                    '\\[Visit [%s](%s) to view this figure\\]'
                    % (file_url, file_url),
                    newline=True, paragraph=True)
            elif tag == 'block_quote':
                self._flush_buffer()
                first_line = len(self._currcell['source'])
                # skip the wrapping paragraph
                self._parse(child.children[0])
                self._flush_buffer()
                self._currcell['source'][first_line] = \
                    '\n\n> %s' % self._currcell['source'][first_line]
            elif tag == 'system_message':
                if child.attributes['type'] == 'INFO':
                    continue
                # only looked up for non-INFO messages, as before
                message = child.children[0].astext()
                if message == 'Unknown directive type "exercise".':
                    exercise_text = '\n'.join(
                        [l.strip() for l in
                         child.children[1][0].astext().split('\n')][2:])
                    self._add_markdowncell(force=True)
                    self._parse(publish_doctree(
                        exercise_text,
                        settings_overrides=self._doctree_parser_settings))
                    self._flush_buffer()
                    self._currcell['source'].insert(0, '- - -\n*Exercise*')
                    self._add_codecell()
                    self._add2buffer(
                        '# you can use this cell to for this exercise\n')
                    self._add_markdowncell()
                    self._currcell['source'].append('- - -\n')
                elif message in ('Unknown directive type "todo".',
                                 'Unknown directive type "tikz".'):
                    pass
                elif message == 'Unknown directive type "ipython".':
                    python_code = '\n'.join(
                        [l.strip() for l in
                         child.children[1][0].astext().split('\n')][2:])
                    self._flush_buffer()
                    self._add_codecell()
                    self._currcell['input'].insert(0, python_code)
                    self._store_currcell()
                else:
                    raise RuntimeError("cannot handle system message '%s'"
                                       % child.astext())
            else:
                if hasattr(child, 'line') and child.line:
                    line = ' on line %i' % child.line
                else:
                    line = ''
                raise RuntimeError("Unknown tag '%s'%s" % (tag, line))

    def _store_currcell(self):
        """Flush pending text into the open cell and append it to the notebook.

        Does nothing apart from clearing ``_currcell`` when no cell is open.
        """
        if self._currcell is not None:
            self._flush_buffer()
            # guard against a code cell that never received any input
            if self._currcell['cell_type'] == 'code' \
                    and self._currcell['input']:
                # remove last newline to save on vertical space
                self._currcell['input'][-1] = \
                    self._currcell['input'][-1].rstrip('\n')
            self._currcells.append(self._currcell)
        self._currcell = None

    def _add_headingcell(self, level):
        """Store the open cell and start a heading cell of the given level."""
        self._store_currcell()
        self._currcell = deepcopy(ReST2IPyNB.headingcell_template)
        self._currcell['level'] = level

    def _add_codecell(self):
        """Store the open cell and start a new code cell."""
        self._store_currcell()
        self._currcell = deepcopy(ReST2IPyNB.codecell_template)

    def _add_markdowncell(self, force=False):
        """Make sure a markdown cell is open.

        An already open markdown cell is reused unless ``force`` is true.
        """
        if self._currcell is None \
                or self._currcell['cell_type'] != 'markdown' \
                or force:
            # we need a new cell
            self._store_currcell()
            self._currcell = deepcopy(ReST2IPyNB.markdowncell_template)

    def _add2buffer(self, value, newline=False, paragraph=False):
        """Append ``value`` to the text buffer.

        With ``newline`` the value is preceded by a line break (two when
        ``paragraph`` is set) and the current indentation; ``paragraph`` has
        no effect without ``newline``.
        """
        if newline:
            nl = '\n\n' if paragraph else '\n'
            self._buffer += '%s%s%s' % (nl, ' ' * self._state['indent'], value)
        else:
            self._buffer += value

    def _flush_buffer(self, startnew=True):
        """Move the buffered text into the open cell and empty the buffer.

        Code cells receive the text in ``input``, all others in ``source``.
        With ``startnew`` (or when the field is still empty) a new entry is
        appended; otherwise the text is added to the last entry.  An empty
        buffer is a no-op.
        """
        if not self._buffer:
            return
        if self._currcell['cell_type'] == 'code':
            target_field = 'input'
        else:
            target_field = 'source'
        entries = self._currcell[target_field]
        if startnew or not entries:
            entries.append(self._buffer)
        else:
            entries[-1] += self._buffer
        self._buffer = ''
class ReST2IPyNB(object):
    """Convert a reStructuredText file into an IPython notebook structure.

    Instances are callable: ``converter(filename)`` reads the file, turns it
    into a doctree with ``publish_doctree`` and walks that tree, emitting
    heading, markdown and code cells into a deep copy of ``ipynb_template``
    (``nbformat`` 3).  The notebook is returned as a plain ``dict``.
    """

    # Skeleton of an empty notebook; deep-copied for every conversion.
    ipynb_template = {
        "metadata": {"name": ""},
        "nbformat": 3,
        "nbformat_minor": 0,
        "worksheets": [{"cells": [], "metadata": {}}],
    }

    # Cell skeletons; deep-copied whenever a new cell is opened.
    codecell_template = {
        "cell_type": "code",
        "collapsed": False,
        "input": [],
        "language": "python",
        "metadata": {},
        "outputs": [],
    }

    markdowncell_template = {
        "cell_type": "markdown",
        "metadata": {},
        "source": [],
    }

    headingcell_template = {
        "cell_type": "heading",
        "level": None,
        "metadata": {},
        "source": [],
    }

    def __init__(self, baseurl='', apiref_baseurl='', glossary_baseurl='',
                 doctree_parser_settings=None):
        """Configure link targets and parser settings.

        Parameters
        ----------
        baseurl
            Prefix of the "visit this page" links emitted for figures.
        apiref_baseurl
            Prefix of the links generated by ``_ref2apiref``.
        glossary_baseurl
            Prefix of the links generated for ``:term:`` references.
        doctree_parser_settings
            Passed to ``publish_doctree`` as ``settings_overrides``.  When
            ``None``, ``{'report_level': 5}`` is used.
        """
        if doctree_parser_settings is None:
            # shut up
            doctree_parser_settings = {'report_level': 5}
        # Caller-supplied settings used to be dropped (the attribute stayed
        # None); they are honoured now.
        self._doctree_parser_settings = doctree_parser_settings
        self._baseurl = baseurl
        self._apiref_baseurl = apiref_baseurl
        self._glossary_baseurl = glossary_baseurl
        self._doctest_parser = DocTestParser()
        self._reset_state()

    def _reset_state(self):
        """Reset all per-conversion state to its initial values."""
        self._state = {
            'sec_depth': 1,
            'indent': 0,
            'in_markdowncell': False,
            'need_new_codecell': False,
            'need_hanging_indent': False,
        }
        self._currcell = None
        self._buffer = ''
        self._notebook = None
        self._filename = None

    def __call__(self, filename):
        """Convert the reST file ``filename`` and return the notebook dict."""
        self._filename = filename
        # context manager closes the handle even if reading fails
        with open(filename) as rest_file:
            rest = rest_file.read()
        doc = prep_rest(rest)
        self._notebook = deepcopy(ReST2IPyNB.ipynb_template)
        doctree = publish_doctree(
            doc, settings_overrides=self._doctree_parser_settings)
        self._currcells = self._notebook['worksheets'][0]['cells']
        self._parse(doctree)
        self._store_currcell()
        notebook = self._notebook
        self._reset_state()
        return notebook

    def _ref2apiref(self, reftext):
        """Return a markdown link ``[label](url)`` for an API reference.

        ``reftext`` is either ``:<role>:<target>`` or a bare target.  A bare
        target ending in ``()`` is treated as a function.  A target starting
        with ``~`` gets an abbreviated label (last component, or the last two
        for methods); any other target is labelled with its full dotted name.
        Method and function labels carry a trailing ``()``.
        """
        # try to determine what kind of ref we got
        if reftext.startswith(':'):
            rtype, ref = re.match(r':([a-z]+):(.*)', reftext).groups()
        else:
            rtype, ref = None, reftext
        if rtype is None and ref.endswith('()'):
            # function?
            rtype = 'func'
            ref = ref[:-2]
        refid = ref.lstrip('~').rstrip('()')
        parts = refid.split('.')
        if rtype == 'meth':
            # page is named after everything but the last component
            ref_url = '%s/%s.html#%s' % (self._apiref_baseurl,
                                         '.'.join(parts[:-1]),
                                         refid)
        else:
            anchor = refid.replace('.', '-').replace('_', '-').lower()
            ref_url = '%s/%s.html#%s' % (self._apiref_baseurl, refid, anchor)
        if ref.startswith('~'):
            if rtype == 'meth':
                ref_label = '.'.join(parts[-2:])
            else:
                ref_label = parts[-1]
        else:
            # previously the label stayed None here and the link was
            # rendered as "[None](...)"
            ref_label = refid
        if rtype in ('meth', 'func'):
            ref_label += '()'
        return '[%s](%s)' % (ref_label, ref_url)

    def _parse(self, doctree):
        """Walk the children of ``doctree`` and emit notebook content.

        Dispatches on each child's ``tagname``; container nodes are handled
        by recursing into them.

        Raises
        ------
        RuntimeError
            For an unknown tag, an unhandled ``:role:`` reference, or a
            system message that is not explicitly dealt with.
        """
        for child in doctree.children:
            tag = child.tagname
            if tag == 'title':
                self._add_headingcell(self._state['sec_depth'])
                self._parse(child)
                # the first title encountered names the notebook
                if not self._notebook['metadata']['name']:
                    self._notebook['metadata']['name'] = self._buffer
            elif tag == '#text':
                self._add2buffer(child.astext())
            elif tag in ('paragraph', 'literal_block'):
                # FIXME: literal_block likely needs better handling
                self._add_markdowncell()
                if self._state['need_hanging_indent']:
                    # first paragraph of a list item continues the bullet line
                    self._state['need_hanging_indent'] = False
                else:
                    self._add2buffer('', newline=True, paragraph=True)
                self._flush_buffer()
                self._parse(child)
            elif tag == 'inline':
                print("warning, no idea how to handle ``inline``")
            elif tag == 'raw':
                # FIXME:
                self._add_markdowncell()
                self._flush_buffer()
                self._currcell['source'].insert(0, child.astext())
                self._store_currcell()
            elif tag == 'doctest_block':
                self._add_codecell()
                needs_new_codecell = False
                for ex in self._doctest_parser.get_examples(child.rawsource):
                    if needs_new_codecell:
                        self._add_codecell()
                    self._add2buffer('%s%s' % (' ' * ex.indent, ex.source),
                                     newline=False)
                    self._flush_buffer(startnew=True)
                    # an example with expected output closes the cell
                    needs_new_codecell = len(ex.want) > 0
            elif tag == 'section':
                self._state['sec_depth'] += 1
                self._parse(child)
                self._state['sec_depth'] -= 1
            elif tag == 'note':
                self._add_markdowncell(force=True)
                self._parse(child)
                self._flush_buffer()
                self._currcell['source'].insert(0, '- - -\n*Note*')
                self._currcell['source'].append('- - -\n')
                self._store_currcell()
            elif tag == 'title_reference':
                self._flush_buffer()
                self._parse(child)
                if self._buffer.startswith(':term:'):
                    # link to glossary
                    term = re.match(r'.*<(.*)>', self._buffer)
                    if term is None:
                        term = re.match(r':term:(.*)',
                                        self._buffer).groups()[0]
                        term_text = term
                    else:
                        term = term.groups()[0]
                        term_text = re.match(r':term:(.*) <',
                                             self._buffer).groups()[0]
                    self._buffer = '[%s](%s#term-%s)' % (
                        term_text,
                        self._glossary_baseurl,
                        term.lower().replace(' ', '-'))
                elif self._buffer.startswith(('~mvpa', 'mvpa', ':meth:',
                                              ':mod:', ':class:', ':func:')):
                    # various API reference link variants
                    self._buffer = self._ref2apiref(self._buffer)
                # XXX for the rest I have no idea how to link them without
                # huge effort
                elif self._buffer.startswith(':ref:'):
                    groups = re.match(r':ref:(.*) <|:ref:(.*)',
                                      self._buffer).groups()
                    self._buffer = '*%s*' \
                        % [m for m in groups if m is not None][0]
                elif self._buffer.startswith(':math:'):
                    # NOTE(review): the ':math:' prefix stays inside the
                    # delimiters -- confirm this is intended
                    self._buffer = '$$%s$$' % self._buffer
                elif re.match(r':([a-z]+):', self._buffer):
                    # catch other ref type we should handle, but do not yet
                    raise RuntimeError("unhandled reference type '%s'"
                                       % self._buffer)
                else:
                    # plain refs seems to be mostly used for external API
                    self._buffer = '`%s`' % self._buffer
            elif tag == 'emphasis':
                self._flush_buffer()
                self._parse(child)
                self._buffer = '*%s*' % self._buffer
            elif tag == 'strong':
                self._flush_buffer()
                self._parse(child)
                self._buffer = '**%s**' % self._buffer
            elif tag == 'literal':
                # strip one layer of backticks
                self._add2buffer(child.rawsource[1:-1])
            elif tag == 'problematic':
                # call form works as a statement on Python 2 and as a
                # function on Python 3
                print('PROBLEMATIC: %s' % child)
                self._parse(child)
            elif tag == 'reference':
                self._flush_buffer()
                self._parse(child)
                self._buffer = '[%s][%s]' % (self._buffer,
                                             child.attributes['name'])
            elif tag in ('comment', 'target'):
                pass
            elif tag == 'definition_list':
                self._add_markdowncell()
                for item in child.children:
                    self._flush_buffer()
                    self._parse(item.children[0])
                    term = self._buffer
                    self._buffer = ''
                    self._parse(item.children[1])
                    self._buffer = '\n%s: %s' % (term, self._buffer)
            elif tag in ('enumerated_list', 'bullet_list'):
                self._add_markdowncell()
                for i, item in enumerate(child.children):
                    if tag == 'enumerated_list':
                        prefix = '%i.' % (i + 1,)
                    else:
                        prefix = '*'
                    self._flush_buffer()
                    self._add2buffer('%s ' % prefix,
                                     newline=True, paragraph=True)
                    self._state['indent'] += 4
                    self._state['need_hanging_indent'] = True
                    self._parse(item)
                    self._state['indent'] -= 4
                self._flush_buffer()
            elif tag == 'list_item':
                for c in child.children:
                    self._parse(c)
            elif tag == 'term':
                self._parse(child.children[0])
            elif tag == 'figure':
                # this can't be expressed in markdown
                self._flush_buffer()
                file_url = '%s/%s.html' % (
                    self._baseurl,
                    os.path.splitext(os.path.basename(self._filename))[0])
                # backslashes written explicitly; '\[' is an invalid escape
                self._add2buffer(
                    '\\[Visit [%s](%s) to view this figure\\]'
                    % (file_url, file_url),
                    newline=True, paragraph=True)
            elif tag == 'block_quote':
                self._flush_buffer()
                first_line = len(self._currcell['source'])
                # skip the wrapping paragraph
                self._parse(child.children[0])
                self._flush_buffer()
                self._currcell['source'][first_line] = \
                    '\n\n> %s' % self._currcell['source'][first_line]
            elif tag == 'system_message':
                if child.attributes['type'] == 'INFO':
                    continue
                # only looked up for non-INFO messages, as before
                message = child.children[0].astext()
                if message == 'Unknown directive type "exercise".':
                    exercise_text = '\n'.join(
                        [l.strip() for l in
                         child.children[1][0].astext().split('\n')][2:])
                    self._add_markdowncell(force=True)
                    self._parse(publish_doctree(
                        exercise_text,
                        settings_overrides=self._doctree_parser_settings))
                    self._flush_buffer()
                    self._currcell['source'].insert(0, '- - -\n*Exercise*')
                    self._add_codecell()
                    self._add2buffer(
                        '# you can use this cell to for this exercise\n')
                    self._add_markdowncell()
                    self._currcell['source'].append('- - -\n')
                elif message in ('Unknown directive type "todo".',
                                 'Unknown directive type "tikz".'):
                    pass
                elif message == 'Unknown directive type "ipython".':
                    python_code = '\n'.join(
                        [l.strip() for l in
                         child.children[1][0].astext().split('\n')][2:])
                    self._flush_buffer()
                    self._add_codecell()
                    self._currcell['input'].insert(0, python_code)
                    self._store_currcell()
                else:
                    raise RuntimeError("cannot handle system message '%s'"
                                       % child.astext())
            else:
                if hasattr(child, 'line') and child.line:
                    line = ' on line %i' % child.line
                else:
                    line = ''
                raise RuntimeError("Unknown tag '%s'%s" % (tag, line))

    def _store_currcell(self):
        """Flush pending text into the open cell and append it to the notebook.

        Does nothing apart from clearing ``_currcell`` when no cell is open.
        """
        if self._currcell is not None:
            self._flush_buffer()
            # guard against a code cell that never received any input
            if self._currcell['cell_type'] == 'code' \
                    and self._currcell['input']:
                # remove last newline to save on vertical space
                self._currcell['input'][-1] = \
                    self._currcell['input'][-1].rstrip('\n')
            self._currcells.append(self._currcell)
        self._currcell = None

    def _add_headingcell(self, level):
        """Store the open cell and start a heading cell of the given level."""
        self._store_currcell()
        self._currcell = deepcopy(ReST2IPyNB.headingcell_template)
        self._currcell['level'] = level

    def _add_codecell(self):
        """Store the open cell and start a new code cell."""
        self._store_currcell()
        self._currcell = deepcopy(ReST2IPyNB.codecell_template)

    def _add_markdowncell(self, force=False):
        """Make sure a markdown cell is open.

        An already open markdown cell is reused unless ``force`` is true.
        """
        if self._currcell is None \
                or self._currcell['cell_type'] != 'markdown' \
                or force:
            # we need a new cell
            self._store_currcell()
            self._currcell = deepcopy(ReST2IPyNB.markdowncell_template)

    def _add2buffer(self, value, newline=False, paragraph=False):
        """Append ``value`` to the text buffer.

        With ``newline`` the value is preceded by a line break (two when
        ``paragraph`` is set) and the current indentation; ``paragraph`` has
        no effect without ``newline``.
        """
        if newline:
            nl = '\n\n' if paragraph else '\n'
            self._buffer += '%s%s%s' % (nl, ' ' * self._state['indent'], value)
        else:
            self._buffer += value

    def _flush_buffer(self, startnew=True):
        """Move the buffered text into the open cell and empty the buffer.

        Code cells receive the text in ``input``, all others in ``source``.
        With ``startnew`` (or when the field is still empty) a new entry is
        appended; otherwise the text is added to the last entry.  An empty
        buffer is a no-op.
        """
        if not self._buffer:
            return
        if self._currcell['cell_type'] == 'code':
            target_field = 'input'
        else:
            target_field = 'source'
        entries = self._currcell[target_field]
        if startnew or not entries:
            entries.append(self._buffer)
        else:
            entries[-1] += self._buffer
        self._buffer = ''