Esempio n. 1
0
def convert_to_body_resources(notebook_filename, exporter_class=PDFExporter):
    """Convert a notebook file to an exported body and its resources.

    The notebook is read as nbformat version 4, passed through
    ``preprocess_markdown_local_images`` (which, per the original description
    of this function, replaces local images referenced in markdown) and then
    exported with a custom ``article.tplx`` template that disables paragraph
    indentation and separates paragraphs by vertical space instead.

    Args:
        notebook_filename: Path of the notebook to read.
        exporter_class: Exporter class to instantiate with the custom template
            loader. ``None`` selects ``LatexExporter``.

    Returns:
        The ``(body, resources)`` tuple returned by the exporter's
        ``from_notebook_node``.
    """
    # Read the actual notebook
    notebook = nbformat.read(notebook_filename, as_version=4)
    notebook, resources = preprocess_markdown_local_images(
        notebook, notebook_filename)

    # Overwrite article style.
    # Raw string: the LaTeX commands below (\documentclass, \setlength, ...)
    # would otherwise be invalid escape sequences in a normal string literal,
    # which newer Python versions warn about.
    article_template = r"""
    % Default to the notebook output style
    ((* if not cell_style is defined *))
        ((* set cell_style = 'style_ipython.tplx' *))
    ((* endif *))

    % Inherit from the specified cell style.
    ((* extends cell_style *))


    %===============================================================================
    % Latex Article
    %===============================================================================

    ((* block docclass *))
    % In case you want to change it
    \documentclass{article}
    ((* endblock docclass *))
    
    ((* block header *))
        ((( super() )))
        % Indentation, no indetation for paragraphs, but blank lines
        \setlength{\parskip}{\medskipamount}
        \setlength{\parindent}{0pt}

    ((* endblock header *))
    """
    dl = DictLoader({'article.tplx': article_template})

    # Fall back to plain LaTeX export when no exporter class is given.
    exporter_cls = LatexExporter if exporter_class is None else exporter_class
    exporter = exporter_cls(extra_loaders=[dl])
    body, resources = exporter.from_notebook_node(notebook,
                                                  resources=resources)
    return body, resources
def convert_to_body_resources(notebook_filename, exporter_class=PDFExporter):
    """Export a notebook and return the exporter's body and resources.

    Steps performed:

    1. read ``notebook_filename`` as an nbformat version 4 notebook;
    2. run ``preprocess_markdown_local_images`` on it (according to the
       original description, this replaces markdown local images);
    3. export it with an ``article.tplx`` override that sets ``\\parindent``
       to zero and ``\\parskip`` to ``\\medskipamount``.

    Args:
        notebook_filename: Path of the notebook to convert.
        exporter_class: Exporter class to use; when ``None``,
            ``LatexExporter`` is used instead.

    Returns:
        Tuple ``(body, resources)`` as produced by ``from_notebook_node``.
    """
    # Read the actual notebook
    notebook = nbformat.read(notebook_filename, as_version=4)
    notebook, resources = preprocess_markdown_local_images(notebook, notebook_filename)

    # Overwrite article style. The template is a raw string so that LaTeX
    # backslash commands are not parsed as (invalid) Python escape sequences.
    dl = DictLoader({'article.tplx':
    r"""
    % Default to the notebook output style
    ((* if not cell_style is defined *))
        ((* set cell_style = 'style_ipython.tplx' *))
    ((* endif *))

    % Inherit from the specified cell style.
    ((* extends cell_style *))


    %===============================================================================
    % Latex Article
    %===============================================================================

    ((* block docclass *))
    % In case you want to change it
    \documentclass{article}
    ((* endblock docclass *))
    
    ((* block header *))
        ((( super() )))
        % Indentation, no indetation for paragraphs, but blank lines
        \setlength{\parskip}{\medskipamount}
        \setlength{\parindent}{0pt}

    ((* endblock header *))
    """})

    # ``None`` means "plain LaTeX export".
    if exporter_class is None:
        exporter_class = LatexExporter
    exporter = exporter_class(extra_loaders=[dl])
    body, resources = exporter.from_notebook_node(notebook, resources=resources)
    return body, resources
Esempio n. 3
0
def convert_to_body_resources(notebook_filename, bibtex_filename):
    """Convert a notebook into a partial LaTeX body plus nbconvert resources.

    The notebook is read as nbformat version 4, modified in memory and then
    exported with ``LatexExporter`` and a custom ``article.tplx`` template.
    The in-memory modifications are, in order:

    1. If ``bibtex_filename`` is given and the notebook metadata contains a
       ``cite2c`` section, every cite2c citation key found in markdown cells
       is replaced by the ``ID`` of the single BibTeX entry that
       ``cite2c_bibtex_equal`` reports as equal to it.
    2. ``<img ... src="..."></img>`` tags in markdown cells are downloaded and
       stored in ``resources['outputs']`` (SVG is converted to PDF, GIF to
       JPG); each tag is replaced by a centered ``adjustimage`` LaTeX snippet
       pointing at the stored file.
    3. Code-cell outputs are filtered through
       ``remove_javascript_html_outputs``.
    4. A fixed set of HTML tags in markdown cells is replaced by LaTeX.

    After export, every ``href`` link in the body gets a footnote containing
    its URL.

    Args:
        notebook_filename: Path of the notebook; the part after the last
            ``/`` (with ``.ipynb`` removed) names the output files directory.
        bibtex_filename: Path of a BibTeX file, or ``None`` to skip the
            citation key replacement.

    Returns:
        Tuple ``(body, resources)``: the post-processed LaTeX body and the
        resources dict returned by the exporter.

    Raises:
        AssertionError: If more than one BibTeX entry equals a cite2c entry.
    """
    # Initializing resources to have correct output directory
    # see https://github.com/jupyter/nbconvert/blob/fcc3a831295b373a7a9ee5e8e0dea175475f8f26/nbconvert/nbconvertapp.py#L288
    notebook_name = notebook_filename.split('/')[-1].replace('.ipynb', '')
    resources = {}
    resources['unique_key'] = notebook_name
    resources['output_files_dir'] = '%s_files' % notebook_name

    own_notebook = nbformat.read(notebook_filename, as_version=4)

    # replace cite keys by bibtex citekeys:
    if bibtex_filename is not None and 'cite2c' in own_notebook['metadata']:
        with open(bibtex_filename, 'r') as bibtex_file:
            bibtex = bibtexparser.load(bibtex_file)
        cite2_key_to_bibtex_key = dict()
        citations = own_notebook['metadata']['cite2c']['citations']
        # .items() (not .iteritems()) so this runs on Python 2 and Python 3.
        for key, cite2c_entry in citations.items():
            equal_bibtex_entries = [b for b in bibtex.entries
                                    if cite2c_bibtex_equal(cite2c_entry, b)]
            if len(equal_bibtex_entries) == 1:
                cite2_key_to_bibtex_key[key] = equal_bibtex_entries[0]['ID']
            assert len(equal_bibtex_entries) < 2, ("expected at most "
                "one equal bibtex entry, got {:s}".format(str(equal_bibtex_entries)))
        for cell in own_notebook['cells']:
            if cell['cell_type'] != 'markdown':
                continue
            for cite2_key, bibtex_key in cite2_key_to_bibtex_key.items():
                cell['source'] = cell['source'].replace(cite2_key, bibtex_key)

    def img_tag_to_latex(match):
        # Built by a function rather than a replacement template: template
        # strings interpret backslashes, and sequences such as "\l", "\p" and
        # "\e" (from \linewidth, \paperheight, \end) are rejected as bad
        # escapes by newer versions of the re module.
        return ("\\begin{center}\n" +
                "\\adjustimage{max size={0.9\\linewidth}{0.9\\paperheight}}{" +
                resources['output_files_dir'] + "/" +
                match.group(1) +  # here is the filename
                "}\n" +
                "\\end{center}\n")

    # find imgs and convert to resources
    resources['outputs'] = dict()
    for cell in own_notebook['cells']:
        if cell['cell_type'] != 'markdown':
            continue
        img_urls_filenames = re.findall(r"<img.*src=\"([^>]*/([^\.]*\.[a-z]*)[^\"]*)\"[^>]*>[^<]*</img>", cell['source'])
        for url, img_filename in img_urls_filenames:
            response = requests.get(url, stream=True)
            if not response.ok:
                # Streamed responses hold their connection until closed.
                response.close()
                continue
            # iter_content yields byte chunks, so join with a bytes separator.
            data = b''.join(response.iter_content(1024))
            if img_filename.endswith('svg'):
                svg_2_pdf = SVG2PDFPreprocessor()
                # NOTE(review): `data` is raw bytes here; confirm whether the
                # installed nbconvert's convert_figure expects text instead.
                pdfdata = svg_2_pdf.convert_figure(None, data)
                # it is encoded by svg2pdfpreproc.. not sure if decoding is necessary
                pdfdata = base64.b64decode(pdfdata)
                img_filename = img_filename.replace('.svg', '.pdf')
                data = pdfdata
            if img_filename.endswith('gif'):
                jpgdata = gif_to_jpg(data)
                img_filename = img_filename.replace('.gif', '.jpg')
                data = jpgdata
            resource_key = os.path.join(resources['output_files_dir'], img_filename)
            resources['outputs'][resource_key] = data

        # Replace the whole image tag by latex code with the correct filename
        cell['source'] = re.sub(r"<img.*src=\"[^>]*/([^\.]*\.[a-z]*)[^>]*>[^<]*</img>",
                                img_tag_to_latex,
                                cell['source'])

        # The stored files were converted above, so point the text at the
        # converted extensions as well.
        cell['source'] = cell['source'].replace('.svg', '.pdf')
        cell['source'] = cell['source'].replace('.gif', '.jpg')

    # remove javascript/html outputs
    for cell in own_notebook['cells']:
        if cell['cell_type'] == 'code' and 'outputs' in cell:
            cell['outputs'] = remove_javascript_html_outputs(cell['outputs'])

    # do some custom replacements of html
    for cell in own_notebook['cells']:
        if cell['cell_type'] == 'markdown':
            cell['source'] = cell['source'].replace('<span class="todecide">', '\\begin{comment}\nTODECIDE\n')
            cell['source'] = cell['source'].replace('<span class="todo">', '\\begin{comment}\nTODO\n')
            cell['source'] = cell['source'].replace('</span>', '\n\\end{comment}\n')

            cell['source'] = cell['source'].replace("<div class=\"summary\">", "\\begin{keypointbox}")
            cell['source'] = cell['source'].replace("</div>", "\\end{keypointbox}")

            cell['source'] = cell['source'].replace("<li>", "\\item ")
            cell['source'] = cell['source'].replace("</li>", "").replace("<ul>", "").replace("</ul>", "")

    dl = DictLoader({'article.tplx':
    """
    ((*- extends 'base.tplx' -*))
    ((* block header *))
    ((* endblock header *))
    
    % only part-document, not complete document, so call not base constructor, but the one above
    ((* block body *))
        ((( super.super() )))
    ((* endblock body *))

    % is this removing code? unclear.. probaby removing stdout/stdin
    ((* block stream *))
    ((* endblock stream *))

    % Remove execute result stuff
    ((* block execute_result scoped *))
    ((* endblock execute_result *))


    ((* macro draw_figure(filename) -*))
    ((* set filename = filename | posix_path *))
    ((*- block figure scoped -*))

        %\\begin{figure}[ht]
        \\begin{center}
        \\adjustimage{max size={0.9\\linewidth}{0.9\\paperheight}}{((( filename )))}
        \\end{center}
        %\\end{figure}
        %{ \\hspace*{\\fill} \\\\}
    ((*- endblock figure -*))
    ((*- endmacro *))


    ((* block markdowncell scoped *))
    ((( cell.source | citation2latex | strip_files_prefix | markdown2latex(extra_args=["--chapters"]) )))
    ((* endblock markdowncell *))


    """})

    exportLatex = LatexExporter(extra_loaders=[dl])
    (body, resources) = exportLatex.from_notebook_node(own_notebook, resources=resources)

    # postprocess url links with footnotes
    body = re.sub(r"(\\href{([^}]*)}{[^}]*})", r"\1\\footnote{\\url{\2}}", body)
    return body, resources