/
py2ipynb.py
107 lines (88 loc) · 3.99 KB
/
py2ipynb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import argparse
import nbformat
from nbformat.v4 import new_code_cell, new_markdown_cell, new_notebook
from IPython.nbformat import v3, v4
import codecs
from os import linesep
# Maps a supported IDE name to the comment line that IDE uses as a cell
# separator; the keys are the accepted values for `cellmark_style`.
CELLMARKS = {
    "pycharm": "##",
    "spyder": "#%%",
}
def _new_slide_metadata():
    """Return a fresh metadata dict marking a cell as a regular slide."""
    return {"slideshow": {"slide_type": "slide"}}


def parsePy(py_filename, cellmark_style, other_ignores=None):
    """Split a .py file into notebook cells using special cell markers.

    This is a generator, so the file is only opened once iteration starts.

    A line that is exactly the cell marker for ``cellmark_style``, or that
    looks like ``# In[...]:``, closes the current cell (if it has any lines)
    and starts a new one. The marker line itself is never part of a cell.
    Inside a cell the following lines are directives and are not emitted:

    * ``#md``, ``# md``, ``#markdown``, ``# markdown``: the cell is markdown.
    * ``#slide: <type>`` / ``# slide: <type>``: sets the slideshow slide type.

    A commented-out ``%matplotlib`` magic is emitted with its leading ``#``
    removed. Lines equal (after stripping) to a triple-quote, to any known
    cell marker, or to an entry of ``other_ignores`` are dropped.

    :param py_filename: .py filename (read as UTF-8)
    :param cellmark_style: Determines cell marker based on IDE, {"pycharm": "##", "spyder": "#%%"}
    :param other_ignores: Other lines to ignore (any iterable of strings, or None)
    :return: Yields one ``(codecell, metadata, source)`` tuple per cell, where
        ``codecell`` is False for markdown cells, ``metadata`` is the slideshow
        metadata dict and ``source`` is the cell text
    :raises KeyError: if ``cellmark_style`` is not a key of ``CELLMARKS``
    """
    # Local import keeps this block self-contained; io.open accepts an
    # encoding on both Python 2 and 3 and keeps universal-newline handling.
    import io

    # list(...) is required on Python 3, where dict.values() is a view that
    # cannot be concatenated to a list.
    ignores = ['"""', "'''"] + list(CELLMARKS.values()) + list(other_ignores or [])
    cellmark = CELLMARKS[cellmark_style]

    with io.open(py_filename, "r", encoding="utf-8") as f:
        lines = []
        codecell = True
        metadata = _new_slide_metadata()
        for raw in f:
            stripped = raw.strip()

            is_boundary = stripped == cellmark or (
                stripped.startswith('# In[') and stripped.endswith(']:'))
            if is_boundary:
                if lines:
                    yield (codecell, metadata, "".join(lines).strip(linesep))
                    lines = []
                    codecell = True
                    metadata = _new_slide_metadata()
                # With no pending lines the marker is simply dropped; any
                # directive seen so far keeps applying to the upcoming cell.
                continue

            if stripped in ("#md", "# md", "#markdown", "# markdown"):
                codecell = False
                continue

            if stripped.startswith(("#slide:", "# slide:")):
                metadata["slideshow"]["slide_type"] = stripped.split(":")[-1].strip()
                continue

            if "%matplotlib" in stripped and stripped.startswith("#"):
                # Uncomment the magic. strip() removed the newline, so put it
                # back or the next source line would be glued onto this one.
                raw = stripped[1:].strip() + "\n"

            if stripped not in ignores:
                lines.append(raw)

        if lines:
            yield (codecell, metadata, "".join(lines).strip(linesep))
def py2ipynb(input, output, cellmark_style, other_ignores=None):
    """Convert a .py file to a V.4 .ipynb notebook using the `parsePy` function.

    :param input: Input .py filename
    :param output: Output .ipynb filename (written as UTF-8)
    :param cellmark_style: Determines cell marker based on IDE, see parsePy documentation for values
    :param other_ignores: Other lines to ignore (iterable of strings, or None for no extra lines)
    """
    # Normalise up front so parsePy always receives a real list, and so no
    # mutable default object is shared between calls.
    ignores = [] if other_ignores is None else list(other_ignores)

    # Build one notebook cell per parsed chunk; the flag returned by parsePy
    # selects between a code cell and a markdown cell.
    cells = []
    for codecell, metadata, source in parsePy(input, cellmark_style, ignores):
        make_cell = new_code_cell if codecell else new_markdown_cell
        cells.append(make_cell(source=source, metadata=metadata))

    # Assemble the V4 notebook from the extracted cells and write it out.
    notebook = new_notebook(cells=cells, metadata={'language': 'python'})
    with codecs.open(output, encoding='utf-8', mode='w') as f:
        nbformat.write(notebook, f, 4)
def py2ipynb_default(input, output):
    """Convert a .py file to a V.4 .ipynb notebook using nbformat's own reader.

    The source is parsed with ``v3.reads_py``, upgraded to the v4 format and
    written as JSON followed by a trailing newline. Both files are handled as
    UTF-8 so non-ASCII text survives regardless of the platform locale.

    :param input: Input .py filename
    :param output: Output .ipynb filename
    """
    # Local import keeps this block self-contained; io.open accepts an
    # encoding on both Python 2 and 3 and keeps text-mode newline handling.
    import io

    with io.open(input, "r", encoding="utf-8") as f:
        code = f.read()

    # NOTE(review): the appended markdown cell looks like a workaround for
    # reads_py() mishandling the end of the input - confirm before removing.
    code += """
# <markdowncell>
# If you can read this, reads_py() is no longer broken!
"""
    nbook = v3.reads_py(code)
    nbook = v4.upgrade(nbook)  # Upgrade v3 to v4
    jsonform = v4.writes(nbook) + "\n"

    with io.open(output, "w", encoding="utf-8") as f:
        f.write(jsonform)
# Command-line entry point: py2ipynb.py INPUT OUTPUT [-c STYLE]
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="input python file")
    parser.add_argument("output", help="output notebook file")
    # Let argparse validate the style: an invalid value now produces the usual
    # usage message and exit status instead of an uncaught ArgumentError.
    # "default" selects py2ipynb_default; the other choices are the CELLMARKS keys.
    parser.add_argument("-c", "--cellmark-style", default="default",
                        choices=["default"] + sorted(CELLMARKS),
                        help="cell marker style (default: %(default)s)")
    args = parser.parse_args()

    if args.cellmark_style == "default":
        py2ipynb_default(args.input, args.output)
    else:
        # Horizontal-rule comment lines are dropped from the generated cells.
        py2ipynb(args.input, args.output, args.cellmark_style,
                 ["# ----------------------------------------------------------------------------"])