This repository has been archived by the owner on Apr 29, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example_extractor.py
55 lines (38 loc) · 1.78 KB
/
example_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python2.7
from __future__ import absolute_import, print_function, division
from glob import glob
from os import mkdir
from os.path import abspath, dirname, isdir, join as joinpath
from lxml import etree
from lxml.html import parse as parse_html
# Directory containing this script; the example template is looked up here.
HOCUS_ROOT = dirname(abspath(__file__))
_EXAMPLE_TEMPLATE_FILEPATH = joinpath(HOCUS_ROOT, 'example_template.html')

# Read the template once at import time. It is later filled in through
# str.format() with a single ``example_html`` field.
with open(_EXAMPLE_TEMPLATE_FILEPATH) as example_template_file:
    EXAMPLE_TEMPLATE = example_template_file.read()
def log(*values, **print_options):
    """Emit a progress message by forwarding all arguments to ``print``."""
    print(*values, **print_options)
def extract_examples(docs_html_filepaths, output_directory):
    """Write each ``data-example-id`` element of the given pages to its own file.

    Every page is parsed with lxml. Each element carrying a non-empty
    ``data-example-id`` attribute is serialized, substituted into
    ``EXAMPLE_TEMPLATE`` as the ``example_html`` field, and written to
    ``<output_directory>/<example id>.html``. Files are opened in ``'w'``
    mode, so an existing file with the same name is overwritten.

    :param docs_html_filepaths: iterable of paths of the HTML pages to scan.
    :param output_directory: directory receiving the generated files; this
        function does not create it.
    """
    for docs_html_filepath in docs_html_filepaths:
        with open(docs_html_filepath, 'r') as docs_html_file:
            dom = parse_html(docs_html_file)
        log('Extracting from', docs_html_filepath)

        for example in dom.xpath('//*[@data-example-id]'):
            example_id = example.get('data-example-id')
            if not example_id:
                # Attribute is present but empty: no usable filename.
                continue

            example_filepath = joinpath(output_directory, example_id + '.html')

            # Serialize as HTML. etree.tostring() defaults to XML output,
            # which self-closes empty elements (e.g. <span class="caret"/>);
            # browsers mis-parse those in an .html document.
            example_markup = etree.tostring(example, method='html')
            example_html = EXAMPLE_TEMPLATE.format(example_html=example_markup)

            with open(example_filepath, 'w') as example_html_file:
                example_html_file.write(example_html)
            log('Wrote', example_filepath)
def main():
    """Extract the examples from the built docs under ``./_gh_pages``.

    Collects every ``./_gh_pages/*/index.html`` page, creates the
    ``./_gh_pages/extracted-examples`` directory when it is missing, and
    passes both to ``extract_examples``.
    """
    # Assumes that CWD is /bootstrap/
    output_dir = abspath('./_gh_pages/extracted-examples')
    pages_pattern = abspath('./_gh_pages/*/index.html')
    page_paths = glob(pages_pattern)

    if not isdir(output_dir):
        mkdir(output_dir)

    extract_examples(page_paths, output_dir)


# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()