def save_css(soup, fo, assets_dir):
    """
    Extracts <style> elements in <head> from the document and replaces
    them with <link> elements pointing to external CSS files.

    Each stylesheet is stored in ``assets_dir`` as ``style-<md5>.css``,
    where the digest is the value returned by ``get_md5`` for the encoded
    CSS text. The file is only written if it does not exist yet.

    :param soup: parsed document; it is modified in place.
    :param fo: filename of the HTML output; the ``href`` of each link is
               made relative to the directory of this file.
    :param assets_dir: directory in which the CSS files are stored.
    :return: the number of <style> elements that were replaced.
    """
    n = 0
    for style in soup.select('head style'):
        # Encoded once: the same bytes are hashed and written to disk.
        data = style.text.encode(errors='ignore')
        md5 = get_md5(data)
        basename = 'style-%s.css' % md5
        dest_abs = os.path.join(assets_dir, basename)
        dest_rel = os.path.relpath(dest_abs, os.path.dirname(fo))
        if not os.path.exists(dest_abs):
            make_sure_dir_exists(dest_abs)
            # "data" is bytes, so the file must be opened in binary mode:
            # text mode rejects bytes on Python 3 and would translate
            # newlines on some platforms.
            with open(dest_abs, 'wb') as f:
                f.write(data)

        link = Tag(name='link')
        link.attrs['rel'] = 'stylesheet'
        link.attrs['type'] = 'text/css'
        link.attrs['href'] = dest_rel
        style.replace_with(link)
        n += 1
    return n
def extract_assets_from_file(fi, fo, assets_dir):
    """
    Reads the HTML document in ``fi``, passes it to ``extract_img_to_file``
    so that assets are saved under ``assets_dir``, and writes the resulting
    document to ``fo``.

    :param fi: filename of the input HTML document.
    :param fo: filename of the output HTML document.
    :param assets_dir: directory for the extracted assets; it is created
                       if it does not exist.
    """
    make_sure_dir_exists(fo)
    if not os.path.exists(assets_dir):
        os.makedirs(assets_dir)

    soup = read_html_doc_from_file(fi)

    def savefile(filename_hint, data):
        """ must return the url (might be equal to filename) """
        where = os.path.join(assets_dir, filename_hint)
        write_data_to_file(data, where)
        # The URL is expressed relative to the directory of the output file.
        relative = os.path.relpath(where, os.path.dirname(fo))
        return relative

    extract_img_to_file(soup, savefile)

    write_html_doc_to_file(soup, fo)
def write_data_to_file(data, filename, quiet=False):
    """
    Writes the data to the given filename.
    If the data did not change, the file is not touched.

    :param data: content to write; must be ``bytes``.
    :param filename: destination path (at most 256 characters).
    :param quiet: if True, no debug message is logged.
    :raises ValueError: if ``data`` is not bytes, or if ``filename`` is
                        longer than 256 characters.
    """
    if not isinstance(data, bytes):
        msg = 'Expected "data" to be bytes, not %s.' % type(data).__name__
        raise ValueError(msg)
    if len(filename) > 256:
        msg = 'Invalid argument filename: too long. Did you confuse it with data?'
        raise ValueError(msg)

    make_sure_dir_exists(filename)

    if os.path.exists(filename):
        # Read in binary mode, so that the comparison with the bytes in
        # "data" is meaningful, and close the file deterministically.
        with open(filename, 'rb') as f:
            current = f.read()
        if current == data:
            # The "up to date" message is not emitted for paths containing
            # 'assets'.
            if 'assets' not in filename:
                if not quiet:
                    logger.debug('already up to date %s' % friendly_path(filename))
            return

    with open(filename, 'wb') as f:
        f.write(data)

    if not quiet:
        logger.debug('Written to: %s' % friendly_path(filename))
def extract_assets_from_file(data, fo, assets_dir):
    """
    Parses the HTML in ``data``, runs the asset-extraction steps
    (``extract_img_to_file``, ``save_images_locally``, ``save_css``)
    using ``assets_dir`` as destination, and writes the resulting
    document to ``fo``.

    :param data: HTML document, as accepted by ``bs_entire_document``.
    :param fo: filename of the output HTML document.
    :param assets_dir: directory for the assets; created if missing.
    :return: a new ``AugmentedResult``.
    """
    result = AugmentedResult()

    make_sure_dir_exists(fo)
    assets_dir_missing = not os.path.exists(assets_dir)
    if assets_dir_missing:
        mkdirs_thread_safe(assets_dir)

    document = bs_entire_document(data)

    def savefile(filename_hint, data_):
        """ Stores data_ in the assets dir; returns its URL relative to fo. """
        destination = os.path.join(assets_dir, filename_hint)
        write_data_to_file(data_, destination, quiet=True)
        output_dir = os.path.dirname(fo)
        return os.path.relpath(destination, output_dir)

    from mcdp_report.embedded_images import extract_img_to_file

    # The three steps run in this order on the same document.
    num_img = extract_img_to_file(document, savefile)
    num_saved = save_images_locally(document, fo, assets_dir)
    num_css = save_css(document, fo, assets_dir)

    # Total number of substitutions; only of interest when debugging:
    # print('Total of %d subs (img %d save %d css %d)' % (n, nimg, nsave, ncss))
    total = num_img + num_saved + num_css

    write_html_doc_to_file(document, fo, quiet=True)
    return result
def pdf2svg(f1, f2):
    """
    Runs the external command ``pdf2svg f1 f2`` from the current directory.

    The directory for ``f2`` is prepared with ``make_sure_dir_exists``
    first. The command output is not displayed, and ``raise_on_error=True``
    is passed to ``system_cmd_result``.

    :param f1: first argument to ``pdf2svg`` (the input file).
    :param f2: second argument to ``pdf2svg`` (the output file).
    """
    make_sure_dir_exists(f2)
    command = ['pdf2svg', f1, f2]
    system_cmd_result('.', command,
                      display_stdout=False,
                      display_stderr=False,
                      raise_on_error=True)
def extract_page(filename, page, output):
    """
    Runs ``pdftk <filename> cat <page> output <output>`` from the current
    directory.

    The directory for ``output`` is prepared with ``make_sure_dir_exists``
    first. The command output is not displayed, and ``raise_on_error=True``
    is passed to ``system_cmd_result``.

    :param filename: input file given to ``pdftk``.
    :param page: page selector; it is converted with ``str()``.
    :param output: filename given to ``pdftk`` as output.
    """
    make_sure_dir_exists(output)
    page_spec = str(page)
    command = ['pdftk', filename, 'cat', page_spec, 'output', output]
    system_cmd_result('.', command,
                      display_stdout=False,
                      display_stderr=False,
                      raise_on_error=True)
def update_graph(context, event):
    """
    Event handler that draws one image of the job graph per event.

    It keeps ``Global.processing`` up to date with the jobs that started
    and have not yet failed/succeeded, calls ``graph()`` to produce the
    file ``step-<NNNN>-<event>[-<job_id>]`` in ``Global.dirname``, renders
    it with ``dot`` to PNG, and uses ``convert`` to produce a
    ``...-x.png`` image with extent ``Global.size``. The intermediate PNG
    is deleted.

    :param context: passed through to ``graph()``.
    :param event: event with attributes ``name`` and ``kwargs``.
    """
    print('event: %s' % event)

    if event.name in ['manager-job-starting']:
        job_id = event.kwargs['job_id']
        Global.processing.add(job_id)
    if event.name in ['manager-job-failed', 'manager-job-succeeded']:
        job_id = event.kwargs['job_id']
        Global.processing.remove(job_id)

    print('global processing %s' % Global.processing)
    if 'job_id' in event.kwargs:
        what = '%s-%s' % (event.name, event.kwargs['job_id'])
    else:
        what = event.name
    filename = os.path.join(Global.dirname,
                            ('step-%04d-%s' % (Global.step, what)))

    make_sure_dir_exists(filename)
    # print('step %d: jobs = %s' % (Global.step, Global.job_list))
    graph(job_list=list(Global.job_list),
          context=context,
          filename=filename,
          processing=Global.processing,
          **Global.graph_params)
    Global.step += 1

    # see here:
    # http://stackoverflow.com/questions/14784405/how-to-set-the-output-size-in-graphviz-for-the-dot-format

    png = filename + ".png"
    png2 = filename + "-x.png"

    size = Global.size
    dpi = Global.dpi
    cmd0 = [
        'dot', '-Tpng',
        # The backslash is written as '\\' because '\!' is not a valid
        # escape sequence; the resulting argument is unchanged.
        # NOTE(review): no shell is involved here, so the backslash reaches
        # dot literally. It looks like a leftover shell escape for '!';
        # confirm whether '-Gsize=W,H!' was intended.
        '-Gsize=%s,%s\\!' % (size[0] / dpi, size[1] / dpi),
        '-Gdpi=%s' % dpi,
        '-o' + png, filename
    ]
    system_cmd_result('.', cmd0,
                      display_stdout=True,
                      display_stderr=True,
                      raise_on_error=True)

    cmd = [
        'convert',
        png,
        '-gravity', 'center',
        '-background', 'white',
        '-extent', '%sx%s' % (size[0], size[1]),
        png2
    ]
    system_cmd_result('.', cmd,
                      display_stdout=True,
                      display_stderr=True,
                      raise_on_error=True)
    # Only the "-x.png" image is kept.
    os.unlink(png)
def pdfcrop_margins(f1, f2, margins):
    """
    Runs ``pdfjam`` on ``f1`` with ``--trim <margins>`` and ``--clip true``,
    on landscape letter paper and keeping the document info, writing the
    result to ``f2``.

    The directory for ``f2`` is prepared with ``make_sure_dir_exists``
    first. The command output is not displayed, and ``raise_on_error=True``
    is passed to ``system_cmd_result``.

    :param f1: input file given to ``pdfjam``.
    :param f2: value of the ``--outfile`` option.
    :param margins: value of the ``--trim`` option, passed as is.
    """
    make_sure_dir_exists(f2)
    # Alternative that was used previously:
    # cmd = ['pdfcrop', f1, '--margins', str(margins), f2]
    options = [
        '--keepinfo',
        '--trim', margins,
        '--paper', 'letter',
        '--landscape',
        '--clip', 'true',
        '--outfile', f2,
    ]
    command = ['pdfjam'] + options + [f1]
    system_cmd_result('.', command,
                      display_stdout=False,
                      display_stderr=False,
                      raise_on_error=True)
def test_toc_numbers1():
    """
    Renders a fixture document with ``render_complete``, joins it with
    ``manual_join`` and writes the result to
    ``out/comptests/test_toc_numbers1.html``.

    The function contains no assertions on the produced HTML: it only
    fails if one of the calls raises (``raise_errors=True`` is passed to
    ``render_complete``).
    """
    # Fixture. It contains a <div id='toc'>, headings (some with explicit
    # ids such as {#part:one} and {#app:A}), elements with "figure-id"
    # attributes, LaTeX-style environments with \label, and links to those
    # ids, both as [](#id) and as <a> elements with the classes
    # only_number, number_name and only_name.
    s = r""" <div id='toc'></div> # Part One {#part:one} # Chapter One ## Sub One_point_One Referring to [](#fig:One) and [](#fig:Two) and [](#tab:One). Also referring only with numbers: <a href="#fig:One" class='only_number'></a>, <a href="#fig:Two" class='only_number'></a>, <a href="#tab:One" class='only_number'></a>. <s figure-id="fig:One">Figure One</s> ### Sub sub One_point_One_point_One #### Par a #### Par b ## Sub One_point_Two Referring to subfigures [](#subfig:child1) and [](#subfig:child2). <div figure-id="fig:parent"> <div figure-id="subfig:child1" figure-caption="child1"> child1 </div> <div figure-id="subfig:child2" figure-caption="child2"> child2 </div> </div> <div figure-id="code:code1"> <pre><code>code1</code></pre> </div> ## Sub with `code` in the <k>name</k> # Chapter Two <s figure-id="fig:Two">Figure Two</s> <s figure-id="tab:One">Table One</s> ## Sub Two_point_One # Part Two {#part:two} # Chapter Three \begin{definition}[DefinitionA]\label{def:A}Definition A\end{definition} \begin{defn}[DefinitionA2]\label{def:A2}Definition A2\end{defn} \begin{proposition}[PropositionB]\label{prop:B}Proposition B\end{proposition} \begin{problem}[ProblemC]\label{prob:C}Problem C\end{problem} \begin{example}[exampleD]\label{exa:D}...\end{example} \begin{remark}[remarkE]\label{rem:E}...\end{remark} \begin{lemma}[lammaF]\label{lem:F}...\end{lemma} \begin{theorem}[theoremG]\label{thm:G}...\end{theorem} \begin{thm}[theoremG2]\label{thm:G2}...\end{thm} Citing: [](#def:A), [](#prop:B), [](#prob:C), [](#exa:D), [](#rem:E), [](#lem:F), [](#thm:G). Citing full name: <a href="#def:A" class="number_name"></a>, <a href="#prop:B" class="number_name"></a>, <a href="#prob:C" class="number_name"></a>, <a href="#exa:D" class="number_name"></a>, <a href="#rem:E" class="number_name"></a>, <a href="#lem:F" class="number_name"></a>, <a href="#thm:G" class="number_name"></a>. 
Citing only name: <a href="#def:A" class="only_name"></a>, <a href="#prop:B" class="only_name"></a>, <a href="#prob:C" class="only_name"></a>, <a href="#exa:D" class="only_name"></a>, <a href="#rem:E" class="only_name"></a>, <a href="#lem:F" class="only_name"></a>, <a href="#thm:G" class="only_name"></a>. Citing only number: <a href="#def:A" class="only_number"></a>, <a href="#prop:B" class="only_number"></a>, <a href="#prob:C" class="only_number"></a>, <a href="#exa:D" class="only_number"></a>, <a href="#rem:E" class="only_number"></a>, <a href="#lem:F" class="only_number"></a>, <a href="#thm:G" class="only_number"></a>. # Appendices {#part:appendices} # Appendix A {#app:A} # Appendix B {#app:B} ## App sub B_point_One ### App subsub B_point_One_point_One """
    library = MCDPLibrary()
    raise_errors = True
    # The module name is passed as the "realpath" argument.
    realpath = __name__
    # From here on, "s" holds the output of render_complete.
    s = render_complete(library, s, raise_errors, realpath)
    # A single document, named 'a', is given to manual_join.
    files_contents = [DocToJoin(docname='a', contents=s, source_info=None)]
    stylesheet = 'v_manual_blurb_ready'
    # "template" is not defined in this function; it is taken from the
    # enclosing scope.
    res = manual_join(template=template, files_contents=files_contents, stylesheet=stylesheet)
    fn = 'out/comptests/test_toc_numbers1.html'  # XXX: write on test folder
    make_sure_dir_exists(fn)
    # Note: this message is logged before the file is actually written.
    logger.info('written on %s' % fn)
    with open(fn, 'w') as f:
        f.write(res)
def update_graph(context, event):
    """
    Event handler that draws one image of the job graph per event.

    ``Global.processing`` is updated first: the job is added on
    'manager-job-starting' and removed on 'manager-job-failed' or
    'manager-job-succeeded'. Then ``graph()`` is called to produce the file
    ``step-<NNNN>-<event>[-<job_id>]`` in ``Global.dirname``, which is
    rendered to PNG with ``dot`` and then processed with ``convert`` into
    a ``...-x.png`` image with extent ``Global.size``. The intermediate
    PNG is deleted.

    :param context: passed through to ``graph()``.
    :param event: event with attributes ``name`` and ``kwargs``.
    """
    print('event: %s' % event)

    if event.name in ['manager-job-starting']:
        job_id = event.kwargs['job_id']
        Global.processing.add(job_id)
    if event.name in ['manager-job-failed', 'manager-job-succeeded']:
        job_id = event.kwargs['job_id']
        Global.processing.remove(job_id)

    print('global processing %s' % Global.processing)
    if 'job_id' in event.kwargs:
        what = '%s-%s' % (event.name, event.kwargs['job_id'])
    else:
        what = event.name
    filename = os.path.join(Global.dirname,
                            ('step-%04d-%s' % (Global.step, what)))

    make_sure_dir_exists(filename)
    # print('step %d: jobs = %s' % (Global.step, Global.job_list))
    graph(job_list=list(Global.job_list),
          context=context,
          filename=filename,
          processing=Global.processing,
          **Global.graph_params)
    Global.step += 1

    # see here:
    # http://stackoverflow.com/questions/14784405/how-to-set-the-output-size-in-graphviz-for-the-dot-format

    png = filename + ".png"
    png2 = filename + "-x.png"

    size = Global.size
    dpi = Global.dpi
    # '\!' is not a valid escape sequence, so the backslash is spelled
    # '\\'; the argument passed to dot is the same as before.
    # NOTE(review): the command is run from a list, so dot receives the
    # backslash literally. It may be a leftover shell escape for '!';
    # confirm whether '-Gsize=W,H!' was intended.
    size_option = '-Gsize=%s,%s\\!' % (size[0] / dpi, size[1] / dpi)
    cmd0 = ['dot', '-Tpng',
            size_option,
            '-Gdpi=%s' % dpi,
            '-o' + png, filename]
    system_cmd_result(
        '.', cmd0,
        display_stdout=True,
        display_stderr=True,
        raise_on_error=True)

    cmd = ['convert', png,
           '-gravity', 'center',
           '-background', 'white',
           '-extent', '%sx%s' % (size[0], size[1]),
           png2]
    system_cmd_result(
        '.', cmd,
        display_stdout=True,
        display_stderr=True,
        raise_on_error=True)
    # Only the "-x.png" image is kept.
    os.unlink(png)