def define_jobs_context(self, context):
    ifilename = self.options.filename
    output_dir = self.options.output_dir
    mathjax = self.options.mathjax
    preamble = self.options.preamble
    # disqus = self.options.disqus
    logger.setLevel(logging.DEBUG)

    if self.options.workers != 0:
        n = self.options.workers
    else:
        n = max(1, cpu_count() - 2)

    self.debug("Using n = %d workers" % n)

    data = open(ifilename).read()
    h = get_md5(data)[-4:]

    jobs = []
    for i in range(n):
        promise = context.comp_dynamic(go, i, n, ifilename, mathjax, preamble,
                                       output_dir,
                                       job_id='worker-%d-of-%d-%s' % (i, n, h))
        jobs.append(promise.job_id)
def get_link_to_image_file(filename, max_width):
    basename, ext = os.path.splitext(os.path.basename(filename).lower())
    if ext in ['.jpg', '.jpeg']:
        with open(filename, 'rb') as f:  # binary mode: JPEG data
            im = Image.open(f)
            # print filename, im.size
            if im.size[0] > max_width:
                # hash of the path (not the contents), enough to disambiguate
                b = basename + '-' + get_md5(filename)[:4] + '.jpg'
                dest = os.path.join(get_mcdp_tmp_dir(), 'images', b)
                height = int(im.size[1] * max_width / im.size[0])
                new_size = (max_width, height)
                msg = 'Resizing image %s from %s to %s' % (filename, im.size, new_size)
                logger.info(msg)
                # print('resizing to %s in %s' % (str(new_size), dest))
                if not os.path.exists(dest):
                    make_sure_dir_exists(dest)
                    resized = im.resize(new_size)
                    resized.save(dest)
                return dest
                # im.save(file + ".thumbnail", "JPEG")
        return filename
    else:
        return filename
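# A minimal usage sketch, assuming the function above is importable; the path
# and the max width are made-up example values, not from the original source.
link = get_link_to_image_file('figures/photo.jpg', max_width=800)
# 'link' is either the original path (non-JPEG, or already narrow enough)
# or the path of the resized copy under the temporary images directory.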
def subfloat_replace(args, opts):
    contents = args[0]
    caption = opts[0]
    check_isinstance(contents, str)
    if caption is None:
        label = None
    else:
        caption, label = get_s_without_label(caption, labelprefix="fig:")
        if label is None:
            caption, label = get_s_without_label(caption, labelprefix="subfig:")
        if label is not None and not label.startswith('subfig:'):
            msg = 'Subfigure labels should start with "subfig:"; found %r.' % (label)
            label = 'sub' + label
            msg += ' I will change to %r.' % label
            logger.debug(msg)

    # we need to make up an ID
    if label is None:
        label = 'subfig:' + get_md5(contents)
        # print('making up label %r' % label)
    # if label is not None:
    idpart = ' id="%s"' % label
    # else:
    #     idpart = ""

    if caption is None:
        caption = 'no subfloat caption'
    res = '<figure class="subfloat"%s>%s<figcaption>%s</figcaption></figure>' % (
        idpart, contents, caption)
    return res
def extract_img_to_file_(soup, savefile, tagname, attrname):
    n = 0
    tot = 0
    for tag in soup.select(tagname):
        tot += 1
        src = tag[attrname]
        if not src.startswith('data:'):
            continue

        mime, data = get_mime_data_from_base64_string(src)

        # now we should make up the data
        if tag.has_attr('id'):
            basename = tag['id']
        else:
            md5 = get_md5(data)
            basename = 'data-from-%s-%s' % (tagname, md5)

        # Guess extension
        ext = get_ext_for_mime(mime)
        filename = basename + '.' + ext
        src = "%s" % filename
        # ask what we should be using
        use_src = savefile(filename, data)
        check_isinstance(use_src, str)
        tag[attrname] = use_src
        n += 1

    logger.debug(('extract_img_to_file: extracted %d/%d images from %r tags, '
                  ' attribute %r.') % (n, tot, tagname, attrname))
def replace_m(inside, opt):
    # print('replacing environment %r inside %r opt %r' % (envname, inside, opt))
    thm_label = opt
    contents, label = get_s_without_label(inside, labelprefix=labelprefix)
    if label is not None and isinstance(labelprefix, str):
        assert label.startswith(labelprefix), (s, labelprefix, label)
    if label is not None:
        id_part = "id='%s' " % label
    else:
        if make_label_if_missing:
            if isinstance(labelprefix, tuple):
                usel = labelprefix[0]
            else:
                usel = labelprefix
            makeup_id = usel + get_md5(inside)[:5]
            # trailing space needed by the '<div %sclass=...' template below
            id_part = "id='%s' " % makeup_id
        else:
            id_part = ''
    # print('using label %r for env %r (labelprefix %r)' % (label, envname, labelprefix))
    l = "<span class='%s_label latex_env_label'>%s</span>" % (
        classname, thm_label) if thm_label else ""
    rr = '<div %sclass="%s latex_env" markdown="1">%s%s</div>' % (
        id_part, classname, l, contents)
    return rr
def hash_code(self):
    codes = []
    for f in sorted(self._files):
        codes.append([f, self._files[f].hash_code()])
    for d in sorted(self._directories):
        codes.append([d, self._directories[d].hash_code()])
    return get_md5(yaml_dump(codes))
def data_hash_code(s):
    if s is None:
        return 'None'
    if isinstance(s, str):
        return get_md5(s)
    elif isinstance(s, datetime.datetime):
        return get_md5(yaml_dump(s))
    elif isinstance(s, list):
        return get_md5("-".join(map(data_hash_code, s)))
    elif isinstance(s, dict):
        keys = sorted(s)
        values = [s[k] for k in keys]
        codes = ['%s-%s' % (k, data_hash_code(v)) for k, v in zip(keys, values)]
        return data_hash_code("_".join(codes))
    else:
        msg = 'Invalid type %s' % describe_type(s)
        raise ValueError(msg)
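# A sketch of how data_hash_code recurses, assuming the function above is in
# scope; the document below is a made-up example.
import datetime

doc = {'title': 'intro', 'tags': ['a', 'b'], 'when': datetime.datetime(2018, 1, 1)}
h1 = data_hash_code(doc)  # dict: keys sorted, each hashed as 'key-hash(value)'
doc['tags'].append('c')
h2 = data_hash_code(doc)
assert h1 != h2  # a change anywhere in the nested structure changes the digest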
def job_bib_contents(context, bib_files):
    bib_files = natsorted(bib_files)
    # read all contents
    contents = ""
    for fn in bib_files:
        contents += open(fn).read() + '\n\n'
    h = get_md5(contents)[:8]
    job_id = 'bibliography-' + h
    return context.comp(run_bibtex2html, contents, job_id=job_id)
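# Note the pattern used throughout this file: the content hash is baked into
# the compmake job_id, so the job is re-run exactly when its inputs change.
# A sketch with made-up file names:
contents = open('one.bib').read() + '\n\n' + open('two.bib').read() + '\n\n'
job_id = 'bibliography-' + get_md5(contents)[:8]
# same contents   -> same job_id -> compmake reuses the cached result
# edited contents -> new job_id  -> run_bibtex2html is executed again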
def create_split_jobs(context, data_aug, mathjax, preamble, output_dir,
                      nworkers=0,
                      extra_panel_content=None, add_toc_if_not_existing=True,
                      output_crossref=None,
                      permalink_prefix=None,
                      only_refs=False,
                      reveal=True):
    data = data_aug.get_result()
    if nworkers == 0:
        nworkers = max(1, cpu_count() - 2)

    res = AugmentedResult()
    res.merge(data_aug)
    h = get_md5(data)[-4:]
    jobs = []

    assets_dir = os.path.join(output_dir, 'assets')

    with timeit("preprocess"):
        soup = bs_entire_document(data)
        embed_css_files(soup)
        fo = os.path.join(output_dir, 'dummy.html')
        save_css(soup, fo, assets_dir)
        data = to_html_entire_document(soup)

    for i in range(nworkers):
        promise = context.comp_dynamic(
            go, i, nworkers, data, mathjax, preamble, output_dir,
            add_toc_if_not_existing=add_toc_if_not_existing,
            assets_dir=assets_dir,
            extra_panel_content=extra_panel_content,
            output_crossref=output_crossref,
            permalink_prefix=permalink_prefix,
            only_refs=only_refs,
            job_id='worker-%d-of-%d-%s' % (i, nworkers, h))
        jobs.append(promise)
        if only_refs:
            break

    if reveal:
        jobs.append(context.comp(download_reveal, output_dir))

    return context.comp(notification, res, jobs, output_dir)
def extract_svg_to_file(soup, savefile):
    n = 0
    tot = 0
    for i, svg in enumerate(list(soup.select('svg'))):
        tot += 1
        if not svg.attrs.get('class', ''):
            # only do the ones we rendered  # XXX
            continue
        # <svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.505ex;" viewbox="0
        svg['xmlns'] = "http://www.w3.org/2000/svg"
        svg['version'] = "1.1"
        prefix = """<?xml version="1.0"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n"""
        img = Tag(name='img')
        if 'width' in svg.attrs:
            add_style(img, width=svg['width'] + 'pt', height=svg['height'] + 'pt')
            svg.attrs.pop('width')
            svg.attrs.pop('height')
        data = prefix + str(svg)
        md5 = get_md5(data)
        basename = 'svg-%03d-%s' % (i, md5)
        propose = basename + '.svg'
        url = savefile(propose, data)

        # for k, v in svg.attrs.items():
        #     img[k] = v
        img['class'] = svg.attrs.get('class', '')
        if svg.has_attr('id'):  # has_attr checks attributes, not children
            img['id'] = svg['id']
        img['src'] = url
        svg.replace_with(img)
        n += 1  # one more SVG extracted

    logger.debug('extract_svg_to_file: extracted %d/%d images from SVG tags.' % (n, tot))
def extract_img_to_file_(soup, savefile, tagname, attrname):
    n = 0
    tot = 0
    for tag in soup.select(tagname):
        tot += 1
        if not attrname in tag.attrs:
            msg = 'No attr %r found for tag %s' % (attrname, tag)
            logger.warning(msg)
            continue
        src = tag.attrs[attrname]
        if not src.startswith('data:'):
            continue

        mime, data = get_mime_data_from_base64_string(src)

        # now we should make up the data
        if tag.has_attr('id'):
            basename = tag['id']
        else:
            md5 = get_md5(data)
            basename = 'data-from-%s-%s' % (tagname, md5)

        # Guess extension
        ext = get_ext_for_mime(mime)
        filename = basename + '.' + ext
        # src = "%s" % filename
        # ask what we should be using
        # print('saving file %s with %d data' % (filename, len(data)))
        use_src = savefile(filename, data)
        check_isinstance(use_src, str)
        tag[attrname] = use_src
        n += 1

    if False:
        logger.debug(
            ('extract_img_to_file: extracted %d/%d images from %r tags, '
             ' attribute %r.') % (n, tot, tagname, attrname))
    return n
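# Both extract_svg_to_file and extract_img_to_file_ write through a 'savefile'
# callback: it receives a proposed filename plus the raw bytes, and returns the
# src to substitute back into the tag. A minimal sketch of such a callback; the
# assets/ layout is an assumption, not from the original source.
def savefile_to_assets(proposed_name, data):
    dest = os.path.join('assets', proposed_name)
    make_sure_dir_exists(dest)  # creates the parent directory of 'dest'
    with open(dest, 'wb') as f:
        f.write(data)
    return dest  # relative URL that ends up in the tag's attribute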
def todoist_sync(data, user, secret, prefix):
    import todoist
    api = todoist.TodoistAPI()
    api.user.login(user, secret)
    response = api.sync()
    if 'error' in response:
        logger.error(response)
        raise Exception(response['error_extra'])
    projects = api.projects.all()
    # print projects
    use = 'Duckuments Tasks'
    for p in projects:
        if p['name'] == use:
            project_id = p['id']
            break
    else:
        msg = 'Could not find project %r' % use
        msg += '\nAvailable: %s' % ", ".join((_['name'] for _ in projects))
        raise Exception(msg)

    collaborators = response['collaborators']
    for c in collaborators:
        logger.info('%s %s' % (c['full_name'], c['email']))

    items = [_ for _ in api.items.all() if _['project_id'] == project_id]

    found = {}
    for i in items:
        if ';' in i['content']:
            tokens = i['content'].split(';')
            found[tokens[1].strip()] = i

    tasks = data.get_notes_by_tag(MCDPManualConstants.NOTE_TAG_TASK)
    n = 0
    for task in tasks:
        responsible_uid = get_task_person_uid(task, collaborators)
        ID = get_md5(str(task.msg))[-8:]
        if ID in found:
            msg = 'Task %s already in DB' % ID
            logger.info(msg)
            todoist_id = found[ID]['id']
            item = found[ID]
            if item['checked']:
                logger.debug('Setting the item to not done.')
                api.items.uncomplete([todoist_id])
            if found[ID]['responsible_uid'] != responsible_uid:
                logger.info('need to update responsible id - to %s' % responsible_uid)
                # print found[ID]
                api.items.update(todoist_id, responsible_uid=responsible_uid)
            continue

        html = task.as_html()
        for img in list(html.select('img')):
            img.extract()
        for a in html.select('a[href]'):
            href = a.attrs['href']
            if not href.startswith('http'):
                href = prefix + href
                a.attrs['href'] = href

        html_str = str(html)
        html_str = html_str.replace('<br/>', ' ')
        res = tomd.Tomd(html_str).markdown

        location = list(task.locations.values())[0]
        stack = location.get_stack()
        for l in stack:
            if isinstance(l, GithubLocation):
                head = l.path
                break
        else:
            head = '???'
        content = head + '; ' + ID
        # content = res[:100]
        item = api.items.add(content, project_id, responsible_uid=responsible_uid)
        api.notes.add(item['id'], res)
        n += 1
        if n > 45:
            break
        time.sleep(0.2)
    api.commit()
    api.sync()
def makefigure(inside, opt, asterisk):  # @UnusedVariable
    align = opt  # @UnusedVariable
    # print('makefigure inside = %r' % inside)

    def subfloat_replace(args, opts):
        contents = args[0]
        caption = opts[0]
        check_isinstance(contents, str)
        if caption is None:
            label = None
        else:
            caption, label = get_s_without_label(caption, labelprefix="fig:")
            if label is None:
                caption, label = get_s_without_label(caption, labelprefix="subfig:")
            if label is not None and not label.startswith('subfig:'):
                msg = 'Subfigure labels should start with "subfig:"; found %r.' % (label)
                label = 'sub' + label
                msg += ' I will change to %r.' % label
                logger.debug(msg)

        # we need to make up an ID
        if label is None:
            label = 'subfig:' + get_md5(contents)
            # print('making up label %r' % label)
        # if label is not None:
        idpart = ' id="%s"' % label
        # else:
        #     idpart = ""

        if caption is None:
            caption = 'no subfloat caption'
        res = '<figure class="subfloat"%s>%s<figcaption>%s</figcaption></figure>' % (
            idpart, contents, caption)
        return res

    inside = substitute_command_ext(inside, 'subfloat', subfloat_replace,
                                    nargs=1, nopt=1)

    class Tmp:
        label = None

    def sub_caption(args, opts):
        assert not opts and len(args) == 1
        x, Tmp.label = get_s_without_label(args[0], labelprefix="fig:")
        res = '<figcaption>' + x + "</figcaption>"
        # print('caption args: %r, %r' % (args, opts))
        return res

    inside = substitute_command_ext(inside, 'caption', sub_caption,
                                    nargs=1, nopt=0)
    # print('makefigure inside without caption = %r' % inside)
    assert not '\\caption' in inside

    if Tmp.label is None:
        Tmp.label = 'fig:' + get_md5(inside)
        # print('making up label %r' % Tmp.label)
    # if Tmp.label is not None:
    idpart = ' id="%s"' % Tmp.label
    # else:
    #     idpart = ""

    res = '<figure%s>%s</figure>' % (idpart, inside)
    return res
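# A rough sketch of the transformation makefigure performs; the input snippet
# and the exact output layout are illustrative, not taken from the source.
inside = r'\subfloat[One \label{subfig:a}]{<img src="a.png"/>} \caption{Pair \label{fig:pair}}'
html = makefigure(inside, opt=None, asterisk=False)
# roughly: <figure id="fig:pair">
#   <figure class="subfloat" id="subfig:a"><img src="a.png"/><figcaption>One</figcaption></figure>
#   <figcaption>Pair</figcaption>
# </figure>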
def render_complete(library, s, raise_errors, realpath, generate_pdf=False,
                    check_refs=False, use_mathjax=True, filter_soup=None,
                    symbols=None):
    """ Transforms markdown into html and then renders the mcdp snippets inside.

        s: a markdown string with embedded html snippets

        Returns an HTML string; not a complete document.

        filter_soup(library, soup)
    """
    s0 = s

    check_good_use_of_special_paragraphs(s0, realpath)
    raise_missing_image_errors = raise_errors

    # Imports here because of circular dependencies
    from .latex.latex_preprocess import extract_maths, extract_tabular
    from .latex.latex_preprocess import latex_preprocessing
    from .latex.latex_preprocess import replace_equations
    from .macro_col2 import col_macros, col_macros_prepare_before_markdown
    from .mark.markd import render_markdown
    from .preliminary_checks import do_preliminary_checks_and_fixes
    from .prerender_math import prerender_mathjax

    if isinstance(s, unicode):
        msg = 'I expect a str encoded with utf-8, not unicode.'
        raise_desc(TypeError, msg, s=s)

    # need to do this before do_preliminary_checks_and_fixes
    # because of & char
    s, tabulars = extract_tabular(s)

    s = do_preliminary_checks_and_fixes(s)

    # put back tabular, because extract_maths needs to grab them
    for k, v in tabulars.items():
        assert k in s
        s = s.replace(k, v)

    # copy all math content,
    # between $$ and $$
    # between various limiters etc.
    # returns a dict(string, substitution)
    s, maths = extract_maths(s)
    # print('maths = %s' % maths)

    for k, v in maths.items():
        if v[0] == '$' and v[1] != '$':  # single-dollar inline math only
            if '\n\n' in v:
                msg = 'Suspicious math fragment %r = %r' % (k, v)
                logger.error(maths)
                logger.error(msg)
                raise ValueError(msg)

    s = latex_preprocessing(s)
    s = '<div style="display:none">Because of mathjax bug</div>\n\n\n' + s

    # cannot parse html before markdown, because md will take
    # invalid html, (in particular '$ ciao <ciao>' and make it work)
    s = s.replace('*}', '\\*}')

    s, mcdpenvs = protect_my_envs(s)
    # print('mcdpenvs = %s' % maths)

    s = col_macros_prepare_before_markdown(s)

    # print(indent(s, 'before markdown | '))
    s = render_markdown(s)
    # print(indent(s, 'after markdown | '))

    for k, v in maths.items():
        if not k in s:
            msg = 'Cannot find %r (= %r)' % (k, v)
            raise_desc(DPInternalError, msg, s=s)

        def preprocess_equations(x):
            # this gets mathjax confused
            x = x.replace('>', '\\gt{}')  # need brace; think a<b -> a\lt{}b
            x = x.replace('<', '\\lt{}')
            # print('replaced equation %r by %r ' % (x0, x))
            return x

        v = preprocess_equations(v)
        s = s.replace(k, v)

    s = replace_equations(s)
    s = s.replace('\\*}', '*}')

    # this parses the XML
    soup = bs(s)

    other_abbrevs(soup)

    # need to process tabular before mathjax
    escape_for_mathjax(soup)

    # print(indent(s, 'before prerender_mathjax | '))
    # mathjax must be after markdown because of code blocks using "$"
    s = to_html_stripping_fragment(soup)

    if use_mathjax:
        s = prerender_mathjax(s, symbols)

    soup = bs(s)
    escape_for_mathjax_back(soup)
    s = to_html_stripping_fragment(soup)
    # print(indent(s, 'after prerender_mathjax | '))

    for k, v in mcdpenvs.items():
        # there is this case:
        # ~~~
        # <pre> </pre>
        # ~~~
        s = s.replace(k, v)

    s = s.replace('<p>DRAFT</p>', '<div class="draft">')
    s = s.replace('<p>/DRAFT</p>', '</div>')

    soup = bs(s)
    mark_console_pres(soup)

    try:
        substitute_github_refs(soup, defaults={})
    except Exception as e:
        msg = 'I got an error while substituting github: references.'
        msg += '\nI will ignore this error because it might not be the fault of the writer.'
        msg += '\n\n' + indent(str(e), '|', ' error: |')
        logger.warn(msg)

    # must be before make_figure_from_figureid_attr()
    display_files(soup, defaults={}, raise_errors=raise_errors)

    make_figure_from_figureid_attr(soup)
    col_macros(soup)
    fix_subfig_references(soup)

    library = get_library_from_document(soup, default_library=library)

    from mcdp_docs.highlight import html_interpret
    html_interpret(library, soup, generate_pdf=generate_pdf,
                   raise_errors=raise_errors, realpath=realpath)

    if filter_soup is not None:
        filter_soup(library=library, soup=soup)

    embed_images_from_library2(soup=soup, library=library,
                               raise_errors=raise_missing_image_errors)
    make_videos(soup=soup)

    if check_refs:
        check_if_any_href_is_invalid(soup)

    if getuser() == 'andrea':
        if MCDPConstants.preprocess_style_using_less:
            run_lessc(soup)
        else:
            logger.warning('preprocess_style_using_less=False might break the manual')

    fix_validation_problems(soup)

    strip_pre(soup)

    if MCDPManualConstants.enable_syntax_higlighting:
        syntax_highlighting(soup)

    if MCDPManualConstants.enforce_status_attribute:
        check_status_codes(soup, realpath)

    if MCDPManualConstants.enforce_lang_attribute:
        check_lang_codes(soup)

    # Fixes the IDs (adding 'sec:'); add IDs to missing ones
    globally_unique_id_part = 'autoid-DO-NOT-USE-THIS-VERY-UNSTABLE-LINK-' + get_md5(s0)[:5]
    fix_ids_and_add_missing(soup, globally_unique_id_part)

    check_no_patently_wrong_links(soup)

    s = to_html_stripping_fragment(soup)
    s = replace_macros(s)

    return s
def go(context, worker_i, num_workers, data, mathjax, preamble, output_dir,
       assets_dir, add_toc_if_not_existing, extra_panel_content,
       permalink_prefix=None, output_crossref=None, only_refs=False):
    res = AugmentedResult()
    soup = bs_entire_document(data)

    # extract the main toc if it is there
    with timeit("Extracting main toc"):
        main_toc = soup.find(id=MCDPManualConstants.MAIN_TOC_ID)

        if main_toc is None:
            if add_toc_if_not_existing:
                # logger.info('Generating TOC because it is not there')
                tocg = generate_toc(soup)
                main_toc = bs(tocg).ul
                main_toc.attrs['class'] = 'toc'  # XXX: see XXX13
                assert main_toc is not None
                substituting_empty_links(main_toc, raise_errors=False,
                                         res=res, extra_refs=soup)
            else:
                msg = 'Could not find main toc (id #%s)' % MCDPManualConstants.MAIN_TOC_ID
                res.note_error(msg)
                main_toc = Tag(name='div')
                main_toc.append('TOC NOT FOUND')
        else:
            main_toc = main_toc.__copy__()
            if 'id' in main_toc.attrs:
                del main_toc.attrs['id']

    # XXX: this is not the place to do it
    mark_toc_links_as_errored(main_toc, soup)

    body = soup.html.body

    with timeit("split_in_files"):
        filename2contents = split_in_files(body)

    id2filename = get_id2filename(filename2contents)
    res.set_result(id2filename)

    if output_crossref is not None:
        from mcdp_docs.mcdp_render_manual import write_crossref_info
        context.comp(write_crossref_info, data=data, id2filename=id2filename,
                     output_crossref=output_crossref,
                     permalink_prefix=permalink_prefix)

    if only_refs:
        logger.debug('Skipping rest because only_refs')
        return res

    with timeit("add_prev_next_links"):
        filename2contents = add_prev_next_links(filename2contents)

    with timeit("preparing assets dir"):
        if not os.path.exists(output_dir):
            try:
                os.makedirs(output_dir)
            except:
                pass

    with timeit("creating link.html and link.js"):
        linkbase = 'link.html'  # do not change (it's used by http://purl.org/dth)
        linkbasejs = 'link.js'

        lb = create_link_base(id2filename)
        write_data_to_file(str(lb), os.path.join(output_dir, linkbase), quiet=True)

        linkjs = create_link_base_js(id2filename)
        write_data_to_file(str(linkjs), os.path.join(output_dir, linkbasejs), quiet=True)

    if preamble is not None:
        if preamble.endswith('.tex'):  # XXX
            preamble = open(preamble).read()

    ids_to_use = []
    for k in list(id2filename):
        if not 'autoid' in k:
            ids_to_use.append(k)
    ids_to_use = sorted(ids_to_use)

    pointed_to = []
    for k in ids_to_use:
        f = id2filename[k]
        if not f in pointed_to:
            pointed_to.append(f)

    # data = ",".join(pointed_to)
    head0 = soup.html.head

    if True:
        context.comp(remove_spurious, output_dir, list(filename2contents))

    with timeit('main_toc copy'):
        main_toc0 = main_toc.__copy__()
        main_toc0_s = str(main_toc0)

    asset_jobs = []
    for i, (filename, contents) in enumerate(filename2contents.items()):
        if i % num_workers != worker_i:
            continue

        with timeit('main_toc copy hack'):
            main_toc = bs(main_toc0_s).ul
            assert main_toc is not None

        # Trick: we add the main_toc, and then ... (look below)
        with timeit('make_page'):
            add_home_link = 'index.html' not in filename2contents
            html = make_page(contents, head0, main_toc, extra_panel_content,
                             add_home_link=add_home_link)

        with timeit("direct job"):
            result = only_second_part(mathjax, preamble, html, id2filename, filename)

        # ... we remove it. In this way we don't have to copy it every time...
        main_toc.extract()

        fn = os.path.join(output_dir, filename)
        h = get_md5(result)[:8]
        r = context.comp(extract_assets_from_file, result, fn, assets_dir,
                         job_id='%s-%s-assets' % (filename, h))
        asset_jobs.append(r)

    update_refs_('toc.html', main_toc, id2filename)
    out_toc = os.path.join(output_dir, 'toc.html')
    write_data_to_file(str(main_toc), out_toc, quiet=True)

    return context.comp(wait_assets, res, asset_jobs)
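# The split across the go() workers is plain round-robin over the ordered list
# of output files: worker w of N takes the indices congruent to w mod N, so the
# shards are disjoint and jointly cover everything. Sketch (do_file stands in
# for the real per-file work, make_page + only_second_part):
for i, (filename, contents) in enumerate(filename2contents.items()):
    if i % num_workers != worker_i:
        continue  # another worker owns this index
    do_file(filename, contents)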
def manual_jobs(context, src_dirs, resources_dirs, out_split_dir, output_file,
                generate_pdf, stylesheet, stylesheet_pdf, use_mathjax, raise_errors,
                resolve_references=True,
                remove=None, filter_soup=None, symbols=None,
                out_pdf=None, only_refs=False,
                permalink_prefix=None,
                compose_config=None,
                output_crossref=None,
                do_last_modified=False,
                wordpress_integration=False,
                ignore_ref_errors=False,
                likebtn=None,
                extra_crossrefs=None):
    """
        src_dirs: list of sources
        symbols: a TeX preamble (or None)
    """
    #
    # if symbols is not None:
    #     symbols = open(symbols).read()
    if stylesheet_pdf is None:
        stylesheet_pdf = stylesheet
    # outdir = os.path.dirname(out_split_dir)  # XXX
    filenames = get_markdown_files(src_dirs)

    if not filenames:
        msg = "Could not find any file for composing the book."
        raise Exception(msg)

    files_contents = []
    for i, filename in enumerate(filenames):
        if is_ignored_by_catkin(filename):
            logger.debug('Ignoring because of CATKIN_IGNORE: %s' % filename)
            continue
        logger.info('adding document %s ' % friendly_path(filename))

        docname, _ = os.path.splitext(os.path.basename(filename))

        contents = open(filename).read()
        contents_hash = get_md5(contents)[:8]
        # because of hash job will be automatically erased if the source changes
        out_part_basename = '%03d-%s-%s' % (i, docname, contents_hash)
        job_id = '%s-%s-%s' % (docname, get_md5(filename)[:8], contents_hash)

        try:
            source_info = get_source_info(filename)
        except NoSourceInfo as e:
            logger.warn('No source info for %s:\n%s' % (filename, e))
            source_info = None

        for d in src_dirs:
            if filename.startswith(d):
                break
        else:
            msg = "Could not find dir for %s in %s" % (filename, src_dirs)
            raise Exception(msg)

        html_contents = context.comp(render_book, generate_pdf=generate_pdf,
                                     src_dirs=src_dirs + resources_dirs,
                                     data=contents,
                                     realpath=filename,
                                     use_mathjax=use_mathjax,
                                     symbols=symbols,
                                     raise_errors=raise_errors,
                                     filter_soup=filter_soup,
                                     ignore_ref_errors=ignore_ref_errors,
                                     job_id=job_id)

        doc = DocToJoin(docname=out_part_basename, contents=html_contents,
                        source_info=source_info)
        files_contents.append(tuple(doc))  # compmake doesn't do namedtuples

    ignore = []
    if output_crossref:
        ignore.append(output_crossref)

    crossrefs_aug = get_cross_refs(resources_dirs, permalink_prefix,
                                   extra_crossrefs, ignore=ignore)

    bib_files = get_bib_files(src_dirs)

    logger.debug('Found bib files:\n%s' % "\n".join(bib_files))
    if bib_files:
        bib_contents_aug = job_bib_contents(context, bib_files)
        entry = DocToJoin(docname='bibtex', contents=bib_contents_aug,
                          source_info=None)
        files_contents.append(tuple(entry))

    if do_last_modified:
        data_aug = context.comp(make_last_modified, files_contents=files_contents)
        entry = DocToJoin(docname='last_modified', contents=data_aug,
                          source_info=None)
        files_contents.append(tuple(entry))

    root_dir = src_dirs[0]

    template = get_main_template(root_dir, resources_dirs)

    references = OrderedDict()
    # base_url = 'http://book.duckietown.org/master/duckiebook/pdoc'
    # for extra_dir in extra_dirs:
    #     res = read_references(extra_dir, base_url, prefix='python:')
    #     references.update(res)

    # extra = look_for_files(extra_dirs, "*.html")
    #
    # for filename in extra:
    #     contents = open(filename).read()
    #     docname = os.path.basename(filename) + '_' + get_md5(filename)[:5]
    #     c = (('unused', docname), contents)
    #     files_contents.append(c)

    cs = get_md5(crossrefs_aug.get_result())[:8]

    joined_aug = context.comp(manual_join, template=template,
                              files_contents=files_contents,
                              stylesheet=None,
                              remove=remove, references=references,
                              resolve_references=resolve_references,
                              crossrefs_aug=crossrefs_aug,
                              permalink_prefix=permalink_prefix,
                              job_id='join-%s' % cs)

    if compose_config is not None:
        try:
            data = yaml.load(open(compose_config).read())  # XXX
            compose_config_interpreted = ComposeConfig.from_yaml(data)
        except ValueError as e:
            msg = 'Cannot read YAML config file %s' % compose_config
            raise_wrapped(UserError, e, msg, compact=True)
        else:
            joined_aug = context.comp(make_composite, compose_config_interpreted, joined_aug)

    joined_aug = context.comp(mark_errors_and_rest, joined_aug)

    if likebtn:
        joined_aug = context.comp(add_likebtn, joined_aug, likebtn)

    if wordpress_integration:
        joined_aug = context.comp(add_related, joined_aug)

    if output_file is not None:
        context.comp(write, joined_aug, output_file)

    if out_split_dir is not None:
        joined_aug_with_html_stylesheet = context.comp(add_style, joined_aug, stylesheet)

        extra_panel_content = context.comp(get_extra_content,
                                           joined_aug_with_html_stylesheet)

        id2filename_aug = context.comp_dynamic(
            create_split_jobs,
            data_aug=joined_aug_with_html_stylesheet,
            mathjax=True,
            preamble=symbols,
            extra_panel_content=extra_panel_content,
            output_dir=out_split_dir, nworkers=0,
            output_crossref=output_crossref,
            permalink_prefix=permalink_prefix,
            only_refs=only_refs)

        if not only_refs:
            context.comp(write_errors_and_warnings_files, id2filename_aug,
                         out_split_dir)
            context.comp(write_manifest_html, out_split_dir)

    if out_pdf is not None:
        joined_aug_with_pdf_stylesheet = context.comp(add_style, joined_aug,
                                                      stylesheet_pdf)
        prerendered = context.comp(prerender, joined_aug_with_pdf_stylesheet,
                                   symbols=symbols)
        pdf_data = context.comp(render_pdf, prerendered)
        context.comp(write_data_to_file, pdf_data, out_pdf)
        context.comp(write_manifest_pdf, out_pdf)
def hash_code(self):
    return get_md5(self.contents)
def manual_jobs(context, src_dirs, output_file, generate_pdf, stylesheet,
                use_mathjax, raise_errors, resolve_references=True,
                remove=None, filter_soup=None, extra_css=None,
                symbols=None,
                do_last_modified=False):
    """
        src_dirs: list of sources
        symbols: a TeX preamble (or None)
    """
    filenames = get_markdown_files(src_dirs)
    print('using:')
    print("\n".join(filenames))

    if not filenames:
        msg = 'Could not find any file for composing the book.'
        raise Exception(msg)

    files_contents = []
    for i, filename in enumerate(filenames):
        if is_ignored_by_catkin(filename):
            logger.debug('Ignoring because of CATKIN_IGNORE: %s' % filename)
            continue
        logger.info('adding document %s ' % friendly_path(filename))

        docname, _ = os.path.splitext(os.path.basename(filename))

        contents = open(filename).read()
        contents_hash = get_md5(contents)[:8]
        # because of hash job will be automatically erased if the source changes
        out_part_basename = '%03d-%s-%s' % (i, docname, contents_hash)
        job_id = '%s-%s-%s' % (docname, get_md5(filename)[:8], contents_hash)

        source_info = get_source_info(filename)

        # find the dir
        for d in src_dirs:
            if os.path.realpath(d) in filename:
                break
        else:
            msg = 'Could not find dir for %s in %s' % (filename, src_dirs)
            raise Exception(msg)

        html_contents = context.comp(render_book, generate_pdf=generate_pdf,
                                     src_dirs=src_dirs,
                                     data=contents,
                                     realpath=filename,
                                     use_mathjax=use_mathjax,
                                     symbols=symbols,
                                     raise_errors=raise_errors,
                                     main_file=output_file,
                                     out_part_basename=out_part_basename,
                                     filter_soup=filter_soup,
                                     extra_css=extra_css,
                                     job_id=job_id)

        doc = DocToJoin(docname=out_part_basename, contents=html_contents,
                        source_info=source_info)
        files_contents.append(tuple(doc))  # compmake doesn't do namedtuples

    bib_files = get_bib_files(src_dirs)
    logger.debug('Found bib files:\n%s' % "\n".join(bib_files))
    if bib_files:
        bib_contents = job_bib_contents(context, bib_files)
        entry = DocToJoin(docname='bibtex', contents=bib_contents,
                          source_info=None)
        files_contents.append(tuple(entry))

    if do_last_modified:
        data = context.comp(make_last_modified, files_contents=files_contents)
        entry = DocToJoin(docname='last_modified', contents=data,
                          source_info=None)
        files_contents.append(tuple(entry))

    root_dir = src_dirs[0]

    template = get_main_template(root_dir)

    references = OrderedDict()
    # base_url = 'http://book.duckietown.org/master/duckiebook/pdoc'
    # for extra_dir in extra_dirs:
    #     res = read_references(extra_dir, base_url, prefix='python:')
    #     references.update(res)

    # extra = look_for_files(extra_dirs, "*.html")
    #
    # for filename in extra:
    #     contents = open(filename).read()
    #     docname = os.path.basename(filename) + '_' + get_md5(filename)[:5]
    #     c = (('unused', docname), contents)
    #     files_contents.append(c)

    d = context.comp(manual_join, template=template,
                     files_contents=files_contents,
                     stylesheet=stylesheet, remove=remove,
                     references=references,
                     resolve_references=resolve_references)

    context.comp(write, d, output_file)

    if os.path.exists(MCDPManualConstants.pdf_metadata_template):
        context.comp(generate_metadata, root_dir)
def go(context, worker_i, num_workers, ifilename, mathjax, preamble, output_dir):
    with timeit("reading %s" % ifilename):
        soup = read_html_doc_from_file(ifilename)

    # extract the main toc if it is there
    with timeit("Extracting main_toc"):
        main_toc = soup.find(id='main_toc')
        if main_toc is None:
            msg = 'Could not find the element #main_toc.'
            raise ValueError(msg)
        main_toc = main_toc.__copy__()
        del main_toc.attrs['id']

    body = soup.html.body

    with timeit("split_in_files"):
        filename2contents = split_in_files(body)

    with timeit("add_prev_next_links"):
        filename2contents = add_prev_next_links(filename2contents)

    with timeit("preparing assets dir"):
        if not os.path.exists(output_dir):
            try:
                os.makedirs(output_dir)
            except:
                pass

        assets_dir = os.path.join(output_dir, 'assets')

    with timeit("creating link.html and link.js"):
        id2filename = get_id2filename(filename2contents)

        linkbase = 'link.html'  # do not change (it's used by http://purl.org/dth)
        linkbasejs = 'link.js'

        lb = create_link_base(id2filename)
        write_data_to_file(str(lb), os.path.join(output_dir, linkbase))

        linkjs = create_link_base_js(id2filename)
        write_data_to_file(str(linkjs), os.path.join(output_dir, linkbasejs))

    if preamble:
        preamble = open(preamble).read()

    ids_to_use = []
    for k in list(id2filename):
        if not 'autoid' in k:
            ids_to_use.append(k)
    ids_to_use = sorted(ids_to_use)

    pointed_to = []
    for k in ids_to_use:
        f = id2filename[k]
        if not f in pointed_to:
            pointed_to.append(f)

    data = ",".join(pointed_to)
    links_hash = get_md5(data)[:8]
    # if self.options.faster_but_imprecise:
    #     links_hash = "nohash"
    #
    # logger.debug('hash data: %r' % data)
    logger.debug('hash value: %r' % links_hash)

    head0 = soup.html.head

    if True:
        context.comp(remove_spurious, output_dir, list(filename2contents))

    tmpd = create_tmpdir()
    n = len(filename2contents)
    for i, (filename, contents) in enumerate(filename2contents.items()):
        if i % num_workers != worker_i:
            continue

        # contents_hash = get_md5(str(contents) + str(preamble))[:8]
        # job_id = '%s-%s-%s' % (filename, links_hash, contents_hash)

        # Trick: we add the main_toc, and then ... (look below)
        with timeit('make_page'):
            html = make_page(contents, head0, main_toc)

        with timeit('main_toc copy'):
            main_toc = main_toc.__copy__()

        logger.debug('%d/%d: %s' % (i, n, filename))

        with timeit("direct job"):
            result = only_second_part(mathjax, preamble, html, id2filename, filename)

        # ... we remove it. In this way we don't have to copy it
        main_toc.extract()

        fn = os.path.join(output_dir, filename)
        fn0 = os.path.join(tmpd, filename)
        write_data_to_file(result, fn0, quiet=True)

        h = get_md5(result)[:8]
        context.comp(extract_assets_from_file, fn0, fn, assets_dir,
                     job_id='assets-%s' % h)