def write_data_to_file(data, filename, quiet=False):
    """
    Writes the data to the given filename.
    If the data did not change, the file is not touched.

    :param data: the bytes to write
    :param filename: destination path; also sanity-checked (a very long
        "filename" is assumed to be data passed by mistake)
    :param quiet: if True, suppress debug log messages
    :raises ValueError: if data is not bytes or filename is too long
    """
    if not isinstance(data, bytes):
        msg = 'Expected "data" to be bytes, not %s.' % type(data).__name__
        raise ValueError(msg)
    if len(filename) > 256:
        msg = 'Invalid argument filename: too long. Did you confuse it with data?'
        raise ValueError(msg)

    make_sure_dir_exists(filename)

    if os.path.exists(filename):
        # BUG FIX: read in binary mode. The previous text-mode read compared
        # str to bytes, which is always False on Python 3 (so the file was
        # always rewritten) and could raise decode errors on binary payloads.
        with open(filename, 'rb') as f:
            current = f.read()
        if current == data:
            if 'assets' not in filename:
                if not quiet:
                    logger.debug('already up to date %s' % friendly_path(filename))
            return

    with open(filename, 'wb') as f:
        f.write(data)

    if not quiet:
        logger.debug('Written to: %s' % friendly_path(filename))
def process_one(filename, page, target_pdf, target_svg, tmpdir, name):
    """
    Extracts a single page from a PDF, crops it, and writes both a PDF
    and an SVG version of the result.

    Intermediate files are placed in `tmpdir` using `name` as a prefix.
    """
    prefix = os.path.join(tmpdir, name)
    extracted = prefix + '_extracted.pdf'
    cropped = prefix + '_extracted_no_label.pdf'

    extract_page(filename, page, extracted)
    # Margins string is passed through to pdfcrop; presumably trims the
    # label area at the bottom of the page — TODO confirm semantics.
    pdfcrop_margins(extracted, cropped, "0mm 0mm 0mm 5cm")
    pdfcrop(cropped, target_pdf)
    pdf2svg(target_pdf, target_svg)

    logger.info('Wrote %s' % friendly_path(target_pdf))
    logger.info('Wrote %s' % friendly_path(target_svg))
def embed_css_files(soup):
    """ Look for <link> elements of CSS and embed them if they are local files"""
    # Matches e.g. <link href="..." rel="stylesheet" type="text/css"/>
    for link in list(soup.findAll('link', attrs={'rel': 'stylesheet', 'href': True})):
        href = link.attrs['href']
        if href.startswith('file://'):
            filename = href.replace('file://', '')
        elif href.startswith('/'):  # not on windows?
            filename = href
        else:
            # remote or relative href: leave the <link> untouched
            filename = None

        if filename is not None:
            if not os.path.exists(filename):
                msg = 'Cannot find CSS file %s' % filename
                logger.error(msg)
            else:
                logger.info('Embedding %r' % friendly_path(filename))
                # FIX: close the file deterministically; previously the
                # handle from open(...).read() was leaked.
                with open(filename) as f:
                    data = f.read()
                style = Tag(name='style')
                style.attrs['type'] = 'text/css'
                style.string = data
                link.replace_with(style)
def check(ext, msg):
    """
    Raises an Exception built from `msg` if any files with extension
    `ext` were collected in `ext2filenames` (closure variable), listing
    the offending files in the message.
    """
    if ext not in ext2filenames:
        return
    offenders = ''.join('\n %s ' % friendly_path(f)
                        for f in ext2filenames[ext])
    raise Exception(msg + '\nOffending files: ' + offenders)
def process_svg_files(root, out, preamble):
    """
    Finds all *.svg files under `root` and builds a .pdf next to each
    source file (when out of date). Errors are logged and collected but
    not re-raised.
    """
    logger.info('root = %s' % root)
    logger.info('out = %s' % out)
    logger.info('preamble = %s' % preamble)
    if not os.path.exists(out):
        os.makedirs(out)

    logger.info('Looking for *.svg files...')
    svgs = locate_files(root, '*.svg')
    logger.info('%d found in %s' % (len(svgs), friendly_path(root)))

    errors = []
    for f in svgs:
        dirname = os.path.dirname(f)
        basename, _ = os.path.splitext(os.path.basename(f))
        # FIX: removed a dead assignment that first pointed `target` into
        # `out` and was immediately overwritten. The effective behavior has
        # always been to write the PDF next to the source SVG.
        # NOTE(review): as a consequence `out` is only created, never written
        # to — confirm whether the targets were meant to go into `out`.
        target = os.path.join(dirname, basename + '.pdf')
        if not needs_remake(f, target):
            msg = 'The target %r is up to date.' % target
            logger.info(msg)
        else:
            msg = 'Will build target %r.' % target
            logger.info(msg)
            tmpdir = create_tmpdir('svg-%s' % basename)
            try:
                process_svg_file(f, target, preamble, tmpdir)
            except Exception as e:
                logger.error(e)
                errors.append(e)
def __repr__(self):
    """Multi-line debug representation showing the filename and github info."""
    info = OrderedDict()
    info['filename'] = friendly_path(self.filename)
    github = self.github_info
    info['github'] = github if github is not None else '(not available)'
    return "LocalFile" + '\n' + indent(pretty_print_dict(info), '| ')
def check_bad_input_file_presence(d):
    """
    Collects the files under `d` by extension, builds an extension
    statistics summary, then runs the forbidden-extension and LFS
    checkout checks.
    """
    ext2filenames = collect_by_extension(d)

    lines = ['## Filename extensions statistics']
    lines.append("Found in %s:" % friendly_path(d))
    # Extensions ordered by descending number of files.
    by_count = sorted(ext2filenames,
                      key=lambda k: len(ext2filenames[k]), reverse=True)
    for ext in by_count:
        shown = ext if ext else '(no ext)'
        lines.append(' %3d %10s files' % (len(ext2filenames[ext]), shown))
    s = '\n'.join(lines)  # NOTE: currently unused; logging is commented out
    # from mcdp import logger
    # logger.info(s)

    no_forbidden(ext2filenames)
    check_lfs_checkout(ext2filenames)
def read_commands_from_file(filename, context):
    """
    Reads compmake commands from a file and interprets them one per line.

    Blank lines and lines starting with '#' are skipped. Each file is
    processed at most once per context (tracked via context.rc_files_read).
    """
    from compmake.jobs.uptodate import CacheQueryDB
    filename = os.path.realpath(filename)
    if filename in context.rc_files_read:
        return
    context.rc_files_read.append(filename)
    cq = CacheQueryDB(context.get_compmake_db())
    assert context is not None
    info('Reading configuration from %r.' % friendly_path(filename))
    with open(filename, 'r') as f:
        for raw in f:
            command = raw.strip()
            if not command or command.startswith('#'):
                continue
            interpret_commands_wrap(command, context=context, cq=cq)
def read_commands_from_file(filename, context):
    # NOTE(review): exact duplicate of read_commands_from_file defined
    # earlier in this file; this later definition shadows the earlier one
    # at import time. Consider removing one copy.
    #
    # Reads compmake commands from `filename` one per line, skipping blank
    # lines and '#' comments; each file is read at most once per context.
    from compmake.jobs.uptodate import CacheQueryDB
    filename = os.path.realpath(filename)
    # Each rc file is only processed once per context.
    if filename in context.rc_files_read:
        return
    else:
        context.rc_files_read.append(filename)
    cq = CacheQueryDB(context.get_compmake_db())
    assert context is not None
    info('Reading configuration from %r.' % friendly_path(filename))
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            if line[0] == '#':  # comment line
                continue
            interpret_commands_wrap(line, context=context, cq=cq)
def manual_jobs(context, src_dirs, output_file, generate_pdf, stylesheet,
                use_mathjax, raise_errors, resolve_references=True,
                remove=None, filter_soup=None, extra_css=None, symbols=None,
                do_last_modified=False):
    """
        src_dirs: list of sources
        symbols: a TeX preamble (or None)
    """
    # NOTE(review): a second, richer `manual_jobs` is defined later in this
    # file and shadows this one at import time — confirm which is intended.
    filenames = get_markdown_files(src_dirs)
    print('using:')
    print("\n".join(filenames))

    if not filenames:
        msg = 'Could not find any file for composing the book.'
        raise Exception(msg)

    files_contents = []
    for i, filename in enumerate(filenames):
        if is_ignored_by_catkin(filename):
            logger.debug('Ignoring because of CATKIN_IGNORE: %s' % filename)
            continue
        logger.info('adding document %s ' % friendly_path(filename))
        docname, _ = os.path.splitext(os.path.basename(filename))
        # Close the handle deterministically (was open(...).read()).
        with open(filename) as f:
            contents = f.read()
        contents_hash = get_md5(contents)[:8]
        # because of hash job will be automatically erased if the source changes
        out_part_basename = '%03d-%s-%s' % (i, docname, contents_hash)
        job_id = '%s-%s-%s' % (docname, get_md5(filename)[:8], contents_hash)

        source_info = get_source_info(filename)

        # find the dir
        for d in src_dirs:
            if os.path.realpath(d) in filename:
                break
        else:
            # BUG FIX: previously the message was built but never raised,
            # so this error was silently ignored (the second version of
            # this function does raise here).
            msg = 'Could not find dir for %s in %s' % (filename, src_dirs)
            raise Exception(msg)

        html_contents = context.comp(render_book,
                                     generate_pdf=generate_pdf,
                                     src_dirs=src_dirs,
                                     data=contents,
                                     realpath=filename,
                                     use_mathjax=use_mathjax,
                                     symbols=symbols,
                                     raise_errors=raise_errors,
                                     main_file=output_file,
                                     out_part_basename=out_part_basename,
                                     filter_soup=filter_soup,
                                     extra_css=extra_css,
                                     job_id=job_id)

        doc = DocToJoin(docname=out_part_basename, contents=html_contents,
                        source_info=source_info)
        files_contents.append(tuple(doc))  # compmake doesn't do namedtuples

    bib_files = get_bib_files(src_dirs)
    logger.debug('Found bib files:\n%s' % "\n".join(bib_files))
    if bib_files:
        bib_contents = job_bib_contents(context, bib_files)
        entry = DocToJoin(docname='bibtex', contents=bib_contents,
                          source_info=None)
        files_contents.append(tuple(entry))

    if do_last_modified:
        data = context.comp(make_last_modified, files_contents=files_contents)
        entry = DocToJoin(docname='last_modified', contents=data,
                          source_info=None)
        files_contents.append(tuple(entry))

    root_dir = src_dirs[0]
    template = get_main_template(root_dir)

    references = OrderedDict()
    # base_url = 'http://book.duckietown.org/master/duckiebook/pdoc'
    # for extra_dir in extra_dirs:
    #     res = read_references(extra_dir, base_url, prefix='python:')
    #     references.update(res)

    # extra = look_for_files(extra_dirs, "*.html")
    #
    # for filename in extra:
    #     contents = open(filename).read()
    #     docname = os.path.basename(filename) + '_' + get_md5(filename)[:5]
    #     c = (('unused', docname), contents)
    #     files_contents.append(c)

    d = context.comp(manual_join, template=template,
                     files_contents=files_contents,
                     stylesheet=stylesheet, remove=remove,
                     references=references,
                     resolve_references=resolve_references)
    context.comp(write, d, output_file)

    if os.path.exists(MCDPManualConstants.pdf_metadata_template):
        context.comp(generate_metadata, root_dir)
def manual_jobs(context, src_dirs, resources_dirs, out_split_dir, output_file,
                generate_pdf, stylesheet, stylesheet_pdf, use_mathjax,
                raise_errors, resolve_references=True, remove=None,
                filter_soup=None, symbols=None, out_pdf=None, only_refs=False,
                permalink_prefix=None, compose_config=None,
                output_crossref=None, do_last_modified=False,
                wordpress_integration=False, ignore_ref_errors=False,
                likebtn=None, extra_crossrefs=None):
    """
        src_dirs: list of sources
        symbols: a TeX preamble (or None)
    """
    # NOTE(review): shadows the earlier `manual_jobs` definition in this
    # file — confirm that only this version is meant to be used.
    #
    # if symbols is not None:
    #     symbols = open(symbols).read()
    if stylesheet_pdf is None:
        stylesheet_pdf = stylesheet
    # outdir = os.path.dirname(out_split_dir)  # XXX
    filenames = get_markdown_files(src_dirs)

    if not filenames:
        msg = "Could not find any file for composing the book."
        raise Exception(msg)

    files_contents = []
    for i, filename in enumerate(filenames):
        if is_ignored_by_catkin(filename):
            logger.debug('Ignoring because of CATKIN_IGNORE: %s' % filename)
            continue
        logger.info('adding document %s ' % friendly_path(filename))
        docname, _ = os.path.splitext(os.path.basename(filename))
        # Close the handle deterministically (was open(...).read()).
        with open(filename) as f:
            contents = f.read()
        contents_hash = get_md5(contents)[:8]
        # because of hash job will be automatically erased if the source changes
        out_part_basename = '%03d-%s-%s' % (i, docname, contents_hash)
        job_id = '%s-%s-%s' % (docname, get_md5(filename)[:8], contents_hash)

        try:
            source_info = get_source_info(filename)
        except NoSourceInfo as e:
            logger.warn('No source info for %s:\n%s' % (filename, e))
            source_info = None

        # find the source dir this file belongs to
        for d in src_dirs:
            if filename.startswith(d):
                break
        else:
            msg = "Could not find dir for %s in %s" % (filename, src_dirs)
            raise Exception(msg)

        html_contents = context.comp(render_book,
                                     generate_pdf=generate_pdf,
                                     src_dirs=src_dirs + resources_dirs,
                                     data=contents,
                                     realpath=filename,
                                     use_mathjax=use_mathjax,
                                     symbols=symbols,
                                     raise_errors=raise_errors,
                                     filter_soup=filter_soup,
                                     ignore_ref_errors=ignore_ref_errors,
                                     job_id=job_id)

        doc = DocToJoin(docname=out_part_basename, contents=html_contents,
                        source_info=source_info)
        files_contents.append(tuple(doc))  # compmake doesn't do namedtuples

    ignore = []
    if output_crossref:
        ignore.append(output_crossref)

    crossrefs_aug = get_cross_refs(resources_dirs, permalink_prefix,
                                   extra_crossrefs, ignore=ignore)

    bib_files = get_bib_files(src_dirs)
    logger.debug('Found bib files:\n%s' % "\n".join(bib_files))
    if bib_files:
        bib_contents_aug = job_bib_contents(context, bib_files)
        entry = DocToJoin(docname='bibtex', contents=bib_contents_aug,
                          source_info=None)
        files_contents.append(tuple(entry))

    if do_last_modified:
        data_aug = context.comp(make_last_modified,
                                files_contents=files_contents)
        entry = DocToJoin(docname='last_modified', contents=data_aug,
                          source_info=None)
        files_contents.append(tuple(entry))

    root_dir = src_dirs[0]
    template = get_main_template(root_dir, resources_dirs)

    references = OrderedDict()
    # base_url = 'http://book.duckietown.org/master/duckiebook/pdoc'
    # for extra_dir in extra_dirs:
    #     res = read_references(extra_dir, base_url, prefix='python:')
    #     references.update(res)

    # extra = look_for_files(extra_dirs, "*.html")
    #
    # for filename in extra:
    #     contents = open(filename).read()
    #     docname = os.path.basename(filename) + '_' + get_md5(filename)[:5]
    #     c = (('unused', docname), contents)
    #     files_contents.append(c)

    # The crossrefs hash makes the join job id change when crossrefs change.
    cs = get_md5((crossrefs_aug.get_result()))[:8]

    joined_aug = context.comp(manual_join, template=template,
                              files_contents=files_contents,
                              stylesheet=None, remove=remove,
                              references=references,
                              resolve_references=resolve_references,
                              crossrefs_aug=crossrefs_aug,
                              permalink_prefix=permalink_prefix,
                              job_id='join-%s' % cs)

    if compose_config is not None:
        try:
            # FIX: close the config file deterministically and use
            # safe_load; the full loader can execute arbitrary Python
            # tags and is unnecessary for a plain config file.
            with open(compose_config) as f:
                data = yaml.safe_load(f)
            compose_config_interpreted = ComposeConfig.from_yaml(data)
        except ValueError as e:
            msg = 'Cannot read YAML config file %s' % compose_config
            raise_wrapped(UserError, e, msg, compact=True)
        else:
            joined_aug = context.comp(make_composite,
                                      compose_config_interpreted, joined_aug)

    joined_aug = context.comp(mark_errors_and_rest, joined_aug)

    if likebtn:
        joined_aug = context.comp(add_likebtn, joined_aug, likebtn)

    if wordpress_integration:
        joined_aug = context.comp(add_related, joined_aug)

    if output_file is not None:
        context.comp(write, joined_aug, output_file)

    if out_split_dir is not None:
        joined_aug_with_html_stylesheet = context.comp(add_style, joined_aug,
                                                       stylesheet)
        extra_panel_content = context.comp(get_extra_content,
                                           joined_aug_with_html_stylesheet)
        id2filename_aug = context.comp_dynamic(
            create_split_jobs,
            data_aug=joined_aug_with_html_stylesheet,
            mathjax=True,
            preamble=symbols,
            extra_panel_content=extra_panel_content,
            output_dir=out_split_dir,
            nworkers=0,
            output_crossref=output_crossref,
            permalink_prefix=permalink_prefix,
            only_refs=only_refs)

        if not only_refs:
            context.comp(write_errors_and_warnings_files, id2filename_aug,
                         out_split_dir)
            context.comp(write_manifest_html, out_split_dir)

    if out_pdf is not None:
        joined_aug_with_pdf_stylesheet = context.comp(add_style, joined_aug,
                                                      stylesheet_pdf)
        prerendered = context.comp(prerender, joined_aug_with_pdf_stylesheet,
                                   symbols=symbols)
        pdf_data = context.comp(render_pdf, prerendered)
        context.comp(write_data_to_file, pdf_data, out_pdf)
        context.comp(write_manifest_pdf, out_pdf)