def pandoc_call(md_tmp_file, tex_tmp_file, build_file_base):
    """Call pandoc on tweaked markdown files.

    Unless ``args.fast`` is set, the bibliography at ``BIB_FILE`` is first
    regenerated by running the external ``fe`` command.  A subset
    bibliography restricted to the keys found in *md_tmp_file* is written
    to ``build_file_base + '.bib'``, and pandoc's LaTeX output is written
    to *tex_tmp_file* as UTF-8.

    Reads the module-level ``args`` and ``BIB_FILE``; returns nothing.
    """
    bib_file = BIB_FILE

    if not args.fast:
        # Build the single option cluster handed to `fe` from the flags.
        fe_opts = '-c'
        if args.online_URLs_only:
            fe_opts += 'o'
        if args.URL_long:
            fe_opts += 'l'
        if args.bibtex:
            fe_opts += 'b'
        info("fe_opts %s" % fe_opts)
        # Close the output explicitly so the bibliography is complete on
        # disk before it is read back below.
        with open(BIB_FILE, 'w') as bib_out:
            call(['fe', fe_opts], stdout=bib_out)

    # generate a subset bibtex
    # NOTE(review): the original indentation was lost; this step is assumed
    # to run regardless of args.fast -- confirm.
    keys = md2bib.getKeysFromMD(md_tmp_file)
    # The input stays open for the whole pipeline in case the md2bib helpers
    # consume it lazily.
    with open(BIB_FILE, 'r') as bib_in:
        entries = md2bib.parseBibTex(bib_in)
        subset = md2bib.subsetBibliography(entries, keys)
        with open(build_file_base + '.bib', 'w') as subset_out:
            md2bib.emitBibliography(subset, subset_out)

    pandoc_opts = ['-t', 'latex', '--biblatex',
                   '--bibliography=%s' % bib_file,
                   '--no-wrap', '--tab-stop', '8']
    pandoc_cmd = ['pandoc', md_tmp_file]
    pandoc_cmd.extend(pandoc_opts)
    info("pandoc cmd = '%s'" % ' '.join(pandoc_cmd))
    with codecs.open(tex_tmp_file, 'w', 'utf-8') as tex_out:
        call(pandoc_cmd, stdout=tex_out)
def _swap_british_quotes(content):
    """Return *content* with curly double and single quotes swapped."""
    # Park the original double quotes as entities first; otherwise they are
    # indistinguishable from the double quotes produced from single-quoted
    # spans below and everything would end up single-quoted.
    content = content.replace('“', '&ldquo;').replace('”', '&rdquo;')
    single_quote_re = re.compile(r"(\W)‘(.{2,40}?)’(\W)")
    content = single_quote_re.sub(r'\1“\2”\3', content)
    return content.replace('&ldquo;', '‘').replace('&rdquo;', '’')


def process(args):
    """Convert each markdown file in ``args.files`` to HTML with pandoc.

    For every input file this:

    1. builds the pandoc option list from the flags on *args*;
    2. copies the file, cleans up its text and writes a pre-pandoc
       temporary file;
    3. runs pandoc into a temporary HTML file;
    4. post-processes the HTML and writes it to ``<base>.html`` (or to
       ``args.output[0]`` when given), optionally running ``tidy`` and
       launching ``BROWSER`` on the result;
    5. removes the temporary files.

    Note that *args* is modified: presentation mode turns ``validate`` and
    ``css`` off, and ``style_chicago`` sets ``style_csl``.
    """
    if args.bibliography:
        with open(BIBTEX_FILE, 'r') as bibtex_file:
            bibtex_parsed = md2bib.parseBibTex(bibtex_file.readlines())

    # Loop-invariant: moves punctuation that trails a bracketed group back
    # inside the preceding closing quote.
    swap_punct_quote_re = re.compile(r'"( \[[^\[]+\])([,.])')
    bom = codecs.BOM_UTF8.decode('utf8')

    for in_file in args.files:

        ##############################
        # initial pandoc configuration based on arguments
        ##############################

        pandoc_opts = ['-s', '--smart', '--tab-stop', '4',
                       '--email-obfuscation=references']
        if args.presentation:
            args.validate = False
            args.css = False
            # pandoc 1.11.1 was driven with '-t html5', '--section-divs' and
            # a reveal.js template; pandoc dev has a revealjs writer.
            pandoc_opts.extend(['-t', 'revealjs', '--slide-level=2',
                                '-V', 'revealjs-url=../_reveal.js',
                                '-V', 'theme=moon',
                                '-c', '../_custom/revealjs.css'])
        if args.css:
            pandoc_opts.extend(['-c', args.css])
        if args.toc:
            pandoc_opts.extend(['--toc'])
        if args.offline:
            pandoc_opts.extend(['--self-contained'])
        if args.divs:
            pandoc_opts.extend(['--section-divs'])
        if args.include_after_body:
            pandoc_opts.extend(['--include-after-body=%s'
                                % args.include_after_body[0]])
        if args.style_chicago:
            args.style_csl = ['chicago-author-date.csl']

        ##############################
        ## pre pandoc
        ##############################

        info("in_file = '%s'" % (in_file))
        abs_fn = abspath(in_file)
        info("abs_fn = '%s'" % (abs_fn))
        base_fn, base_ext = splitext(abs_fn)
        info("base_fn = '%s'" % (base_fn))
        fn_path = os.path.split(abs_fn)[0]
        info("fn_path = '%s'" % (fn_path))

        fn_tmp_1 = "%s-1%s" % (base_fn, base_ext)  # as read
        fn_tmp_2 = "%s-2%s" % (base_fn, base_ext)  # pre-pandoc
        fn_tmp_3 = "%s-3%s" % (base_fn, '.html')   # post-pandoc
        cleanup_tmp_fns = [fn_tmp_1, fn_tmp_2, fn_tmp_3]

        if args.style_csl:
            print("args.style_csl = %s" % args.style_csl)
            pandoc_opts.extend(['--csl=%s' % args.style_csl[0]])
            info("generate temporary subset bibtex for speed")
            BIB_FILE = HOME+'/joseph/readings.bib'
            bib_subset_tmp_fn = base_fn + '.bib'
            cleanup_tmp_fns.append(bib_subset_tmp_fn)
            keys = md2bib.getKeysFromMD(abs_fn)
            info("keys = %s" % keys)
            # The input stays open for the whole pipeline in case the
            # md2bib helpers consume it lazily.
            with open(BIB_FILE, 'r') as bib_in:
                entries = md2bib.parseBibTex(bib_in)
                subset = md2bib.subsetBibliography(entries, keys)
                with open(bib_subset_tmp_fn, 'w') as bib_out:
                    md2bib.emitBibliography(subset, bib_out)
            pandoc_opts.extend(['--bibliography=%s' % bib_subset_tmp_fn, ])

        shutil.copyfile(abs_fn, fn_tmp_1)
        with codecs.open(fn_tmp_1, 'r', "UTF-8", "replace") as f1:
            content = f1.read()
        # startswith() rather than content[0] so an empty file is accepted.
        if content.startswith(bom):
            content = content[1:]

        print("split(abs_fn) = %s, %s" % (os.path.split(abs_fn)))

        # remove writemonkey repository and bookmarks
        content = content.split('***END OF FILE***')[0]
        content = content.replace('@@', '')

        if args.punctuation_outside:
            # move periods and commas outside closing quotes
            content = content.replace('."', '".').replace(',"', '",')
        else:
            content = swap_punct_quote_re.sub(r'\2"\1', content)

        with codecs.open(fn_tmp_2, 'w', "UTF-8", "replace") as f2:
            for line in content.split('\n'):
                # fix Wikicommons relative network-path references
                # so the URLs work on local file system (i.e.,'file:///')
                line = line.replace('src="//', 'src="http://')
                line = quash_citations(line)
                if args.bibliography:
                    # create hypertext refs from bibtex db
                    line = link_citations(line, bibtex_parsed)
                f2.write(line + '\n')

        ##############################
        ## pandoc
        ##############################

        pandoc_cmd = ['pandoc', '-f', 'markdown+mmd_title_block']
        pandoc_cmd.extend(pandoc_opts)
        pandoc_cmd.append(fn_tmp_2)
        print("pandoc_cmd: " + ' '.join(pandoc_cmd) + '\n')
        with open(fn_tmp_3, 'w') as pandoc_out:
            call(pandoc_cmd, stdout=pandoc_out)
        info("done pandoc_cmd")

        if args.presentation:
            create_talk_handout(abs_fn, fn_tmp_2)

        ##############################
        ## post pandoc
        ##############################

        # final tweaks to tmp html file
        with open(fn_tmp_3, 'r') as html_in:
            content = html_in.read()

        # text alternations
        if args.british_punctuation:
            # swap double/single quotes
            content = _swap_british_quotes(content)

        # HTML alterations
        if args.number_elements:
            content = number_elements(content)

        result_fn = '%s.html' % (base_fn)
        info("result_fn = '%s'" % (result_fn))
        if args.output:
            result_fn = args.output[0]
        # Close the result before tidy/the browser touch it so the content
        # is fully flushed to disk.
        with open(result_fn, 'w') as result_out:
            result_out.write(content)

        if args.validate:
            call(['tidy', '-utf8', '-q', '-i', '-m', '-w', '0', '-asxhtml',
                  result_fn])
        if args.launch_browser:
            info("launching %s" % result_fn)
            Popen([BROWSER, result_fn])

        info("removing tmp files")
        for cleanup_fn in cleanup_tmp_fns:
            if exists(cleanup_fn):
                remove(cleanup_fn)