Ejemplo n.º 1
0
def pandoc_call(md_tmp_file, tex_tmp_file, build_file_base):
    """
    Call pandoc on tweaked markdown files.

    Unless ``args.fast`` is set, the bibliography at ``BIB_FILE`` is first
    regenerated from the output of the ``fe`` command, and a subset holding
    only the keys found in `md_tmp_file` is written to
    ``build_file_base + '.bib'``.

    pandoc is then run on `md_tmp_file` with LaTeX/biblatex output, and its
    standard output is redirected into `tex_tmp_file`.

    Reads the module-level ``args`` and ``BIB_FILE``; returns None.
    """

    bib_file = BIB_FILE
    if not args.fast:
        # build the single-dash option cluster passed to `fe`
        fe_opts = '-c'
        if args.online_URLs_only:
            fe_opts += 'o'
        if args.URL_long:
            fe_opts += 'l'
        if args.bibtex:
            fe_opts += 'b'
        info("fe_opts %s" % fe_opts)
        # close the handle before BIB_FILE is re-opened for reading below
        with open(BIB_FILE, 'w') as bib_out:
            call(['fe', fe_opts], stdout=bib_out)
        # generate a subset bibtex
        keys = md2bib.getKeysFromMD(md_tmp_file)
        with open(BIB_FILE, 'r') as bib_in:
            # keep the input open until the subset has been written, in case
            # the md2bib helpers consume it lazily
            entries = md2bib.parseBibTex(bib_in)
            subset = md2bib.subsetBibliography(entries, keys)
            with open(build_file_base + '.bib', 'w') as subset_out:
                md2bib.emitBibliography(subset, subset_out)

    pandoc_opts = ['-t', 'latex', '--biblatex',
                   '--bibliography=%s' % bib_file,
                   '--no-wrap', '--tab-stop', '8']
    pandoc_cmd = ['pandoc', md_tmp_file]
    pandoc_cmd.extend(pandoc_opts)
    info("pandoc cmd = '%s'" % ' '.join(pandoc_cmd))
    with codecs.open(tex_tmp_file, 'w', 'utf-8') as tex_out:
        call(pandoc_cmd, stdout=tex_out)
Ejemplo n.º 2
0
def process(args):
    """
    Convert each markdown file in ``args.files`` to HTML with pandoc.

    For every input file the function:

    1. builds the pandoc option list from the flags on `args`
       (``presentation``, ``css``, ``toc``, ``offline``, ``divs``,
       ``include_after_body``, ``style_chicago``, ``style_csl``);
    2. copies the file to ``<base>-1<ext>``, applies text tweaks
       (BOM removal, truncation at ``***END OF FILE***``, ``@@`` removal,
       punctuation/quote reordering, per-line ``quash_citations`` and,
       with ``args.bibliography``, ``link_citations``) and writes the
       result to ``<base>-2<ext>``;
    3. runs pandoc on that file, redirecting its output to
       ``<base>-3.html``;
    4. post-processes the HTML (``args.british_punctuation``,
       ``args.number_elements``) and writes it to ``<base>.html`` or to
       ``args.output[0]`` when given;
    5. optionally runs ``tidy`` (``args.validate``) and opens the result
       in ``BROWSER`` (``args.launch_browser``);
    6. removes the temporary files it created.

    Note that `args` is mutated: ``presentation`` forces ``validate`` and
    ``css`` to False, and ``style_chicago`` sets ``style_csl``.

    Returns None.
    """

    if args.bibliography:
        with open(BIBTEX_FILE, 'r') as bibtex_file:
            bibtex_parsed = md2bib.parseBibTex(bibtex_file.readlines())

    # loop-invariant values, computed once for all input files
    utf8_bom = codecs.BOM_UTF8.decode('utf8')
    swap_punct_quote_re = re.compile(r'"( \[[^\[]+\])([,.])')
    single_quote_re = re.compile(r"(\W)‘(.{2,40}?)’(\W)")

    for in_file in args.files:

        ##############################
        # initial pandoc configuration based on arguments
        ##############################

        pandoc_opts = ['-s', '--smart', '--tab-stop', '4',
                       '--email-obfuscation=references']
        if args.presentation:
            args.validate = False
            args.css = False
            ## pandoc 1.11.1
            # pandoc_opts.extend(['-t', 'html5', '--slide-level=2',
            #                     '--section-divs',
            #                     '--template', '/home/reagle/.templates/template.revealjs',
            #                     '-V', 'revealjs-url=../_reveal.js',
            #                     '-V', 'theme=moon',
            #                     '-c', '../_custom/revealjs.css'])
            # pandoc dev
            pandoc_opts.extend(['-t', 'revealjs', '--slide-level=2',
                                '-V', 'revealjs-url=../_reveal.js',
                                '-V', 'theme=moon',
                                '-c', '../_custom/revealjs.css'])
        if args.css:
            pandoc_opts.extend(['-c', args.css])
        if args.toc:
            pandoc_opts.append('--toc')
        if args.offline:
            pandoc_opts.append('--self-contained')
        if args.divs:
            pandoc_opts.append('--section-divs')
        if args.include_after_body:
            pandoc_opts.append(
                '--include-after-body=%s' % args.include_after_body[0])
        if args.style_chicago:
            args.style_csl = ['chicago-author-date.csl']

        ##############################
        ##  pre pandoc
        ##############################

        info("in_file = '%s'" % (in_file))
        abs_fn = abspath(in_file)
        info("abs_fn = '%s'" % (abs_fn))

        base_fn, base_ext = splitext(abs_fn)
        info("base_fn = '%s'" % (base_fn))

        fn_path = os.path.split(abs_fn)[0]
        info("fn_path = '%s'" % (fn_path))

        fn_tmp_1 = "%s-1%s" % (base_fn, base_ext)  # as read
        fn_tmp_2 = "%s-2%s" % (base_fn, base_ext)  # pre-pandoc
        fn_tmp_3 = "%s-3%s" % (base_fn, '.html')   # post-pandoc
        cleanup_tmp_fns = [fn_tmp_1, fn_tmp_2, fn_tmp_3]

        if args.style_csl:
            print("args.style_csl = %s" % args.style_csl)
            pandoc_opts.append('--csl=%s' % args.style_csl[0])
            info("generate temporary subset bibtex for speed")
            BIB_FILE = HOME+'/joseph/readings.bib'
            bib_subset_tmp_fn = base_fn + '.bib'
            cleanup_tmp_fns.append(bib_subset_tmp_fn)
            keys = md2bib.getKeysFromMD(abs_fn)
            info("keys = %s" % keys)
            with open(BIB_FILE, 'r') as bib_in:
                # keep the input open until the subset is written, in case
                # the md2bib helpers consume it lazily
                entries = md2bib.parseBibTex(bib_in)
                subset = md2bib.subsetBibliography(entries, keys)
                with open(bib_subset_tmp_fn, 'w') as subset_out:
                    md2bib.emitBibliography(subset, subset_out)
            pandoc_opts.append('--bibliography=%s' % bib_subset_tmp_fn)

        shutil.copyfile(abs_fn, fn_tmp_1)
        with codecs.open(fn_tmp_1, 'r', "UTF-8", "replace") as f1:
            content = f1.read()
        # startswith() rather than content[0] so an empty file is accepted
        if content.startswith(utf8_bom):
            content = content[1:]

        print("split(abs_fn) = %s, %s" % (os.path.split(abs_fn)))

        # remove writemonkey repository and bookmarks
        content = content.split('***END OF FILE***')[0]
        content = content.replace('@@', '')

        if args.punctuation_outside:
            # move periods and commas outside quotes
            content = content.replace('."', '".').replace(',"', '",')
        else:
            # move a period/comma that follows a bracketed citation to
            # just before the closing quote: "foo" [@x]. -> "foo." [@x]
            content = swap_punct_quote_re.sub(r'\2"\1', content)

        with codecs.open(fn_tmp_2, 'w', "UTF-8", "replace") as f2:
            for line in content.split('\n'):
                # fix Wikicommons relative network-path references
                # so the URLs work on local file system (i.e.,'file:///')
                line = line.replace('src="//', 'src="http://')
                line = quash_citations(line)
                if args.bibliography:  # create hypertext refs from bibtex db
                    line = link_citations(line, bibtex_parsed)
                f2.write(line + '\n')

        ##############################
        ##  pandoc
        ##############################

        pandoc_cmd = ['pandoc', '-f', 'markdown+mmd_title_block']
        pandoc_cmd.extend(pandoc_opts)
        pandoc_cmd.append(fn_tmp_2)
        print("pandoc_cmd: " + ' '.join(pandoc_cmd) + '\n')
        with open(fn_tmp_3, 'w') as pandoc_out:
            call(pandoc_cmd, stdout=pandoc_out)
        info("done pandoc_cmd")

        if args.presentation:
            create_talk_handout(abs_fn, fn_tmp_2)

        ##############################
        ##  post pandoc
        ##############################

        # final tweaks to tmp html file
        with open(fn_tmp_3, 'r') as html_in:
            content = html_in.read()

        # text alternations
        if args.british_punctuation:  # swap double/single quotes
            # Park the double quotes as ASCII entities first; otherwise the
            # doubles produced from single quotes below would be turned
            # straight back into singles by the final replacement.
            content = (content.replace('“', '&ldquo;')
                              .replace('”', '&rdquo;'))
            content = single_quote_re.sub(r'\1“\2”\3', content)
            content = (content.replace('&ldquo;', '‘')
                              .replace('&rdquo;', '’'))

        # HTML alterations
        if args.number_elements:
            content = number_elements(content)

        result_fn = '%s.html' % (base_fn)
        info("result_fn = '%s'" % (result_fn))
        if args.output:
            result_fn = args.output[0]
        with open(result_fn, 'w') as result_out:
            result_out.write(content)

        if args.validate:
            call(['tidy', '-utf8', '-q', '-i', '-m', '-w', '0', '-asxhtml',
                  result_fn])
        if args.launch_browser:
            info("launching %s" % result_fn)
            Popen([BROWSER, result_fn])

        info("removing tmp files")
        for cleanup_fn in cleanup_tmp_fns:
            if exists(cleanup_fn):
                remove(cleanup_fn)