def process_bibtex(corpus_path):
    """Symlink the files referenced by a BibTeX database into a new directory.

    The target directory is named after the basename of ``corpus_path`` with
    ``.bib`` removed and is created relative to the current working
    directory. If it already exists, ``overwrite_prompt`` decides whether it
    is deleted and recreated.

    :param corpus_path: path of the BibTeX file to parse.
    :returns: the name of the directory holding the symlinks.
    :raises IOError: if the target directory exists and is not overwritten.
    """
    from pybtex.database import parse_file
    from topicexplorer.lib.util import overwrite_prompt, safe_symlink

    # Single-argument print() calls behave the same on Python 2 and 3.
    print("Loading BibTeX from {}".format(corpus_path))
    bib = parse_file(corpus_path)

    target_dir = os.path.basename(corpus_path).replace('.bib', '')
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    elif overwrite_prompt(target_dir):
        shutil.rmtree(target_dir)
        os.makedirs(target_dir)
    else:
        raise IOError("Path exists: {}".format(target_dir))

    for entry in bib.entries:
        file_field = bib.entries[entry].fields.get('file', None)
        if not file_field:
            print("No 'file' field for BibTeX entry: {}".format(entry))
            continue

        # Drop the ':pdf' marker, then swap the first character for '/'.
        # NOTE(review): presumably the field looks like ':path/to/doc.pdf:pdf'
        # (reference-manager export format) -- confirm against real data.
        filename = '/' + file_field.replace(':pdf', '')[1:]
        if 'C$\\backslash$:' in filename:
            # Strip the TeX-escaped drive prefix and the '/' added above.
            filename = filename.replace('C$\\backslash$:', '')
            filename = filename[1:]
            filename = os.path.normpath(filename)
        filename = os.path.abspath(filename)

        if not os.path.exists(filename):
            print("Invalid 'file' field for BibTeX entry {}:\n\t({})".format(
                entry, filename))
            continue

        new_path = os.path.join(target_dir, os.path.basename(filename))
        try:
            safe_symlink(filename, new_path)
        except OSError:
            # One unlinkable file should not abort the whole import.
            print("Error linking file for BibTeX entry {}:\n\t({})".format(
                entry, filename))

    return target_dir
def convert_and_write(fname, output_dir=None, overwrite=False, verbose=False):
    """Convert ``fname`` with ``convert`` and write the result to a text file.

    The output name is the basename of ``fname`` with ``.pdf`` replaced by
    ``.txt``.

    :param fname: path of the file handed to ``convert``.
    :param output_dir: directory for the output file; created when missing.
        ``None`` or an empty string writes to the current working directory.
    :param overwrite: when true, skip ``util.overwrite_prompt`` and always
        write.
    :param verbose: when true, report each conversion on stdout.
    """
    output = os.path.basename(fname).replace('.pdf', '.txt')

    # Treat '' like None: os.makedirs('') raises, so only touch the
    # directory when one was actually given.
    if output_dir:
        output = os.path.join(output_dir, output)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

    if overwrite or util.overwrite_prompt(output):
        with open(output, 'wb') as outfile:
            outfile.write(convert(fname))
        if verbose:
            # Single-argument print() works on both Python 2 and 3.
            print("converted {} -> {}".format(fname, output))
def convert_and_write(fname, output_dir=None, overwrite=False, verbose=False):
    """Run ``convert`` on ``fname`` and store the result as a ``.txt`` file.

    The destination file name is derived from the basename of ``fname`` by
    replacing ``.pdf`` with ``.txt``.

    :param fname: path of the input file passed to ``convert``.
    :param output_dir: optional destination directory, created on demand.
        A falsy value (``None`` or ``''``) means the current directory.
    :param overwrite: write without consulting ``util.overwrite_prompt``.
    :param verbose: print a line for every file that was converted.
    """
    output = os.path.basename(fname)
    output = output.replace('.pdf', '.txt')

    if output_dir:
        # Only create the directory when a non-empty one was requested;
        # os.makedirs('') would raise.
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        output = os.path.join(output_dir, output)

    if not (overwrite or util.overwrite_prompt(output)):
        return

    with open(output, 'wb') as outfile:
        outfile.write(convert(fname))

    if verbose:
        # One pre-formatted argument keeps the output identical on
        # Python 2 and Python 3.
        print("converted {} -> {}".format(fname, output))
def convert_and_write(fname, output_dir=None, overwrite=False, verbose=False,
                      tokenizer='modern'):
    """Convert ``fname`` with ``convert`` and write the result as UTF-8.

    The output keeps the basename of ``fname``.

    :param fname: path of the file handed to ``convert``.
    :param output_dir: directory for the output file; created when missing.
        ``None`` or an empty string writes to the current working directory.
    :param overwrite: when true, skip ``util.overwrite_prompt`` and always
        write.
    :param verbose: when true, report each conversion on stdout.
    :param tokenizer: forwarded unchanged to ``convert``.
    """
    import codecs

    output = os.path.basename(fname)

    # Treat '' like None: os.makedirs('') raises.
    if output_dir:
        output = os.path.join(output_dir, output)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

    if overwrite or util.overwrite_prompt(output):
        # The built-in open() rejects an encoding in binary mode, so the
        # 'wb' + encoding combination only works with codecs.open.
        # NOTE(review): presumably the module shadows open with codecs.open;
        # calling it explicitly makes this block independent of that.
        with codecs.open(output, 'wb', encoding='utf8') as outfile:
            outfile.write(convert(fname, tokenizer=tokenizer))
        if verbose:
            # Single-argument print() works on both Python 2 and 3.
            print("converted {} -> {}".format(fname, output))
def process_bibtex(corpus_path, quiet):
    """Symlink the files referenced by a BibTeX database into a new directory.

    The target directory is named after the basename of ``corpus_path`` with
    ``.bib`` removed and is created relative to the current working
    directory.

    :param corpus_path: path of the BibTeX file to parse.
    :param quiet: when true, never prompt; an existing target directory is
        then treated as an error.
    :returns: the name of the directory holding the symlinks.
    :raises IOError: if the target directory exists and is not overwritten.
    """
    from pybtex.database import parse_file
    from topicexplorer.lib.util import overwrite_prompt, safe_symlink

    print("Loading BibTeX from", corpus_path)
    bib = parse_file(corpus_path)

    target_dir = os.path.basename(corpus_path).replace('.bib', '')
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # Check ``quiet`` first so quiet mode never shows a prompt whose answer
    # would be ignored anyway.
    elif not quiet and overwrite_prompt(target_dir):
        shutil.rmtree(target_dir)
        os.makedirs(target_dir)
    else:
        raise IOError("Path exists: {}".format(target_dir))

    for entry in bib.entries:
        file_field = bib.entries[entry].fields.get('file', None)
        if not file_field:
            print("No 'file' field for BibTeX entry: {}".format(entry))
            continue

        # Drop the ':pdf' marker, then swap the first character for '/'.
        # NOTE(review): presumably the field looks like ':path/to/doc.pdf:pdf'
        # (reference-manager export format) -- confirm against real data.
        filename = '/' + file_field.replace(':pdf', '')[1:]
        if 'C$\\backslash$:' in filename:
            # Strip the TeX-escaped drive prefix and the '/' added above.
            filename = filename.replace('C$\\backslash$:', '')
            filename = filename[1:]
            filename = os.path.normpath(filename)
        filename = os.path.abspath(filename)

        if not os.path.exists(filename):
            print("Invalid 'file' field for BibTeX entry {}:\n\t({})".format(
                entry, filename))
            continue

        new_path = os.path.join(target_dir, os.path.basename(filename))
        try:
            safe_symlink(filename, new_path)
        except OSError:
            # Keep going: one unlinkable file should not abort the import.
            print("Error linking file for BibTeX entry {}:\n\t({})".format(
                entry, filename))

    return target_dir
def main(args):
    """Create a ``notebooks`` directory next to the config file and fill it.

    Writes ``corpus.py`` from the ``ipynb/corpus.tmpl.py`` template, copies
    the bundled notebooks matching the running Python major version, and
    optionally launches ``jupyter notebook`` in that directory.

    :param args: namespace with ``config_file`` (rewritten in place to an
        absolute path) and ``launch`` (truthy to start the notebook server).
    """
    args.config_file = os.path.abspath(args.config_file)

    with open(get_static_resource_path('ipynb/corpus.tmpl.py')) as corpustmpl:
        corpus_py = Template(corpustmpl.read())
    corpus_py = corpus_py.safe_substitute(config_file=args.config_file)

    ipynb_path = os.path.join(os.path.dirname(args.config_file), "notebooks")
    print(ipynb_path)
    if not os.path.exists(ipynb_path):
        os.makedirs(ipynb_path)

    filename = os.path.join(ipynb_path, "corpus.py")
    if overwrite_prompt(filename, default=True):
        print("Writing", filename)
        with open(filename, 'w') as corpusloader:
            corpusloader.write(corpus_py)

    pyflag = 'py2' if sys.version_info.major == 2 else 'py3'
    # Format only the file pattern: running str.format over the whole path
    # would fail if the resource directory contained '{' or '}'.
    glob_path = os.path.join(get_static_resource_path('ipynb'),
                             '*.{}.ipynb'.format(pyflag))
    for notebook in glob(glob_path):
        # Strip the version tag so the copy has a plain notebook name.
        new_nb_name = os.path.basename(notebook).replace('.' + pyflag, '')
        new_nb_path = os.path.join(ipynb_path, new_nb_name)
        if overwrite_prompt(new_nb_path, default=False):
            print("Copying", notebook)
            shutil.copy(notebook, new_nb_path)

    if not args.launch:
        return

    import subprocess
    os.chdir(ipynb_path)
    try:
        # TODO: Fix KeyboardInterrupt errors
        # os.setsid is missing on Windows; where available it puts the
        # child in its own process group so killpg can stop the whole tree.
        grp_fn = getattr(os, 'setsid', None)
        # NOTE(review): with shell=True a missing command is normally
        # reported by the shell instead of raising OSError -- confirm that
        # the except branch below is actually reachable.
        proc = subprocess.Popen("jupyter notebook", shell=True,
                                preexec_fn=grp_fn)
    except OSError:
        print("ERROR: Command `jupyter notebook` not found.")
        print(" If IPython or Anaconda is installed, check your PATH variable.")
        sys.exit(1)

    # CLEAN EXIT AND SHUTDOWN OF IPYTHON NOTEBOOK
    def signal_handler(signum, frame):
        """Forward the received signal to the notebook server's group."""
        # Cross-platform compatibility: os.killpg does not exist on Windows.
        try:
            os.killpg(proc.pid, signum)
            proc.communicate()
        except AttributeError:
            subprocess.call(['taskkill', '/F', '/T', '/PID', str(proc.pid)])
            sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    print("\nPress Ctrl+C to shutdown the IPython notebook server\n")

    # Cross-platform compatibility: signal.pause does not exist on Windows.
    try:
        signal.pause()
    except AttributeError:
        # Windows hack
        while True:
            time.sleep(1)
def main(args):
    """Create a ``notebooks`` directory next to the config file and fill it.

    Writes ``corpus.py`` from the ``corpus.tmpl.py`` template found in the
    ``../ipynb`` directory relative to this module, copies every ``*.ipynb``
    file from that directory, and optionally launches ``ipython notebook``.

    :param args: namespace with ``config_file`` (rewritten in place to an
        absolute path) and ``launch`` (truthy to start the notebook server).
    """
    args.config_file = os.path.abspath(args.config_file)

    template_dir = os.path.normpath(
        os.path.join(os.path.dirname(__file__), '../ipynb/'))

    with open(os.path.join(template_dir, 'corpus.tmpl.py')) as corpustmpl:
        corpus_py = Template(corpustmpl.read())
    corpus_py = corpus_py.safe_substitute(config_file=args.config_file)

    ipynb_path = os.path.join(os.path.dirname(args.config_file), "notebooks")
    # Single-argument print() calls behave the same on Python 2 and 3.
    print(ipynb_path)
    if not os.path.exists(ipynb_path):
        os.makedirs(ipynb_path)

    filename = os.path.join(ipynb_path, "corpus.py")
    if overwrite_prompt(filename, default=True):
        print("Writing {}".format(filename))
        with open(filename, 'w') as corpusloader:
            corpusloader.write(corpus_py)

    for notebook in glob(os.path.join(template_dir, '*.ipynb')):
        new_nb_path = os.path.join(ipynb_path, os.path.basename(notebook))
        if overwrite_prompt(new_nb_path, default=False):
            print("Copying {}".format(notebook))
            shutil.copy(notebook, ipynb_path)

    if not args.launch:
        return

    import subprocess
    import sys
    os.chdir(ipynb_path)
    try:
        # TODO: Fix KeyboardInterrupt errors
        # os.setsid is missing on Windows; where available it puts the
        # child in its own process group so killpg can stop the whole tree.
        grp_fn = getattr(os, 'setsid', None)
        # NOTE(review): with shell=True a missing command is normally
        # reported by the shell instead of raising OSError -- confirm that
        # the except branch below is actually reachable.
        proc = subprocess.Popen("ipython notebook", shell=True,
                                preexec_fn=grp_fn)
    except OSError:
        print("ERROR: Command `ipython notebook` not found.")
        print(" If IPython or Anaconda is installed, check your PATH variable.")
        sys.exit(1)

    # CLEAN EXIT AND SHUTDOWN OF IPYTHON NOTEBOOK
    def signal_handler(signum, frame):
        """Forward the received signal to the notebook server's group."""
        # Cross-platform compatibility: os.killpg does not exist on Windows.
        try:
            os.killpg(proc.pid, signum)
            proc.communicate()
        except AttributeError:
            subprocess.call(['taskkill', '/F', '/T', '/PID', str(proc.pid)])
            sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    print("\nPress Ctrl+C to shutdown the IPython notebook server\n")

    # Cross-platform compatibility: signal.pause does not exist on Windows.
    try:
        signal.pause()
    except AttributeError:
        # Windows hack
        while True:
            time.sleep(1)
def main(args):
    """Set up the ``notebooks`` directory for a config file.

    Renders ``ipynb/corpus.tmpl.py`` into ``notebooks/corpus.py``, copies the
    bundled notebooks tagged for the running Python major version (dropping
    the tag from the file name), and, when requested, starts
    ``jupyter notebook`` inside that directory and waits for a signal.

    :param args: namespace providing ``config_file`` (replaced by its
        absolute path) and ``launch`` (truthy to start the server).
    """
    args.config_file = os.path.abspath(args.config_file)

    with open(get_static_resource_path('ipynb/corpus.tmpl.py')) as corpustmpl:
        template = Template(corpustmpl.read())
    corpus_py = template.safe_substitute(config_file=args.config_file)

    ipynb_path = os.path.join(os.path.dirname(args.config_file), "notebooks")
    print(ipynb_path)
    if not os.path.exists(ipynb_path):
        os.makedirs(ipynb_path)

    filename = os.path.join(ipynb_path, "corpus.py")
    if overwrite_prompt(filename, default=True):
        print("Writing", filename)
        with open(filename, 'w') as corpusloader:
            corpusloader.write(corpus_py)

    pyflag = 'py2' if sys.version_info.major == 2 else 'py3'
    # Build the pattern separately from the directory so that braces in the
    # resource path cannot be misread as str.format placeholders.
    notebook_pattern = '*.{}.ipynb'.format(pyflag)
    glob_path = os.path.join(get_static_resource_path('ipynb'),
                             notebook_pattern)
    for notebook in glob(glob_path):
        new_nb_name = os.path.basename(notebook).replace('.' + pyflag, '')
        new_nb_path = os.path.join(ipynb_path, new_nb_name)
        if overwrite_prompt(new_nb_path, default=False):
            print("Copying", notebook)
            shutil.copy(notebook, new_nb_path)

    if args.launch:
        import subprocess
        os.chdir(ipynb_path)
        try:
            # TODO: Fix KeyboardInterrupt errors
            try:
                # Not available on Windows.
                grp_fn = os.setsid
            except AttributeError:
                grp_fn = None
            # NOTE(review): with shell=True a missing command is normally
            # reported by the shell rather than raised as OSError -- verify
            # that the handler below can actually fire.
            proc = subprocess.Popen("jupyter notebook", shell=True,
                                    preexec_fn=grp_fn)
        except OSError:
            print("ERROR: Command `jupyter notebook` not found.")
            print(
                " If IPython or Anaconda is installed, check your PATH variable."
            )
            sys.exit(1)

        # CLEAN EXIT AND SHUTDOWN OF IPYTHON NOTEBOOK
        def signal_handler(signum, frame):
            """Pass the signal on to the server's process group."""
            # Cross-platform compatibility: no os.killpg on Windows.
            try:
                os.killpg(proc.pid, signum)
                proc.communicate()
            except AttributeError:
                subprocess.call(
                    ['taskkill', '/F', '/T', '/PID', str(proc.pid)])
                sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        print("\nPress Ctrl+C to shutdown the IPython notebook server\n")

        # Cross-platform compatibility: no signal.pause on Windows.
        try:
            signal.pause()
        except AttributeError:
            # Windows hack
            while True:
                time.sleep(1)
def main(args):
    """Set up the ``notebooks`` directory for a config file.

    Renders ``corpus.tmpl.py`` (from ``../ipynb`` relative to this module)
    into ``notebooks/corpus.py``, copies every ``*.ipynb`` template there,
    and, when requested, starts ``ipython notebook`` inside that directory
    and waits for a signal.

    :param args: namespace providing ``config_file`` (replaced by its
        absolute path) and ``launch`` (truthy to start the server).
    """
    args.config_file = os.path.abspath(args.config_file)

    template_dir = os.path.dirname(__file__)
    template_dir = os.path.join(template_dir, '../ipynb/')
    template_dir = os.path.normpath(template_dir)

    with open(os.path.join(template_dir, 'corpus.tmpl.py')) as corpustmpl:
        template = Template(corpustmpl.read())
    corpus_py = template.safe_substitute(config_file=args.config_file)

    ipynb_path = os.path.join(os.path.dirname(args.config_file), "notebooks")
    # print() with one pre-formatted argument is valid and prints the same
    # text on both Python 2 and Python 3.
    print(ipynb_path)
    if not os.path.exists(ipynb_path):
        os.makedirs(ipynb_path)

    filename = os.path.join(ipynb_path, "corpus.py")
    if overwrite_prompt(filename, default=True):
        print("Writing {}".format(filename))
        with open(filename, 'w') as corpusloader:
            corpusloader.write(corpus_py)

    for notebook in glob(os.path.join(template_dir, '*.ipynb')):
        new_nb_path = os.path.join(ipynb_path, os.path.basename(notebook))
        if overwrite_prompt(new_nb_path, default=False):
            print("Copying {}".format(notebook))
            shutil.copy(notebook, ipynb_path)

    if args.launch:
        import subprocess
        import sys
        os.chdir(ipynb_path)
        try:
            # TODO: Fix KeyboardInterrupt errors
            try:
                # Not available on Windows.
                grp_fn = os.setsid
            except AttributeError:
                grp_fn = None
            # NOTE(review): with shell=True a missing command is normally
            # reported by the shell rather than raised as OSError -- verify
            # that the handler below can actually fire.
            proc = subprocess.Popen("ipython notebook", shell=True,
                                    preexec_fn=grp_fn)
        except OSError:
            print("ERROR: Command `ipython notebook` not found.")
            print(" If IPython or Anaconda is installed, check your PATH variable.")
            sys.exit(1)

        # CLEAN EXIT AND SHUTDOWN OF IPYTHON NOTEBOOK
        def signal_handler(signum, frame):
            """Pass the signal on to the server's process group."""
            # Cross-platform compatibility: no os.killpg on Windows.
            try:
                os.killpg(proc.pid, signum)
                proc.communicate()
            except AttributeError:
                subprocess.call(
                    ['taskkill', '/F', '/T', '/PID', str(proc.pid)])
                sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        print("\nPress Ctrl+C to shutdown the IPython notebook server\n")

        # Cross-platform compatibility: no signal.pause on Windows.
        try:
            signal.pause()
        except AttributeError:
            # Windows hack
            while True:
                time.sleep(1)