Ejemplo n.º 1
0
def process_bibtex(corpus_path):
    import pybtex
    from pybtex.database import parse_file
    from topicexplorer.lib.util import overwrite_prompt, safe_symlink

    print "Loading BibTeX from", corpus_path 
    bib = parse_file(corpus_path)
    
    target_dir = os.path.basename(corpus_path).replace('.bib','')
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    elif overwrite_prompt(target_dir):
        shutil.rmtree(target_dir)
        os.makedirs(target_dir)
    else:
        raise IOError("Path exits: {}".format(target_dir))


    for entry in bib.entries:
        if bib.entries[entry].fields.get('file', None):
            filename = '/' + bib.entries[entry].fields['file'].replace(':pdf','')[1:]
            if 'C$\\backslash$:' in filename:
                filename = filename.replace('C$\\backslash$:', '') 
                filename = filename[1:]
                filename = os.path.normpath(filename)
            filename = os.path.abspath(filename)
            if not os.path.exists(filename):
                print "Invalid 'file' field for BibTeX entry {}:\n\t({})".format(entry, filename)
            else:
                new_path = os.path.join(target_dir, os.path.basename(filename))
                safe_symlink(filename, new_path)
        else:
            print "No 'file' field for BibTeX entry: {}".format(entry)

    return target_dir
Ejemplo n.º 2
0
def convert_and_write(fname, output_dir=None, overwrite=False, verbose=False):
    output = os.path.basename(fname) 
    output = output.replace('.pdf','.txt')
    if output_dir:
        output = os.path.join(output_dir, output)
    if output_dir is not None and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if overwrite or util.overwrite_prompt(output):
        with open(output, 'wb') as outfile:
            outfile.write(convert(fname))
            if verbose:
                print "converted", fname, "->", output
Ejemplo n.º 3
0
def convert_and_write(fname, output_dir=None, overwrite=False, verbose=False):
    output = os.path.basename(fname)
    output = output.replace('.pdf', '.txt')
    if output_dir:
        output = os.path.join(output_dir, output)
    if output_dir is not None and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if overwrite or util.overwrite_prompt(output):
        with open(output, 'wb') as outfile:
            outfile.write(convert(fname))
            if verbose:
                print "converted", fname, "->", output
Ejemplo n.º 4
0
def convert_and_write(fname, output_dir=None, overwrite=False, verbose=False,
    tokenizer='modern'):

    output = os.path.basename(fname) 
    if output_dir:
        output = os.path.join(output_dir, output)
    if output_dir is not None and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if overwrite or util.overwrite_prompt(output):
        with open(output, 'wb', encoding='utf8') as outfile:
            outfile.write(convert(fname, tokenizer=tokenizer))
            if verbose:
                print "converted", fname, "->", output
Ejemplo n.º 5
0
def convert_and_write(fname,
                      output_dir=None,
                      overwrite=False,
                      verbose=False,
                      tokenizer='modern'):

    output = os.path.basename(fname)
    if output_dir:
        output = os.path.join(output_dir, output)
    if output_dir is not None and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if overwrite or util.overwrite_prompt(output):
        with open(output, 'wb', encoding='utf8') as outfile:
            outfile.write(convert(fname, tokenizer=tokenizer))
            if verbose:
                print "converted", fname, "->", output
Ejemplo n.º 6
0
def process_bibtex(corpus_path, quiet):
    """Collect the files referenced by a BibTeX database into one directory.

    A directory named after ``corpus_path`` (base name with a trailing
    ``.bib`` removed) is created relative to the current working directory.
    For every entry that has a ``file`` field, the field is converted to an
    absolute path and, if that path exists, handed to ``safe_symlink``
    together with a destination inside the new directory. Entries whose
    file is missing or cannot be linked are reported on stdout and skipped.

    :param corpus_path: path of the BibTeX file to parse.
    :param quiet: when true, the user is never prompted; an existing
        target directory then raises ``IOError``.
    :returns: the name of the directory that was populated.
    :raises IOError: if the directory already exists and replacing it was
        not approved (always the case when ``quiet`` is true).
    """
    from pybtex.database import parse_file
    from topicexplorer.lib.util import overwrite_prompt, safe_symlink

    print("Loading BibTeX from", corpus_path)
    bib = parse_file(corpus_path)

    # Drop only a trailing '.bib'. str.replace() would also remove a '.bib'
    # found elsewhere in the name ('my.bibtex.bib' -> 'mytex').
    target_dir = os.path.basename(corpus_path)
    if target_dir.endswith('.bib'):
        target_dir = target_dir[:-len('.bib')]

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # `quiet` is tested first so quiet mode never prompts. The outcome is
    # unchanged: the old order asked the question and then discarded the
    # answer, raising either way.
    elif not quiet and overwrite_prompt(target_dir):
        shutil.rmtree(target_dir)
        os.makedirs(target_dir)
    else:
        raise IOError("Path exists: {}".format(target_dir))

    for entry in bib.entries:
        fields = bib.entries[entry].fields
        if not fields.get('file', None):
            print("No 'file' field for BibTeX entry: {}".format(entry))
            continue

        # Remove every ':pdf' marker, drop the first character and root the
        # remainder at '/'.
        # NOTE(review): presumably the field has the form ':path:pdf' --
        # confirm against real data.
        filename = '/' + fields['file'].replace(':pdf', '')[1:]
        if 'C$\\backslash$:' in filename:
            # LaTeX-escaped 'C\:' prefix: strip it together with the '/'
            # added above, then normalise the path.
            filename = filename.replace('C$\\backslash$:', '')
            filename = filename[1:]
            filename = os.path.normpath(filename)
        filename = os.path.abspath(filename)

        if not os.path.exists(filename):
            print(
                "Invalid 'file' field for BibTeX entry {}:\n\t({})".format(
                    entry, filename))
            continue

        new_path = os.path.join(target_dir, os.path.basename(filename))
        try:
            safe_symlink(filename, new_path)
        except OSError:
            # One failed link must not abort the remaining entries.
            print("Error linking file for BibTeX entry {}:\n\t({})".
                  format(entry, filename))

    return target_dir
Ejemplo n.º 7
0
def main(args):
    """Write the notebook starter files for a config file; optionally launch Jupyter.

    A ``notebooks`` directory is created beside ``args.config_file``. The
    ``ipynb/corpus.tmpl.py`` resource is filled in with the absolute config
    path and written there as ``corpus.py``. The bundled ``*.py2.ipynb`` or
    ``*.py3.ipynb`` notebooks (selected by the running interpreter's major
    version) are copied alongside it with the version tag removed. Every
    write is gated by ``overwrite_prompt``. When ``args.launch`` is true,
    ``jupyter notebook`` is started in that directory and this function
    blocks until a SIGINT/SIGTERM is handled.

    :param args: object with ``config_file`` and ``launch`` attributes;
        ``config_file`` is rewritten in place to an absolute path.
    """
    args.config_file = os.path.abspath(args.config_file)
    with open(get_static_resource_path('ipynb/corpus.tmpl.py')) as corpustmpl:
        corpus_py = corpustmpl.read()
        corpus_py = Template(corpus_py)
        corpus_py = corpus_py.safe_substitute(config_file=args.config_file)

    ipynb_path = os.path.join(os.path.dirname(args.config_file), "notebooks")
    print(ipynb_path)
    if not os.path.exists(ipynb_path):
        os.makedirs(ipynb_path)

    filename = os.path.join(ipynb_path, "corpus.py")

    if overwrite_prompt(filename, default=True):
        print("Writing", filename)
        with open(filename, 'w') as corpusloader:
            corpusloader.write(corpus_py)

    pyflag = 'py2' if sys.version_info.major == 2 else 'py3'
    # Format the pattern alone. Calling str.format() on the directory plus
    # pattern fails when the resource directory contains '{' or '}'.
    suffix = '.{}.ipynb'.format(pyflag)
    glob_path = get_static_resource_path('ipynb') + '/*' + suffix

    for notebook in glob(glob_path):
        new_nb_name = os.path.basename(notebook)
        # Remove the version tag from the end only; str.replace() would also
        # strip a '.py2'/'.py3' occurring earlier in the name.
        if new_nb_name.endswith(suffix):
            new_nb_name = new_nb_name[:-len(suffix)] + '.ipynb'
        new_nb_path = os.path.join(ipynb_path, new_nb_name)
        if overwrite_prompt(new_nb_path, default=False):
            print("Copying", notebook)
            shutil.copy(notebook, new_nb_path)

    if args.launch:
        import subprocess
        os.chdir(ipynb_path)
        try:
            # TODO: Fix KeyboardInterrupt errors
            # Where os.setsid exists, the child calls it before exec so the
            # handler below can signal the whole process group.
            try:
                grp_fn = os.setsid
            except AttributeError:
                grp_fn = None
            proc = subprocess.Popen("jupyter notebook", shell=True, preexec_fn=grp_fn)

        except OSError:
            print("ERROR: Command `jupyter notebook` not found.")
            print("       If IPython or Anaconda is installed, check your PATH variable.")
            sys.exit(1)

        # Clean exit and shutdown of the notebook server. The parameter is
        # named `signum` so it does not shadow the `signal` module.
        def signal_handler(signum, frame):
            # Cross-platform compatibility: without os.killpg the
            # AttributeError branch falls back to taskkill.
            try:
                os.killpg(proc.pid, signum)
                proc.communicate()
            except AttributeError:
                subprocess.call(['taskkill', '/F', '/T', '/PID', str(proc.pid)])
                sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        print("\nPress Ctrl+C to shutdown the IPython notebook server\n")

        # Cross-platform compatibility: block until a signal is handled.
        try:
            signal.pause()
        except AttributeError:
            # Windows hack: no signal.pause(), so idle in a sleep loop.
            while True:
                time.sleep(1)
Ejemplo n.º 8
0
def main(args):
    args.config_file = os.path.abspath(args.config_file)

    template_dir = os.path.dirname(__file__)
    template_dir = os.path.join(template_dir, '../ipynb/')
    template_dir = os.path.normpath(template_dir)
    with open(os.path.join(template_dir, 'corpus.tmpl.py')) as corpustmpl:
        corpus_py = corpustmpl.read()
        corpus_py = Template(corpus_py)
        corpus_py = corpus_py.safe_substitute(config_file=args.config_file)

    ipynb_path = os.path.join(os.path.dirname(args.config_file), "notebooks")
    print ipynb_path
    if not os.path.exists(ipynb_path):
        os.makedirs(ipynb_path)

    filename = os.path.join(ipynb_path, "corpus.py")

    if overwrite_prompt(filename, default=True):
        print "Writing", filename
        with open(filename, 'w') as corpusloader:
            corpusloader.write(corpus_py)

    for notebook in glob(template_dir + '/*.ipynb'):
        new_nb_path = os.path.join(ipynb_path, os.path.basename(notebook))
        if overwrite_prompt(new_nb_path, default=False):
            print "Copying", notebook
            shutil.copy(notebook, ipynb_path)

    if args.launch:
        import subprocess, sys
        os.chdir(ipynb_path)
        try:
            # TODO: Fix KeyboardInterrupt errors
            try:
                grp_fn = os.setsid
            except AttributeError:
                grp_fn = None
            proc = subprocess.Popen("ipython notebook",
                                    shell=True,
                                    preexec_fn=grp_fn)
            #stdin=subprocess.PIPE, preexec_fn=grp_fn)
            #stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

        except OSError:
            print "ERROR: Command `ipython notebook` not found."
            print "       If IPython or Anaconda is installed, check your PATH variable."
            sys.exit(1)

        # CLEAN EXIT AND SHUTDOWN OF IPYTHON NOTEBOOK
        def signal_handler(signal, frame):
            # Cross-Platform Compatability
            try:
                os.killpg(proc.pid, signal)
                proc.communicate()
            except AttributeError:
                subprocess.call(
                    ['taskkill', '/F', '/T', '/PID',
                     str(proc.pid)])
                sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        print "\nPress Ctrl+C to shutdown the IPython notebook server\n"

        # Cross-platform Compatability
        try:
            signal.pause()
        except AttributeError:
            # Windows hack
            while True:
                time.sleep(1)
Ejemplo n.º 9
0
def main(args):
    """Install the notebook starter files beside a config file; optionally launch Jupyter.

    Steps, in order:

    1. Fill the ``ipynb/corpus.tmpl.py`` resource with the absolute path of
       ``args.config_file``.
    2. Create a ``notebooks`` directory next to the config file and write
       the filled template there as ``corpus.py``.
    3. Copy the bundled ``*.py2.ipynb`` or ``*.py3.ipynb`` notebooks
       (selected by the running interpreter's major version) into that
       directory, dropping the version tag from each name.
    4. If ``args.launch`` is true, start ``jupyter notebook`` in the
       directory and block until a SIGINT/SIGTERM is handled.

    Steps 2 and 3 write a file only when ``overwrite_prompt`` approves it.

    :param args: object with ``config_file`` and ``launch`` attributes;
        ``config_file`` is rewritten in place to an absolute path.
    """
    args.config_file = os.path.abspath(args.config_file)
    with open(get_static_resource_path('ipynb/corpus.tmpl.py')) as corpustmpl:
        corpus_py = corpustmpl.read()
        corpus_py = Template(corpus_py)
        corpus_py = corpus_py.safe_substitute(config_file=args.config_file)

    ipynb_path = os.path.join(os.path.dirname(args.config_file), "notebooks")
    print(ipynb_path)
    if not os.path.exists(ipynb_path):
        os.makedirs(ipynb_path)

    filename = os.path.join(ipynb_path, "corpus.py")

    if overwrite_prompt(filename, default=True):
        print("Writing", filename)
        with open(filename, 'w') as corpusloader:
            corpusloader.write(corpus_py)

    pyflag = 'py2' if sys.version_info.major == 2 else 'py3'
    # Build the tagged suffix on its own. Running str.format() over the
    # resource directory as well raises if that directory contains '{' or
    # '}'.
    tagged_suffix = '.{}.ipynb'.format(pyflag)
    glob_path = get_static_resource_path('ipynb') + '/*' + tagged_suffix

    for notebook in glob(glob_path):
        new_nb_name = os.path.basename(notebook)
        # Strip the tag at the end of the name only. str.replace() removed
        # every '.py2'/'.py3' in the name, including ones that belong to
        # the notebook's own title.
        if new_nb_name.endswith(tagged_suffix):
            new_nb_name = new_nb_name[:-len(tagged_suffix)] + '.ipynb'
        new_nb_path = os.path.join(ipynb_path, new_nb_name)
        if overwrite_prompt(new_nb_path, default=False):
            print("Copying", notebook)
            shutil.copy(notebook, new_nb_path)

    if args.launch:
        import subprocess
        os.chdir(ipynb_path)
        try:
            # TODO: Fix KeyboardInterrupt errors
            # os.setsid, where available, is run in the child before exec
            # so that signal_handler can address the whole process group.
            try:
                grp_fn = os.setsid
            except AttributeError:
                grp_fn = None
            proc = subprocess.Popen("jupyter notebook",
                                    shell=True,
                                    preexec_fn=grp_fn)

        except OSError:
            print("ERROR: Command `jupyter notebook` not found.")
            print(
                "       If IPython or Anaconda is installed, check your PATH variable."
            )
            sys.exit(1)

        # Clean exit and shutdown of the notebook server. `signum` avoids
        # shadowing the `signal` module inside the handler.
        def signal_handler(signum, frame):
            # Cross-platform compatibility: a missing os.killpg raises
            # AttributeError, which selects the taskkill fallback.
            try:
                os.killpg(proc.pid, signum)
                proc.communicate()
            except AttributeError:
                subprocess.call(
                    ['taskkill', '/F', '/T', '/PID',
                     str(proc.pid)])
                sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        print("\nPress Ctrl+C to shutdown the IPython notebook server\n")

        # Cross-platform compatibility: wait for a signal to be handled.
        try:
            signal.pause()
        except AttributeError:
            # Windows hack: signal.pause() is unavailable, so sleep-loop.
            while True:
                time.sleep(1)
Ejemplo n.º 10
0
def main(args):
    args.config_file = os.path.abspath(args.config_file)
   
    template_dir = os.path.dirname(__file__)
    template_dir = os.path.join(template_dir, '../ipynb/')
    template_dir = os.path.normpath(template_dir)
    with open(os.path.join(template_dir, 'corpus.tmpl.py')) as corpustmpl:
        corpus_py = corpustmpl.read()
        corpus_py = Template(corpus_py)
        corpus_py = corpus_py.safe_substitute(config_file=args.config_file)
    
    ipynb_path = os.path.join(os.path.dirname(args.config_file), "notebooks")
    print ipynb_path
    if not os.path.exists(ipynb_path):
        os.makedirs(ipynb_path)

    filename = os.path.join(ipynb_path, "corpus.py")

    if overwrite_prompt(filename, default=True):
        print "Writing", filename
        with open(filename,'w') as corpusloader:
            corpusloader.write(corpus_py)

    for notebook in glob(os.path.join(template_dir, '*.ipynb')):
        new_nb_path = os.path.join(ipynb_path, os.path.basename(notebook))
        if overwrite_prompt(new_nb_path, default=False):
            print "Copying", notebook
            shutil.copy(notebook, ipynb_path)

    if args.launch:
        import subprocess, sys
        os.chdir(ipynb_path)
        try:
            # TODO: Fix KeyboardInterrupt errors
            try:
                grp_fn = os.setsid
            except AttributeError:
                grp_fn = None
            proc = subprocess.Popen("ipython notebook", shell=True, preexec_fn=grp_fn)
                #stdin=subprocess.PIPE, preexec_fn=grp_fn)
                #stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

        except OSError:
            print "ERROR: Command `ipython notebook` not found."
            print "       If IPython or Anaconda is installed, check your PATH variable."
            sys.exit(1)

        # CLEAN EXIT AND SHUTDOWN OF IPYTHON NOTEBOOK
        def signal_handler(signal,frame):
            # Cross-Platform Compatability
            try:
                os.killpg(proc.pid, signal)
                proc.communicate()
            except AttributeError:
                subprocess.call(['taskkill', '/F', '/T', '/PID', str(proc.pid)])    
                sys.exit(0)
    
        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        print "\nPress Ctrl+C to shutdown the IPython notebook server\n"

        # Cross-platform Compatability
        try:
            signal.pause()
        except AttributeError:
            # Windows hack
            while True:
                time.sleep(1)