コード例 #1
0
ファイル: server.py プロジェクト: inpho/topic-explorer
        def cluster():
            """Render the master page with the splash template as its body.

            Reads ``www/master.mustache.html`` as UTF-8 and renders it with
            two parameters: ``body`` (the output of
            ``_render_template('splash.mustache.html')``) and ``topic_range``.
            """
            master_path = get_static_resource_path('www/master.mustache.html')
            with open(master_path, encoding='utf-8') as master_file:
                master_source = master_file.read()

            # Build the parameters in the same order the values are needed.
            page_params = dict(
                body=_render_template('splash.mustache.html'),
                topic_range=self.topic_range,
            )
            return self.renderer.render(master_source, page_params)
コード例 #2
0
ファイル: server.py プロジェクト: swalter62/topic-explorer
 def description():
     """Serve the file named by the ``corpus_desc`` keyword argument.

     When ``corpus_desc`` is missing or empty, the response status is set
     to 404 and the text "File not found" is returned instead.
     """
     desc_name = kwargs.get('corpus_desc')
     if desc_name:
         # Resolve the path, then hand directory and basename separately
         # to static_file.
         resolved = get_static_resource_path(desc_name)
         directory, basename = os.path.split(resolved)
         return static_file(basename, root=directory)

     response.status = 404
     return "File not found"
コード例 #3
0
        def cluster():
            """Return the master template rendered around the splash page.

            The master template is read from ``www/master.mustache.html``
            (UTF-8). Its ``body`` parameter is the rendered
            ``splash.mustache.html`` page and its ``topic_range`` parameter
            is ``self.topic_range``.
            """
            with open(
                get_static_resource_path('www/master.mustache.html'),
                encoding='utf-8',
            ) as handle:
                outer_template = handle.read()

            splash_body = _render_template('splash.mustache.html')
            return self.renderer.render(
                outer_template,
                {'body': splash_body, 'topic_range': self.topic_range},
            )
コード例 #4
0
ファイル: server.py プロジェクト: inpho/topic-explorer
 def description():
     """Return the corpus description file, or a 404 text when unset.

     The file name comes from ``kwargs['corpus_desc']``. A falsy value sets
     ``response.status`` to 404 and returns "File not found".
     """
     configured = kwargs.get('corpus_desc')
     found = bool(configured)
     if not found:
         response.status = 404
         return "File not found"

     # static_file wants the containing directory and the bare file name.
     parent_dir, leaf = os.path.split(get_static_resource_path(configured))
     return static_file(leaf, root=parent_dir)
コード例 #5
0
        def index(k):
            """Render the master page with the bars template for model ``k``.

            Calls ``abort(400, ...)`` when ``k`` is not in
            ``self.topic_range``. Otherwise reads
            ``www/master.mustache.html`` as UTF-8 and renders it with the
            ``bars.mustache.html`` page as ``body``.
            """
            model_available = k in self.topic_range
            if not model_available:
                abort(400, "No model for k = {}".format(k))

            master_path = get_static_resource_path('www/master.mustache.html')
            with open(master_path, encoding='utf-8') as master_file:
                master_source = master_file.read()

            bars_body = _render_template('bars.mustache.html')
            page_params = {'body': bars_body,
                           'topic_range': self.topic_range}
            return self.renderer.render(master_source, page_params)
コード例 #6
0
        def _render_template(page):
            """Render the template ``www/<page>`` with corpus-level settings.

            Sets the ``Expires`` response header to ``_cache_date()``, reads
            the template as UTF-8, and renders it with values taken from
            ``kwargs`` (falling back to the defaults below) and from ``self``.
            """
            response.set_header('Expires', _cache_date())

            page_path = get_static_resource_path('www/' + page)
            with open(page_path, encoding='utf-8') as page_file:
                page_source = page_file.read()

            context = dict(
                corpus_name=kwargs.get('corpus_name', ''),
                corpus_link=kwargs.get('corpus_link', ''),
                context_type=self.context_type,
                topic_range=self.topic_range,
                doc_title_format=kwargs.get('doc_title_format', '{0}'),
                doc_url_format=kwargs.get('doc_url_format', ''),
                home_link=kwargs.get('home_link', '/'),
            )
            return self.renderer.render(page_source, context)
コード例 #7
0
ファイル: server.py プロジェクト: inpho/topic-explorer
        def _render_template(page):
            """Return ``www/<page>`` rendered with the corpus parameters.

            The ``Expires`` header is set from ``_cache_date()`` before the
            template file is read (UTF-8). Optional settings are looked up
            in ``kwargs``; ``context_type`` and ``topic_range`` come from
            ``self``.
            """
            response.set_header('Expires', _cache_date())

            with open(
                get_static_resource_path('www/' + page),
                encoding='utf-8',
            ) as source_file:
                markup = source_file.read()

            # Filled key by key, in the same order as the values are read.
            values = {}
            values['corpus_name'] = kwargs.get('corpus_name', '')
            values['corpus_link'] = kwargs.get('corpus_link', '')
            values['context_type'] = self.context_type
            values['topic_range'] = self.topic_range
            values['doc_title_format'] = kwargs.get('doc_title_format', '{0}')
            values['doc_url_format'] = kwargs.get('doc_url_format', '')
            values['home_link'] = kwargs.get('home_link', '/')
            return self.renderer.render(markup, values)
コード例 #8
0
def main(args=None, launch=True):
    """Build the "Associated Press 88-90 sample" demo and optionally serve it.

    Calls ``download_and_extract()``, then runs the ``init``, ``prep`` and
    ``train`` commands in that order with fixed command lines. Afterwards
    ``ap.ini`` is re-read, four demo settings are applied, ``demo/ap.md`` is
    copied to ``ap.md``, and the config is written back. When ``launch`` is
    true the server is started on ``ap.ini``.

    The incoming ``args`` value is never read; the name is rebound to the
    parsed arguments of each step.
    """
    download_and_extract()

    # Not used below; retained so the sequence of calls is unchanged.
    pwd = os.getcwd()

    # Every stage gets a fresh parser populated by its own module.
    stages = (
        (init, ['ap', '--name', '"Associated Press 88-90 sample"',
                '--rebuild', '-q']),
        (prep, 'ap.ini --lang en --high 2000 --low 5 -q'.split()),
        (train,
         "ap.ini -k 20 40 60 --context-type article --iter 20".split()),
    )
    for stage, argv in stages:
        stage_parser = ArgumentParser()
        stage.populate_parser(stage_parser)
        args = stage_parser.parse_args(argv)
        stage.main(args)

    from configparser import RawConfigParser as ConfigParser
    config = ConfigParser()
    config.read('ap.ini')
    demo_settings = (
        ("main", "label_module", "topicexplorer.extensions.ap"),
        ("main", "corpus_desc", "ap.md"),
        ("www", "icons", "ap,fingerprint,link"),
        ("www", "fulltext", "True"),
    )
    for section, option, value in demo_settings:
        config.set(section, option, value)
    shutil.copyfile(get_static_resource_path('demo/ap.md'), 'ap.md')
    with open("ap.ini", "w") as ini_file:
        config.write(ini_file)

    if not launch:
        return

    server_parser = ArgumentParser()
    server.populate_parser(server_parser)
    args = server_parser.parse_args(['ap.ini'])
    server.main(args)
コード例 #9
0
ファイル: demo.py プロジェクト: inpho/topic-explorer
def main(args=None, launch=True):
    """Run the demo pipeline on the AP sample and optionally start the server.

    Steps, in order: ``download_and_extract()``, then the ``init``, ``prep``
    and ``train`` commands with fixed arguments. ``ap.ini`` is then read via
    ``topicexplorer.config.read``, updated with the demo settings, and
    written back. ``demo/ap.md`` is copied to ``ap.md`` before the write.
    When ``launch`` is true the server command is run on ``ap.ini``.

    The ``args`` parameter's incoming value is ignored; the name is reused
    for each step's parsed arguments.
    """
    download_and_extract()

    # Not used below; retained so the sequence of calls is unchanged.
    pwd = os.getcwd()

    # --- init ---
    init_argv = ['ap', '--name', '"Associated Press 88-90 sample"',
                 '--rebuild', '-q']
    init_parser = ArgumentParser()
    init.populate_parser(init_parser)
    args = init_parser.parse_args(init_argv)
    init.main(args)

    # --- prep ---
    prep_argv = 'ap.ini --lang en --high 2000 --low 5 -q'.split()
    prep_parser = ArgumentParser()
    prep.populate_parser(prep_parser)
    args = prep_parser.parse_args(prep_argv)
    prep.main(args)

    # --- train ---
    train_argv = "ap.ini -k 20 40 60 --context-type article --iter 20".split()
    train_parser = ArgumentParser()
    train.populate_parser(train_parser)
    args = train_parser.parse_args(train_argv)
    train.main(args)

    # --- demo-specific config tweaks ---
    import topicexplorer.config
    config = topicexplorer.config.read('ap.ini')
    for section, option, value in [
        ("main", "label_module", "topicexplorer.extensions.ap"),
        ("main", "corpus_desc", "ap.md"),
        ("www", "icons", "ap,fingerprint,link"),
        ("www", "fulltext", "True"),
    ]:
        config.set(section, option, value)
    shutil.copyfile(get_static_resource_path('demo/ap.md'), 'ap.md')
    with open("ap.ini", "w") as ini_out:
        config.write(ini_out)

    if launch:
        server_parser = ArgumentParser()
        server.populate_parser(server_parser)
        server.main(server_parser.parse_args(['ap.ini']))
コード例 #10
0
ファイル: server.py プロジェクト: swalter62/topic-explorer
 def icons():
     """Return the contents of ``www/icons.js`` plus a ``var icons`` line.

     The appended statement assigns ``self.icons``, serialized with
     ``json.dumps``, to a JavaScript variable named ``icons``.
     """
     js_path = get_static_resource_path('www/icons.js')
     with open(js_path) as js_file:
         script_source = js_file.read()
         icon_json = json.dumps(self.icons)
         combined = '{0}\n var icons = {1};'.format(script_source, icon_json)
     return combined
コード例 #11
0
ファイル: chinese.py プロジェクト: swalter62/topic-explorer
# -*- coding: utf-8 -*-
import os

import platform
#updated to use pymmseg function calls instead of plain mmseg

import string
from topicexplorer.lib.util import get_static_resource_path

import os.path
import topicexplorer.lib.mmseg as mmseg
# Module-level mmseg objects, built at import time from files resolved
# through get_static_resource_path.
# NOTE(review): construction order is kept as-is; whether mmseg.Dict or
# mmseg.CharFreqs have order-dependent side effects is not visible here.
modern_dic = mmseg.Dict(get_static_resource_path("mmseg/modern_words.dic"))
ancient_dic = mmseg.Dict(get_static_resource_path("mmseg/ancient_words.dic"))
chrs = mmseg.CharFreqs(get_static_resource_path("mmseg/chars.dic"))
# One MMSeg instance per dictionary; both are given the same `chrs` object.
ancient_mmseg = mmseg.MMSeg(ancient_dic, chrs)
modern_mmseg = mmseg.MMSeg(modern_dic, chrs)


def is_flagged(toke_number):
    # '基本汉字'(Basic Chinese Character),20902 characters,'4E00-9FA5'
    return ((toke_number > ord(u'\u4E00') - 1 and toke_number < ord(u'\u9FEF') + 1) or \
            # '扩展A'(Expansion A),6582 characters,'3400-4DB5'

                (toke_number > ord(u'\u3400') - 1 and toke_number < ord(u'\u4DB5') + 1) or \
            # '扩展B'(Expansion B),42711 characters,'20000-2A6D6'

                (toke_number > int('20000', 16) - 1 and toke_number < int('2A6D6', 16) + 1) or \
            # '扩展C'(Expansion C),4149 characters,'2A700-2B734'

                (toke_number > int('2A700', 16) - 1 and toke_number < int('2B734', 16) + 1) or \
            # '扩展D'(Expansion D),222 characters,'2B740-2B81D'
コード例 #12
0
ファイル: notebook.py プロジェクト: inpho/topic-explorer
def main(args):
    """Write the notebook support files for a corpus and optionally run Jupyter.

    Renders ``ipynb/corpus.tmpl.py`` with the absolute config-file path and
    writes the result as ``corpus.py`` inside a ``notebooks`` directory next
    to the config file. Notebooks matching ``*.py2.ipynb`` or ``*.py3.ipynb``
    (chosen by the running interpreter's major version) are copied from the
    ``ipynb`` resource directory into that directory with the ``.pyN`` part
    removed from the name. Each write or copy happens only when
    ``overwrite_prompt`` returns a true value for the target path.

    When ``args.launch`` is true, the working directory is changed to the
    notebooks directory, ``jupyter notebook`` is started, SIGINT and SIGTERM
    handlers are installed to stop it, and the function then waits.

    Args:
        args: Object with ``config_file`` (replaced in place by its absolute
            path) and ``launch`` attributes.
    """
    args.config_file = os.path.abspath(args.config_file)
    with open(get_static_resource_path('ipynb/corpus.tmpl.py')) as corpustmpl:
        corpus_py = corpustmpl.read()
        corpus_py = Template(corpus_py)
        corpus_py = corpus_py.safe_substitute(config_file=args.config_file)

    # The notebooks directory lives beside the config file.
    ipynb_path = os.path.join(os.path.dirname(args.config_file), "notebooks")
    print(ipynb_path)
    if not os.path.exists(ipynb_path):
        os.makedirs(ipynb_path)

    filename = os.path.join(ipynb_path, "corpus.py")

    if overwrite_prompt(filename, default=True):
        print("Writing", filename)
        with open(filename, 'w') as corpusloader:
            corpusloader.write(corpus_py)
    # Pick the notebook flavour that matches the running interpreter.
    pyflag = 'py2' if sys.version_info.major == 2 else 'py3'
    glob_path = (get_static_resource_path('ipynb') + '/*.{}.ipynb').format(pyflag)

    for notebook in glob(glob_path):
        # Drop the ".py2"/".py3" marker from the copied file's name.
        new_nb_name = os.path.basename(notebook).replace('.' +pyflag, '')
        new_nb_path = os.path.join(ipynb_path, new_nb_name)
        if overwrite_prompt(new_nb_path, default=False):
            print("Copying", notebook)
            shutil.copy(notebook, new_nb_path)

    if args.launch:
        import subprocess
        os.chdir(ipynb_path)
        try:
            # TODO: Fix KeyboardInterrupt errors
            # os.setsid is used as the child's pre-exec hook where it exists;
            # where the attribute is missing, no hook is passed.
            try:
                grp_fn = os.setsid
            except AttributeError:
                grp_fn = None
            # NOTE(review): with shell=True a missing `jupyter` command is
            # presumably reported by the shell rather than raised here as
            # OSError, so the handler below may never run for that case —
            # confirm.
            proc = subprocess.Popen("jupyter notebook", shell=True, preexec_fn=grp_fn)
            # stdin=subprocess.PIPE, preexec_fn=grp_fn)
            # stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

        except OSError:
            print("ERROR: Command `jupyter notebook` not found.")
            print("       If IPython or Anaconda is installed, check your PATH variable.")
            sys.exit(1)

        # Clean exit and shutdown of the notebook server.
        # Inside the handler the `signal` parameter (the received signal
        # number) shadows the `signal` module.
        def signal_handler(signal, frame):
            # Cross-platform compatibility: forward the signal to the child's
            # process group; where os.killpg is missing, fall back to
            # `taskkill` and exit.
            try:
                os.killpg(proc.pid, signal)
                proc.communicate()
            except AttributeError:
                subprocess.call(['taskkill', '/F', '/T', '/PID', str(proc.pid)])
                sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        print("\nPress Ctrl+C to shutdown the IPython notebook server\n")

        # Cross-platform compatibility: wait for a signal with signal.pause
        # where it exists.
        try:
            signal.pause()
        except AttributeError:
            # Windows hack: no signal.pause, so sleep in a loop instead.
            while True:
                time.sleep(1)
コード例 #13
0
def main(args):
    """Create the notebook folder for a corpus and optionally launch Jupyter.

    The ``ipynb/corpus.tmpl.py`` template is filled in with the absolute
    config-file path and written as ``corpus.py`` into a ``notebooks``
    directory beside the config file. Bundled notebooks whose names match
    ``*.py2.ipynb`` or ``*.py3.ipynb`` (per the interpreter's major version)
    are copied there with the ``.pyN`` part stripped. Writes and copies are
    each gated on ``overwrite_prompt`` returning a true value.

    With ``args.launch`` true, ``jupyter notebook`` is started from the
    notebooks directory, SIGINT/SIGTERM handlers that stop it are installed,
    and the function waits.

    Args:
        args: Object with ``config_file`` (overwritten with its absolute
            path) and ``launch`` attributes.
    """
    args.config_file = os.path.abspath(args.config_file)
    with open(get_static_resource_path('ipynb/corpus.tmpl.py')) as corpustmpl:
        corpus_py = corpustmpl.read()
        corpus_py = Template(corpus_py)
        corpus_py = corpus_py.safe_substitute(config_file=args.config_file)

    # Target directory sits next to the config file.
    ipynb_path = os.path.join(os.path.dirname(args.config_file), "notebooks")
    print(ipynb_path)
    if not os.path.exists(ipynb_path):
        os.makedirs(ipynb_path)

    filename = os.path.join(ipynb_path, "corpus.py")

    if overwrite_prompt(filename, default=True):
        print("Writing", filename)
        with open(filename, 'w') as corpusloader:
            corpusloader.write(corpus_py)
    # Select notebooks built for the running interpreter's major version.
    pyflag = 'py2' if sys.version_info.major == 2 else 'py3'
    glob_path = (get_static_resource_path('ipynb') +
                 '/*.{}.ipynb').format(pyflag)

    for notebook in glob(glob_path):
        # Remove the ".py2"/".py3" marker from the destination name.
        new_nb_name = os.path.basename(notebook).replace('.' + pyflag, '')
        new_nb_path = os.path.join(ipynb_path, new_nb_name)
        if overwrite_prompt(new_nb_path, default=False):
            print("Copying", notebook)
            shutil.copy(notebook, new_nb_path)

    if args.launch:
        import subprocess
        os.chdir(ipynb_path)
        try:
            # TODO: Fix KeyboardInterrupt errors
            # Use os.setsid as the child's pre-exec hook when the attribute
            # exists; otherwise pass no hook.
            try:
                grp_fn = os.setsid
            except AttributeError:
                grp_fn = None
            # NOTE(review): because shell=True is used, a missing `jupyter`
            # executable is presumably reported by the shell instead of
            # raising OSError here — confirm whether the except branch can
            # trigger for that case.
            proc = subprocess.Popen("jupyter notebook",
                                    shell=True,
                                    preexec_fn=grp_fn)
            # stdin=subprocess.PIPE, preexec_fn=grp_fn)
            # stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

        except OSError:
            print("ERROR: Command `jupyter notebook` not found.")
            print(
                "       If IPython or Anaconda is installed, check your PATH variable."
            )
            sys.exit(1)

        # Clean exit and shutdown of the notebook server.
        # The handler's `signal` parameter is the received signal number and
        # shadows the `signal` module inside the handler body.
        def signal_handler(signal, frame):
            # Cross-platform compatibility: signal the child's process group;
            # if os.killpg is unavailable, use `taskkill` and exit.
            try:
                os.killpg(proc.pid, signal)
                proc.communicate()
            except AttributeError:
                subprocess.call(
                    ['taskkill', '/F', '/T', '/PID',
                     str(proc.pid)])
                sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        print("\nPress Ctrl+C to shutdown the IPython notebook server\n")

        # Cross-platform compatibility: block in signal.pause where
        # available.
        try:
            signal.pause()
        except AttributeError:
            # Windows hack: signal.pause is missing, so idle in a sleep loop.
            while True:
                time.sleep(1)
コード例 #14
0
ファイル: server.py プロジェクト: inpho/topic-explorer
 def icons():
     """Return the icon script followed by the configured icon list.

     Reads ``www/icons.js`` and appends a ``var icons = ...;`` statement
     whose value is ``self.icons`` serialized with ``json.dumps``.
     """
     # Decode as UTF-8 explicitly, the same way the mustache templates in
     # this file are opened, instead of relying on the platform's default
     # encoding. The handle gets its own name so it no longer shadows
     # this function's name.
     with open(get_static_resource_path('www/icons.js'),
               encoding='utf-8') as icons_file:
         script = icons_file.read()
     return '{0}\n var icons = {1};'.format(script, json.dumps(self.icons))
コード例 #15
0
ファイル: server.py プロジェクト: swalter62/topic-explorer
 def send_static(filename):
     """Serve ``filename`` from the directory resolved for ``'www/'``."""
     www_root = get_static_resource_path('www/')
     return static_file(filename, root=www_root)
コード例 #16
0
ファイル: chinese.py プロジェクト: inpho/topic-explorer
# -*- coding: utf-8 -*-
import os

import platform
#updated to use pymmseg function calls instead of plain mmseg

import string
from topicexplorer.lib.util import get_static_resource_path

import os.path
import topicexplorer.lib.mmseg as mmseg
# Dictionaries and the character-frequency table are created when this
# module is imported, from paths returned by get_static_resource_path.
# NOTE(review): the statements are left in their original order because any
# ordering requirements of the mmseg constructors cannot be seen from here.
modern_dic = mmseg.Dict(get_static_resource_path("mmseg/modern_words.dic"))
ancient_dic = mmseg.Dict(get_static_resource_path("mmseg/ancient_words.dic"))
chrs = mmseg.CharFreqs(get_static_resource_path("mmseg/chars.dic"))
# Each dictionary gets its own MMSeg instance; `chrs` is passed to both.
ancient_mmseg = mmseg.MMSeg(ancient_dic, chrs)
modern_mmseg = mmseg.MMSeg(modern_dic, chrs)

def is_flagged(toke_number):
             # '基本汉字'(Basic Chinese Character),20902 characters,'4E00-9FA5'
    return ((toke_number > ord(u'\u4E00') - 1 and toke_number < ord(u'\u9FEF') + 1) or \
            # '扩展A'(Expansion A),6582 characters,'3400-4DB5'
                (toke_number > ord(u'\u3400') - 1 and toke_number < ord(u'\u4DB5') + 1) or \
            # '扩展B'(Expansion B),42711 characters,'20000-2A6D6'
                (toke_number > int('20000', 16) - 1 and toke_number < int('2A6D6', 16) + 1) or \
            # '扩展C'(Expansion C),4149 characters,'2A700-2B734'
                (toke_number > int('2A700', 16) - 1 and toke_number < int('2B734', 16) + 1) or \
            # '扩展D'(Expansion D),222 characters,'2B740-2B81D'
                (toke_number > int('2B740', 16) - 1 and toke_number < int('2B81D', 16) + 1) or \
            # '扩展E'(Expansion E),5762 characters,'2B820-2CEA1'
                (toke_number > int('2B820', 16) - 1 and toke_number < int('2CEA1', 16) + 1) or \
            # '扩展F'(Expansion F),7473 characters,'2CEB0-2EBE0'
コード例 #17
0
ファイル: server.py プロジェクト: inpho/topic-explorer
 def send_static(filename):
     """Return ``static_file`` for ``filename`` rooted at the ``'www/'`` path."""
     static_root = get_static_resource_path('www/')
     served = static_file(filename, root=static_root)
     return served