def cluster():
    """Render the landing page: the splash fragment inside the master layout.

    Returns:
        The result of ``self.renderer.render`` applied to the master
        template, with the rendered ``splash.mustache.html`` page as
        ``body`` and ``self.topic_range`` as ``topic_range``.
    """
    master_path = get_static_resource_path('www/master.mustache.html')
    with open(master_path, encoding='utf-8') as master_file:
        layout = master_file.read()

    context = {
        'body': _render_template('splash.mustache.html'),
        'topic_range': self.topic_range,
    }
    return self.renderer.render(layout, context)
def description():
    """Serve the corpus description file named by the ``corpus_desc`` option.

    Returns:
        The ``static_file`` result for the resolved file, or the text
        ``"File not found"`` (with ``response.status`` set to 404) when
        ``corpus_desc`` is absent or empty in ``kwargs``.
    """
    desc_name = kwargs.get('corpus_desc')
    if desc_name:
        # Resolve inside the static resources, then hand the directory and
        # the bare file name to static_file separately.
        resolved = get_static_resource_path(desc_name)
        directory, basename = os.path.split(resolved)
        return static_file(basename, root=directory)

    response.status = 404
    return "File not found"
def cluster():
    """Render the landing page: the splash fragment inside the master layout.

    Returns:
        The result of ``self.renderer.render`` applied to the master
        template, with the rendered ``splash.mustache.html`` page as
        ``body`` and ``self.topic_range`` as ``topic_range``.
    """
    master_path = get_static_resource_path('www/master.mustache.html')
    with open(master_path, encoding='utf-8') as master_file:
        layout = master_file.read()

    context = {
        'body': _render_template('splash.mustache.html'),
        'topic_range': self.topic_range,
    }
    return self.renderer.render(layout, context)
def description():
    """Serve the corpus description file named by the ``corpus_desc`` option.

    Returns:
        The ``static_file`` result for the resolved file, or the text
        ``"File not found"`` (with ``response.status`` set to 404) when
        ``corpus_desc`` is absent or empty in ``kwargs``.
    """
    desc_name = kwargs.get('corpus_desc')
    if desc_name:
        # Resolve inside the static resources, then hand the directory and
        # the bare file name to static_file separately.
        resolved = get_static_resource_path(desc_name)
        directory, basename = os.path.split(resolved)
        return static_file(basename, root=directory)

    response.status = 404
    return "File not found"
def index(k):
    """Render the bars page for model ``k`` inside the master layout.

    Args:
        k: Model key checked against ``self.topic_range`` (presumably the
            number of topics -- confirm against the route definition).

    Returns:
        The result of ``self.renderer.render`` applied to the master
        template, with the rendered ``bars.mustache.html`` page as ``body``.
        When ``k`` is not in ``self.topic_range`` the request is handed to
        ``abort`` with status 400 first.
    """
    if k not in self.topic_range:
        message = "No model for k = {}".format(k)
        abort(400, message)

    master_path = get_static_resource_path('www/master.mustache.html')
    with open(master_path, encoding='utf-8') as master_file:
        layout = master_file.read()

    context = {
        'body': _render_template('bars.mustache.html'),
        'topic_range': self.topic_range,
    }
    return self.renderer.render(layout, context)
def _render_template(page):
    """Render a single page template from the ``www/`` static resources.

    The ``Expires`` response header is set to ``_cache_date()`` before the
    template is read.

    Args:
        page: Template file name, appended to ``www/`` to locate the file.

    Returns:
        The result of ``self.renderer.render`` for the template, using
        corpus options taken from ``kwargs`` (with defaults) together with
        ``self.context_type`` and ``self.topic_range``.
    """
    response.set_header('Expires', _cache_date())

    page_path = get_static_resource_path('www/' + page)
    with open(page_path, encoding='utf-8') as page_file:
        source = page_file.read()

    option = kwargs.get  # shorthand: every option below has a fallback
    context = {
        'corpus_name': option('corpus_name', ''),
        'corpus_link': option('corpus_link', ''),
        'context_type': self.context_type,
        'topic_range': self.topic_range,
        'doc_title_format': option('doc_title_format', '{0}'),
        'doc_url_format': option('doc_url_format', ''),
        'home_link': option('home_link', '/'),
    }
    return self.renderer.render(source, context)
def _render_template(page):
    """Render a single page template from the ``www/`` static resources.

    The ``Expires`` response header is set to ``_cache_date()`` before the
    template is read.

    Args:
        page: Template file name, appended to ``www/`` to locate the file.

    Returns:
        The result of ``self.renderer.render`` for the template, using
        corpus options taken from ``kwargs`` (with defaults) together with
        ``self.context_type`` and ``self.topic_range``.
    """
    response.set_header('Expires', _cache_date())

    page_path = get_static_resource_path('www/' + page)
    with open(page_path, encoding='utf-8') as page_file:
        source = page_file.read()

    option = kwargs.get  # shorthand: every option below has a fallback
    context = {
        'corpus_name': option('corpus_name', ''),
        'corpus_link': option('corpus_link', ''),
        'context_type': self.context_type,
        'topic_range': self.topic_range,
        'doc_title_format': option('doc_title_format', '{0}'),
        'doc_url_format': option('doc_url_format', ''),
        'home_link': option('home_link', '/'),
    }
    return self.renderer.render(source, context)
def main(args=None, launch=True):
    """Build the Associated Press demo corpus and optionally serve it.

    Runs ``download_and_extract``, then the ``init``, ``prep`` and ``train``
    commands with fixed argument lists, then rewrites ``ap.ini`` with the
    demo-specific settings and copies ``demo/ap.md`` to ``ap.md`` in the
    current directory.

    Args:
        args: Not read; every stage parses its own fixed argument list.
        launch: When true, run the ``server`` command on ``ap.ini`` once the
            configuration has been written.
    """
    def _run_stage(command, argv):
        # Each stage follows the same pattern: fresh parser, let the command
        # declare its options, parse the fixed argv, run the command.
        parser = ArgumentParser()
        command.populate_parser(parser)
        command.main(parser.parse_args(argv))

    download_and_extract()
    pwd = os.getcwd()  # carried over unchanged; not referenced afterwards

    _run_stage(
        init,
        ['ap', '--name', '"Associated Press 88-90 sample"', '--rebuild', '-q'])
    _run_stage(prep, 'ap.ini --lang en --high 2000 --low 5 -q'.split())
    _run_stage(
        train,
        "ap.ini -k 20 40 60 --context-type article --iter 20".split())

    from configparser import RawConfigParser as ConfigParser
    settings = ConfigParser()
    settings.read('ap.ini')
    overrides = (
        ("main", "label_module", "topicexplorer.extensions.ap"),
        ("main", "corpus_desc", "ap.md"),
        ("www", "icons", "ap,fingerprint,link"),
        ("www", "fulltext", "True"),
    )
    for section, option, value in overrides:
        settings.set(section, option, value)

    shutil.copyfile(get_static_resource_path('demo/ap.md'), 'ap.md')
    with open("ap.ini", "w") as ini_file:
        settings.write(ini_file)

    if launch:
        _run_stage(server, ['ap.ini'])
def main(args=None, launch=True):
    """Build the Associated Press demo corpus and optionally serve it.

    Runs ``download_and_extract``, then the ``init``, ``prep`` and ``train``
    commands with fixed argument lists, then rewrites ``ap.ini`` with the
    demo-specific settings and copies ``demo/ap.md`` to ``ap.md`` in the
    current directory.

    Args:
        args: Not read; every stage parses its own fixed argument list.
        launch: When true, run the ``server`` command on ``ap.ini`` once the
            configuration has been written.
    """
    def _run_stage(command, argv):
        # Each stage follows the same pattern: fresh parser, let the command
        # declare its options, parse the fixed argv, run the command.
        parser = ArgumentParser()
        command.populate_parser(parser)
        command.main(parser.parse_args(argv))

    download_and_extract()
    pwd = os.getcwd()  # carried over unchanged; not referenced afterwards

    _run_stage(
        init,
        ['ap', '--name', '"Associated Press 88-90 sample"', '--rebuild', '-q'])
    _run_stage(prep, 'ap.ini --lang en --high 2000 --low 5 -q'.split())
    _run_stage(
        train,
        "ap.ini -k 20 40 60 --context-type article --iter 20".split())

    import topicexplorer.config
    settings = topicexplorer.config.read('ap.ini')
    overrides = (
        ("main", "label_module", "topicexplorer.extensions.ap"),
        ("main", "corpus_desc", "ap.md"),
        ("www", "icons", "ap,fingerprint,link"),
        ("www", "fulltext", "True"),
    )
    for section, option, value in overrides:
        settings.set(section, option, value)

    shutil.copyfile(get_static_resource_path('demo/ap.md'), 'ap.md')
    with open("ap.ini", "w") as ini_file:
        settings.write(ini_file)

    if launch:
        _run_stage(server, ['ap.ini'])
def icons():
    """Return the icons script followed by the configured icon list.

    ``www/icons.js`` is read as UTF-8, the same explicit encoding the
    template-rendering handlers use for their static resources, so the
    result does not depend on the platform's default encoding.

    Returns:
        The text of ``www/icons.js``, a newline, and a
        ``var icons = <json>;`` statement where ``<json>`` is
        ``json.dumps(self.icons)``.
    """
    icons_path = get_static_resource_path('www/icons.js')
    # The handle gets its own name so it no longer shadows this function.
    with open(icons_path, encoding='utf-8') as icons_file:
        script = icons_file.read()
    return '{0}\n var icons = {1};'.format(script, json.dumps(self.icons))
# -*- coding: utf-8 -*- import os import platform #updated to use pymmseg function calls instead of plain mmseg import string from topicexplorer.lib.util import get_static_resource_path import os.path import topicexplorer.lib.mmseg as mmseg modern_dic = mmseg.Dict(get_static_resource_path("mmseg/modern_words.dic")) ancient_dic = mmseg.Dict(get_static_resource_path("mmseg/ancient_words.dic")) chrs = mmseg.CharFreqs(get_static_resource_path("mmseg/chars.dic")) ancient_mmseg = mmseg.MMSeg(ancient_dic, chrs) modern_mmseg = mmseg.MMSeg(modern_dic, chrs) def is_flagged(toke_number): # '基本汉字'(Basic Chinese Character),20902 characters,'4E00-9FA5' return ((toke_number > ord(u'\u4E00') - 1 and toke_number < ord(u'\u9FEF') + 1) or \ # '扩展A'(Expansion A),6582 characters,'3400-4DB5' (toke_number > ord(u'\u3400') - 1 and toke_number < ord(u'\u4DB5') + 1) or \ # '扩展B'(Expansion B),42711 characters,'20000-2A6D6' (toke_number > int('20000', 16) - 1 and toke_number < int('2A6D6', 16) + 1) or \ # '扩展C'(Expansion C),4149 characters,'2A700-2B734' (toke_number > int('2A700', 16) - 1 and toke_number < int('2B734', 16) + 1) or \ # '扩展D'(Expansion D),222 characters,'2B740-2B81D'
def main(args):
    """Install the notebook scaffolding beside a config file; optionally launch Jupyter.

    Steps:
      1. Fill the ``ipynb/corpus.tmpl.py`` template with the absolute config
         path and write it to ``notebooks/corpus.py`` next to the config
         file (subject to ``overwrite_prompt``).
      2. Copy every bundled ``*.<py2|py3>.ipynb`` notebook into that
         directory, dropping the ``.py2``/``.py3`` marker from the name
         (each copy subject to ``overwrite_prompt``).
      3. If ``args.launch`` is set, change into the notebooks directory,
         start ``jupyter notebook`` and block until SIGINT or SIGTERM.

    Args:
        args: Namespace providing ``config_file`` (replaced in place by its
            absolute path) and ``launch``.

    Raises:
        SystemExit: With status 1 when the ``jupyter`` executable cannot be
            started; with status 0 from the signal handler on the
            ``taskkill`` path.
    """
    args.config_file = os.path.abspath(args.config_file)

    with open(get_static_resource_path('ipynb/corpus.tmpl.py')) as corpustmpl:
        corpus_py = Template(corpustmpl.read())
    corpus_py = corpus_py.safe_substitute(config_file=args.config_file)

    ipynb_path = os.path.join(os.path.dirname(args.config_file), "notebooks")
    print(ipynb_path)
    if not os.path.exists(ipynb_path):
        os.makedirs(ipynb_path)

    filename = os.path.join(ipynb_path, "corpus.py")
    if overwrite_prompt(filename, default=True):
        print("Writing", filename)
        with open(filename, 'w') as corpusloader:
            corpusloader.write(corpus_py)

    pyflag = 'py2' if sys.version_info.major == 2 else 'py3'
    glob_path = (get_static_resource_path('ipynb') + '/*.{}.ipynb').format(pyflag)

    for notebook in glob(glob_path):
        new_nb_name = os.path.basename(notebook).replace('.' + pyflag, '')
        new_nb_path = os.path.join(ipynb_path, new_nb_name)
        if overwrite_prompt(new_nb_path, default=False):
            print("Copying", notebook)
            shutil.copy(notebook, new_nb_path)

    if args.launch:
        import subprocess
        os.chdir(ipynb_path)

        # TODO: Fix KeyboardInterrupt errors
        # os.setsid is not available on every platform; without it the child
        # is started with no preexec hook and is stopped via taskkill below.
        grp_fn = getattr(os, 'setsid', None)
        try:
            # An argument list (no shell) is used on purpose: with
            # shell=True the shell itself always starts, so a missing
            # `jupyter` never raised OSError and the message below was
            # unreachable.
            proc = subprocess.Popen(["jupyter", "notebook"],
                                    preexec_fn=grp_fn)
        except OSError:
            print("ERROR: Command `jupyter notebook` not found.")
            print(" If IPython or Anaconda is installed, check your PATH variable.")
            sys.exit(1)

        # CLEAN EXIT AND SHUTDOWN OF IPYTHON NOTEBOOK
        def signal_handler(signum, frame):
            # Cross-platform compatibility: forward the signal to the
            # child's process group where os.killpg exists, otherwise fall
            # back to taskkill on the child's process tree.
            try:
                os.killpg(proc.pid, signum)
                proc.communicate()
            except AttributeError:
                subprocess.call(['taskkill', '/F', '/T', '/PID', str(proc.pid)])
                sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        print("\nPress Ctrl+C to shutdown the IPython notebook server\n")

        # Cross-platform compatibility: block until a signal arrives.
        try:
            signal.pause()
        except AttributeError:
            # Windows hack: signal.pause is unavailable, so idle in a loop.
            while True:
                time.sleep(1)
def main(args):
    """Install the notebook scaffolding beside a config file; optionally launch Jupyter.

    Steps:
      1. Fill the ``ipynb/corpus.tmpl.py`` template with the absolute config
         path and write it to ``notebooks/corpus.py`` next to the config
         file (subject to ``overwrite_prompt``).
      2. Copy every bundled ``*.<py2|py3>.ipynb`` notebook into that
         directory, dropping the ``.py2``/``.py3`` marker from the name
         (each copy subject to ``overwrite_prompt``).
      3. If ``args.launch`` is set, change into the notebooks directory,
         start ``jupyter notebook`` and block until SIGINT or SIGTERM.

    Args:
        args: Namespace providing ``config_file`` (replaced in place by its
            absolute path) and ``launch``.

    Raises:
        SystemExit: With status 1 when the ``jupyter`` executable cannot be
            started; with status 0 from the signal handler on the
            ``taskkill`` path.
    """
    args.config_file = os.path.abspath(args.config_file)

    with open(get_static_resource_path('ipynb/corpus.tmpl.py')) as corpustmpl:
        corpus_py = Template(corpustmpl.read())
    corpus_py = corpus_py.safe_substitute(config_file=args.config_file)

    ipynb_path = os.path.join(os.path.dirname(args.config_file), "notebooks")
    print(ipynb_path)
    if not os.path.exists(ipynb_path):
        os.makedirs(ipynb_path)

    filename = os.path.join(ipynb_path, "corpus.py")
    if overwrite_prompt(filename, default=True):
        print("Writing", filename)
        with open(filename, 'w') as corpusloader:
            corpusloader.write(corpus_py)

    pyflag = 'py2' if sys.version_info.major == 2 else 'py3'
    glob_path = (get_static_resource_path('ipynb') + '/*.{}.ipynb').format(pyflag)

    for notebook in glob(glob_path):
        new_nb_name = os.path.basename(notebook).replace('.' + pyflag, '')
        new_nb_path = os.path.join(ipynb_path, new_nb_name)
        if overwrite_prompt(new_nb_path, default=False):
            print("Copying", notebook)
            shutil.copy(notebook, new_nb_path)

    if args.launch:
        import subprocess
        os.chdir(ipynb_path)

        # TODO: Fix KeyboardInterrupt errors
        # os.setsid is not available on every platform; without it the child
        # is started with no preexec hook and is stopped via taskkill below.
        grp_fn = getattr(os, 'setsid', None)
        try:
            # An argument list (no shell) is used on purpose: with
            # shell=True the shell itself always starts, so a missing
            # `jupyter` never raised OSError and the message below was
            # unreachable.
            proc = subprocess.Popen(["jupyter", "notebook"],
                                    preexec_fn=grp_fn)
        except OSError:
            print("ERROR: Command `jupyter notebook` not found.")
            print(
                " If IPython or Anaconda is installed, check your PATH variable."
            )
            sys.exit(1)

        # CLEAN EXIT AND SHUTDOWN OF IPYTHON NOTEBOOK
        def signal_handler(signum, frame):
            # Cross-platform compatibility: forward the signal to the
            # child's process group where os.killpg exists, otherwise fall
            # back to taskkill on the child's process tree.
            try:
                os.killpg(proc.pid, signum)
                proc.communicate()
            except AttributeError:
                subprocess.call(
                    ['taskkill', '/F', '/T', '/PID', str(proc.pid)])
                sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        print("\nPress Ctrl+C to shutdown the IPython notebook server\n")

        # Cross-platform compatibility: block until a signal arrives.
        try:
            signal.pause()
        except AttributeError:
            # Windows hack: signal.pause is unavailable, so idle in a loop.
            while True:
                time.sleep(1)
def icons():
    """Return the icons script followed by the configured icon list.

    ``www/icons.js`` is read as UTF-8, the same explicit encoding the
    template-rendering handlers use for their static resources, so the
    result does not depend on the platform's default encoding.

    Returns:
        The text of ``www/icons.js``, a newline, and a
        ``var icons = <json>;`` statement where ``<json>`` is
        ``json.dumps(self.icons)``.
    """
    icons_path = get_static_resource_path('www/icons.js')
    # The handle gets its own name so it no longer shadows this function.
    with open(icons_path, encoding='utf-8') as icons_file:
        script = icons_file.read()
    return '{0}\n var icons = {1};'.format(script, json.dumps(self.icons))
def send_static(filename):
    """Serve ``filename`` from the ``www/`` static resource directory.

    Args:
        filename: Name of the requested file, passed through to
            ``static_file`` unchanged.

    Returns:
        Whatever ``static_file`` returns for that file, using the resolved
        ``www/`` resource path as root.
    """
    www_root = get_static_resource_path('www/')
    return static_file(filename, root=www_root)
# -*- coding: utf-8 -*- import os import platform #updated to use pymmseg function calls instead of plain mmseg import string from topicexplorer.lib.util import get_static_resource_path import os.path import topicexplorer.lib.mmseg as mmseg modern_dic = mmseg.Dict(get_static_resource_path("mmseg/modern_words.dic")) ancient_dic = mmseg.Dict(get_static_resource_path("mmseg/ancient_words.dic")) chrs = mmseg.CharFreqs(get_static_resource_path("mmseg/chars.dic")) ancient_mmseg = mmseg.MMSeg(ancient_dic, chrs) modern_mmseg = mmseg.MMSeg(modern_dic, chrs) def is_flagged(toke_number): # '基本汉字'(Basic Chinese Character),20902 characters,'4E00-9FA5' return ((toke_number > ord(u'\u4E00') - 1 and toke_number < ord(u'\u9FEF') + 1) or \ # '扩展A'(Expansion A),6582 characters,'3400-4DB5' (toke_number > ord(u'\u3400') - 1 and toke_number < ord(u'\u4DB5') + 1) or \ # '扩展B'(Expansion B),42711 characters,'20000-2A6D6' (toke_number > int('20000', 16) - 1 and toke_number < int('2A6D6', 16) + 1) or \ # '扩展C'(Expansion C),4149 characters,'2A700-2B734' (toke_number > int('2A700', 16) - 1 and toke_number < int('2B734', 16) + 1) or \ # '扩展D'(Expansion D),222 characters,'2B740-2B81D' (toke_number > int('2B740', 16) - 1 and toke_number < int('2B81D', 16) + 1) or \ # '扩展E'(Expansion E),5762 characters,'2B320-2CEA1' (toke_number > int('2B820', 16) - 1 and toke_number < int('2CEA1', 16) + 1) or \ # '扩展F'(Expansion F),7473 characters,'2CEB0-2EBE0'
def send_static(filename):
    """Serve ``filename`` from the ``www/`` static resource directory.

    Args:
        filename: Name of the requested file, passed through to
            ``static_file`` unchanged.

    Returns:
        Whatever ``static_file`` returns for that file, using the resolved
        ``www/`` resource path as root.
    """
    www_root = get_static_resource_path('www/')
    return static_file(filename, root=www_root)