def get_host_port(args):
    """
    Returns the hostname and port number

    The configuration named by ``args.config`` is read through
    ``topicexplorer.config.read``.  The candidate port is ``args.port`` when
    set, otherwise the ``www:port`` option.  While something accepts TCP
    connections on the candidate port the user is asked for another one;
    with ``args.quiet`` set an ``IOError`` is raised instead of prompting.

    When the chosen port differs from the configured one and the user
    agrees, the new port is written back to ``args.config``.

    :param args: namespace providing ``config``, ``host``, ``port`` and
        ``quiet``.
    :returns: ``(host, port)`` tuple.
    :raises IOError: the port is taken and ``args.quiet`` is set.
    """
    import topicexplorer.config
    config = topicexplorer.config.read(args.config)

    def port_in_use(port):
        """Return True when a TCP connection to ``port`` succeeds."""
        host = args.host or config.get("www", "host")
        if host == '0.0.0.0':
            host = 'localhost'
        try:
            s = socket.create_connection((host, port), 2)
        except socket.error:
            # nothing answered, so the port is free
            return False
        # The conflict is reported through the return value rather than by
        # raising IOError inside the ``try``: on Python 3 socket.error and
        # IOError are both OSError, so such a raise would be swallowed by
        # the handler above.
        s.close()
        return True

    # automatic port assignment
    def test_port(port):
        """Return ``port`` once it is free, prompting on every conflict."""
        while port_in_use(port):
            if args.quiet:
                raise IOError(
                    "Conflict on port {0}. Try running with -p to manually set new port.".format(port))
            port = int_prompt(
                "Conflict on port {0}. Enter new port:".format(port))
        return port

    port = args.port or int(config.get('www', 'port').format(0))
    port = test_port(port)

    # prompt to save
    if (int(config.get("www", "port").format(0))) != port:
        if not args.quiet and bool_prompt(
                "Change default baseport to {0}?".format(port), default=True):
            config.set("www", "port", text(port))

            # create deep copy of configuration
            # see http://stackoverflow.com/a/24343297
            config_string = StringIO()
            config.write(config_string)

            # skip DEFAULT section
            config_string.seek(0)
            idx = config_string.getvalue().index("[main]")
            config_string.seek(idx)

            # read deep copy -- it has to be parsed into the *new* parser,
            # otherwise an empty configuration is written to disk below
            new_config = ConfigParser()
            new_config.read_file(config_string)

            # write deep copy without DEFAULT section
            # this preserves DEFAULT for rest of program
            with open(args.config, 'w') as configfh:
                new_config.write(configfh)

    # hostname assignment
    host = args.host or config.get('www', 'host')
    return host, port
def lang_prompt(languages):
    """Ask, one language at a time, which stoplists should be applied.

    Entries of ``languages`` that are not keys of the module-level ``langs``
    mapping are skipped without a prompt.  For every other entry the user is
    asked a yes/no question (default: yes) labelled with the capitalized
    ``langs`` value.

    :param languages: iterable of language keys.
    :returns: set of the keys the user accepted.
    """
    global langs
    print("Stoplist the following languages?", end=' ')
    # lazy filter: the membership test and the prompt stay interleaved,
    # one language after the other
    recognised = (code for code in languages if code in langs)
    return {
        code for code in recognised
        if bool_prompt("{}?".format(langs[code].capitalize()), default=True)
    }
def main(args):
    """Train new topic models or continue training existing ones.

    With ``args.cluster`` set, only ``cluster`` is run and the function
    returns.  Otherwise the configuration in ``args.config_file`` is read,
    missing settings (number of topics, iterations, context type) are asked
    for interactively, the models are built or continued, and the resulting
    ``model_pattern``, ``context_type`` and ``topics`` options are stored in
    the configuration.  Unless ``args.dry_run`` is set, a stale ``cluster``
    file is removed and the configuration is written back to disk.

    :param args: namespace providing ``cluster``, ``config_file``, ``k``,
        ``quiet``, ``processes``, ``rebuild``, ``cont``, ``iter``, ``seed``,
        ``dry_run`` and ``context_type``.  ``k``, ``processes``, ``iter`` and
        ``context_type`` may be updated in place.
    """
    if args.cluster:
        cluster(args.cluster, args.config_file)
        return

    config = topicexplorer.config.read(args.config_file)
    corpus_filename = config.get("main", "corpus_file")
    model_path = config.get("main", "path")

    if config.getboolean("main", "sentences"):
        from vsm.extensions.ldasentences import CorpusSent as Corpus
    else:
        from vsm.corpus import Corpus

    if args.k is None:
        try:
            if config.get("main", "topics"):
                # NOTE(review): eval() executes whatever text is stored in
                # the config file; ast.literal_eval would be the safer
                # parser if the value is always a plain list -- confirm.
                default = ' '.join(map(str, eval(config.get("main", "topics"))))
                if args.quiet:
                    args.k = [int(n) for n in default.split()]
            else:
                raise NoOptionError('main', 'topics')
        except NoOptionError:
            default = ' '.join(map(str, range(20, 100, 20)))

        while args.k is None:
            ks = input("Number of Topics [Default '{0}']: ".format(default))
            try:
                # A blank *or whitespace-only* answer selects the default.
                # Testing the unstripped text used to turn "   " into an
                # empty list, which crashed later on ``args.k[0]``.
                args.k = [int(n) for n in (ks.strip() or default).split()]
                if args.k:
                    print("\nTIP: number of topics can be specified with argument '-k N N N ...':")
                    print(" topicexplorer train %s -k %s\n" %
                          (args.config_file, ' '.join(map(str, args.k))))
            except ValueError:
                print("Enter valid integers, separated by spaces!")

    if args.processes < 0:
        # negative values are added to the CPU count
        import multiprocessing
        args.processes = multiprocessing.cpu_count() + args.processes

    print("Loading corpus... ")
    corpus = Corpus.load(corpus_filename)

    try:
        model_pattern = config.get("main", "model_pattern")
    except NoOptionError:
        model_pattern = None

    if (model_pattern is not None and not args.rebuild and
            (args.quiet or args.cont or bool_prompt(
                "Existing topic models found. You can continue training "
                "or start a new model. Do you want to continue training "
                "your existing models? ", default=True))):
        from vsm.model.lda import LDA
        # the first model is loaded only to read its iteration count
        m = LDA.load(model_pattern.format(args.k[0]),
                     multiprocessing=args.processes > 1,
                     n_proc=args.processes)

        if args.iter is None and not args.quiet:  # pragma: no cover
            args.iter = int_prompt("Total number of training iterations:",
                                   default=int(m.iteration * 1.5),
                                   min=m.iteration)
            print("\nTIP: number of training iterations can be specified with argument '--iter N':")
            print(" topicexplorer train --iter %d %s\n" % (args.iter, args.config_file))
        elif args.iter is None and args.quiet:  # pragma: no cover
            args.iter = int(m.iteration * 1.5)

        del m

        # if the set changes, build some new models and continue some old ones
        # NOTE(review): same eval() caveat as above.
        config_topics = eval(config.get("main", "topics"))
        if args.k != config_topics:
            new_models = set(args.k) - set(config_topics)
            continuing_models = set(args.k) & set(config_topics)

            build_models(corpus, corpus_filename, model_path,
                         config.get("main", "context_type"),
                         new_models, n_iterations=args.iter,
                         n_proc=args.processes, seed=args.seed,
                         dry_run=args.dry_run)

            model_pattern = continue_training(model_pattern, continuing_models,
                                              args.iter, n_proc=args.processes,
                                              dry_run=args.dry_run)
        else:
            model_pattern = continue_training(model_pattern, args.k, args.iter,
                                              n_proc=args.processes,
                                              dry_run=args.dry_run)
    else:
        # build a new model
        if args.iter is None and not args.quiet:  # pragma: no cover
            args.iter = int_prompt("Number of training iterations:", default=200)
            print("\nTIP: number of training iterations can be specified with argument '--iter N':")
            print(" topicexplorer train --iter %d %s\n" % (args.iter, args.config_file))
        elif args.iter is None and args.quiet:  # pragma: no cover
            args.iter = 200

        # TODO: if only one context_type, make it just the one context type.
        ctxs = corpus.context_types
        if len(ctxs) == 1:
            args.context_type = ctxs[0]
        else:
            # order by number of contexts; the first entry is the default
            ctxs = sorted(ctxs, key=lambda ctx: len(corpus.view_contexts(ctx)))
            if args.context_type not in ctxs:
                while args.context_type not in ctxs:
                    # the default choice is shown in upper case
                    contexts = ctxs[:]
                    contexts[0] = contexts[0].upper()
                    contexts = '/'.join(contexts)
                    args.context_type = input("Select a context type [%s] : " % contexts)
                    if args.context_type.strip() == '':
                        args.context_type = ctxs[0]
                    if args.context_type == ctxs[0].upper():
                        args.context_type = ctxs[0]

                print("\nTIP: context type can be specified with argument '--context-type TYPE':")
                print(" topicexplorer train --context-type %s %s\n" % (args.context_type, args.config_file))

        print("\nTIP: This configuration can be automated as:")
        print(" topicexplorer train %s --iter %d --context-type %s -k %s\n" %
              (args.config_file, args.iter, args.context_type,
               ' '.join(map(str, args.k))))
        model_pattern = build_models(corpus, corpus_filename, model_path,
                                     args.context_type, args.k,
                                     n_iterations=args.iter,
                                     n_proc=args.processes, seed=args.seed,
                                     dry_run=args.dry_run)

    config.set("main", "model_pattern", model_pattern)
    if args.context_type:
        # test for presence, since continuing doesn't require context_type
        config.set("main", "context_type", args.context_type)
    args.k.sort()
    config.set("main", "topics", str(args.k))

    if not args.dry_run:
        if config.has_option("main", "cluster"):
            cluster_path = config.get("main", "cluster", fallback=None)
            config.remove_option("main", "cluster")
            try:
                if cluster_path:
                    os.remove(cluster_path)
            except (OSError, IOError):
                # fail silently on IOError
                pass

        with open(args.config_file, "w") as configfh:
            config.write(configfh)
def main(args):
    """Serve a single topic model over HTTP.

    Reads the configuration file ``args.config``, picks a free port
    (``args.port`` or the ``www:port`` option plus ``args.k``), loads the
    corpus and the model for ``args.k`` into module-level globals, registers
    the ``/icons.js``, ``/`` and static-file routes and finally calls
    ``run``, with ``SSLWSGIRefServer`` when SSL is requested.

    The function assigns the module globals ``context_type``, ``lda_c``,
    ``lda_m``, ``lda_v``, ``label`` and ``id_fn``.

    This block is Python 2 code (print statements, ``readfp``).
    """
    global context_type, lda_c, lda_m, lda_v, label, id_fn

    # load in the configuration file
    # (the dict supplies fallback values for options missing from the file)
    config = ConfigParser({
        'certfile' : None,
        'keyfile' : None,
        'ca_certs' : None,
        'ssl' : False,
        'port' : '8000',
        'host' : '0.0.0.0',
        'topic_range' : '{0},{1},1'.format(args.k, args.k+1),
        'icons': 'link',
        'corpus_link' : None,
        'doc_title_format' : None,
        'doc_url_format' : None,
        'topics': None})
    config.read(args.config)

    # path variables
    path = config.get('main', 'path')
    context_type = config.get('main', 'context_type')
    corpus_file = config.get('main', 'corpus_file')
    model_pattern = config.get('main', 'model_pattern')

    # automatic port assignment
    def test_port(port):
        # Returns ``port`` when nothing accepts connections on it; otherwise
        # prompts for another port and tests that one recursively.
        try:
            host = args.host or config.get("www","host")
            if host == '0.0.0.0':
                host = 'localhost'
            try:
                s = socket.create_connection((host,port), 2)
                s.close()
                # a successful connection means the port is already taken
                raise IOError("Socket connectable on port {0}".format(port))
            except socket.error:
                # connection failed, i.e. the port is free
                # NOTE(review): on Python 3 socket.error is OSError and would
                # also swallow the IOError raised above -- relevant if this
                # block is ever ported.
                pass
            return port
        except IOError:
            port = int_prompt(
                "Conflict on port {0}. Enter new port:".format(port))
            return test_port(port)

    # the configured port is a base port; the topic count is added to it
    port = args.port or int(config.get('www','port').format(0)) + args.k
    port = test_port(port)

    # prompt to save
    if (int(config.get("www","port").format(0)) + args.k) != port:
        if bool_prompt("Change default baseport to {0}?".format(port - args.k), default=True):
            config.set("www","port", str(port - args.k))

            # create deep copy of configuration
            # see http://stackoverflow.com/a/24343297
            config_string = StringIO()
            config.write(config_string)

            # skip DEFAULT section
            config_string.seek(0)
            idx = config_string.getvalue().index("[main]")
            config_string.seek(idx)

            # read deep copy
            new_config = ConfigParser()
            new_config.readfp(config_string)

            # write deep copy without DEFAULT section
            # this preserves DEFAULT for rest of program
            with open(args.config, 'wb') as configfh:
                new_config.write(configfh)

    # hostname assignment
    host = args.host or config.get('www','host')

    # LDA objects
    lda_c = Corpus.load(corpus_file)
    lda_m = None
    lda_v = None
    def load_model(k):
        # loads the k-topic model and its viewer into the module globals
        global lda_m, lda_v
        lda_m = LDA.load(model_pattern.format(k))
        lda_v = LDAViewer(lda_c, lda_m)

    load_model(args.k)

    # label function imports
    try:
        label_module = config.get('main', 'label_module')
        label_module = import_module(label_module)
        print "imported label module"
        label_module.init(config.get('main','path'), lda_v, context_type)
    except (ImportError, NoOptionError, AttributeError):
        pass

    # When the option lookup above failed, ``label_module`` was never bound,
    # hence UnboundLocalError is handled next to AttributeError below.
    try:
        label = label_module.label
        print "imported label function"
    except (AttributeError, UnboundLocalError):
        label = lambda x: x
        print "using default label function"

    try:
        id_fn = label_module.id_fn
        print "imported id function"
    except (AttributeError, UnboundLocalError):
        id_fn = def_label_fn
        print "using default id function"

    config_icons = config.get('www','icons').split(",")

    @route('/icons.js')
    def icons():
        # serves the bundled icons.js followed by the configured icon list
        with open(resource_filename(__name__, '../www/icons.js')) as icons:
            text = '{0}\n var icons = {1};'\
                        .format(icons.read(), json.dumps(config_icons))
        return text

    # index page parameterization
    corpus_name = config.get('www','corpus_name')
    corpus_link = config.get('www','corpus_link')
    doc_title_format = config.get('www', 'doc_title_format')
    doc_url_format = config.get('www', 'doc_url_format')

    # an explicit ``topics`` list takes precedence over ``topic_range``
    if config.get('main', 'topic_range'):
        topic_range = map(int, config.get('main', 'topic_range').split(','))
        topic_range = range(*topic_range)
    if config.get('main', 'topics'):
        # NOTE(review): eval() executes the text stored in the config file
        topic_range = eval(config.get('main', 'topics'))
    # each model is expected on base port + k
    topic_range = [{'k' : k, 'port' : int(config.get('www','port').format(0)) + k}
                        for k in topic_range]

    # the renderer is configured with an identity escape function
    renderer = pystache.Renderer(escape=lambda u: u)

    @route('/')
    def index():
        # renders the index template with the corpus/page parameters
        response.set_header('Expires', _cache_date())

        with open(resource_filename(__name__, '../www/index.mustache.html'),
                  encoding='utf-8') as tmpl_file:
            template = tmpl_file.read()
        return renderer.render(template,
            {'corpus_name' : corpus_name,
             'corpus_link' : corpus_link,
             'context_type' : context_type,
             'topic_range' : topic_range,
             'doc_title_format' : doc_title_format,
             'doc_url_format' : doc_url_format})

    @route('/<filename:path>')
    @_set_acao_headers
    def send_static(filename):
        # every other path is served from the bundled www directory
        return static_file(filename, root=resource_filename(__name__, '../www/'))

    # NOTE(review): config.get returns text for values read from the file,
    # so any non-empty ``ssl`` value (even "false") is truthy here -- confirm
    # this is intended.
    if args.ssl or config.get('main', 'ssl'):
        certfile = args.certfile or config.get('ssl', 'certfile')
        keyfile = args.keyfile or config.get('ssl', 'keyfile')
        ca_certs = args.ca_certs or config.get('ssl', 'ca_certs')

        run(host=host, port=port, server=SSLWSGIRefServer,
            certfile=certfile, keyfile=keyfile, ca_certs=ca_certs)
    else:
        run(host=host, port=port)
def main(args): # CONFIGURATION PARSING # load in the configuration file config = ConfigParser({ 'certfile' : None, 'keyfile' : None, 'ca_certs' : None, 'ssl' : False, 'port' : '8000', 'host' : '0.0.0.0', 'icons': 'link', 'corpus_link' : None, 'doc_title_format' : None, 'doc_url_format' : None, 'topic_range': None, 'fulltext' : 'false', 'raw_corpus': None, 'topics': None}) config.read(args.config_file) if config.get('main', 'topic_range'): topic_range = map(int, config.get('main', 'topic_range').split(',')) topic_range = range(*topic_range) if config.get('main', 'topics'): topic_range = eval(config.get('main', 'topics')) print topic_range # LAUNCHING SERVERS # Cross-platform compatability def get_log_file(k): if config.has_section('logging'): path = config.get('logging','path') path = path.format(k) if not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) return open(path, 'a') else: return subprocess.PIPE def test_baseport(host, baseport, topic_range): try: for k in topic_range: port = baseport + k try: s = socket.create_connection((host,port), 2) s.close() raise IOError("Socket connectable on port {0}".format(port)) except socket.error: pass return baseport except IOError: baseport = int_prompt( "Conflict on port {0}. 
Enter new base port: [CURRENT: {1}]"\ .format(port, baseport)) return test_baseport(host, baseport, topic_range) host = config.get("www","host") if host == '0.0.0.0': host = socket.gethostname() baseport = int(config.get("www","port").format(0)) baseport = test_baseport(host, baseport, topic_range) # prompt to save if int(config.get("www","port").format(0)) != baseport: if bool_prompt("Change default baseport to {0}?".format(baseport), default=True): config.set("www","port", baseport) # create deep copy of configuration # see http://stackoverflow.com/a/24343297 config_string = StringIO() config.write(config_string) # skip DEFAULT section config_string.seek(0) idx = config_string.getvalue().index("[main]") config_string.seek(idx) # read deep copy new_config = ConfigParser() new_config.readfp(config_string) # write deep copy without DEFAULT section # this preserves DEFAULT for rest of program with open(args.config_file,'wb') as configfh: new_config.write(configfh) try: grp_fn = os.setsid except AttributeError: grp_fn = None fulltext = '--fulltext' if args.fulltext else '' procs = [subprocess.Popen("vsm serve -k {k} -p {port} {config_file} {fulltext}".format( k=k, port=(baseport+k), config_file=args.config_file,fulltext=fulltext), shell=True, stdout=get_log_file(k), stderr=subprocess.STDOUT, preexec_fn=grp_fn) for k in topic_range] print "pid","port" for proc,k in zip(procs, topic_range): port = baseport + k print proc.pid, "http://{host}:{port}/".format(host=host,port=port) # CLEAN EXIT AND SHUTDOWN OF SERVERS def signal_handler(signal,frame): print "\n" for p, k in zip(procs, topic_range): print "Stopping {}-topic model (Process ID: {})".format(k, p.pid) # Cross-Platform Compatability if platform.system() == 'Windows': subprocess.call(['taskkill', '/F', '/T', '/PID', str(p.pid)], stdout=open(os.devnull), stderr=open(os.devnull)) else: os.killpg(p.pid, signal) sys.exit() signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) port = 
baseport + topic_range[0] url = "http://{host}:{port}/".format(host=host,port=port) # TODO: Add enhanced port checking while True: wait_count = 0 try: urllib.urlopen(url) print "Server successfully started" break except: time.sleep(1) wait_count += 1 if wait_count == 60: print "\nLaunching the server seems to be taking a long time." print "This may be due to loading in a large corpus." print "\nTo test launching a single model, press Ctrl+C to abort launch," print "then use the `serve` command to find the error message:" print "\tvsm serve {config} -k {k}".format( config=args.config_file, k=topic_range[0]) for proc,k in zip(procs, topic_range): if proc.poll() is not None: print "\nAn error has occurred launching the {}-topic model.".format(k) try: with get_log_file(k) as logfile: print "A log has been written to: {}\n".format(logfile.name) except AttributeError: # No log file, things are a-ok. pass print "Use the `serve` command to debug errors:" print "\tvsm serve {config} -k {k}".format(config=args.config_file, k=k) for p in procs: if p.poll() is None: try: os.killpg(p.pid, signal.SIGTERM) except AttributeError: # Cross-Platform Compatability subprocess.call(['taskkill', '/F', '/T', '/PID', str(p.pid)]) sys.exit(1) if args.browser: webbrowser.open(url) print "TIP: Browser launch can be disabled with the '--no-browser' argument:" print "vsm launch --no-browser", args.config_file, "\n" print "Press Ctrl+C to shutdown the Topic Explorer server" # Cross-platform Compatability try: signal.pause() except AttributeError: # Windows hack while True: time.sleep(1)
def main(args):
    """Build a corpus from ``args.corpus_path`` and write its config file.

    Steps, in order: expand a ``.bib`` input through ``process_bibtex``,
    derive the corpus name and model path, decide whether an existing corpus
    file has to be rebuilt, build the corpus (or, for a HTRC id file, create
    it through ``htrc_features``), write the configuration through
    ``write_config`` and finally create the default corpus description next
    to the configuration file.

    An existing description is only replaced when the user explicitly
    answers ``y``.  In quiet mode the process exits with status 1 when the
    corpus file already exists and ``args.rebuild`` is not set.

    :param args: argument namespace; many attributes (``bibtex``,
        ``corpus_name``, ``model_path``, ``corpus_filename``, ``rebuild``,
        ``config_file``, ``corpus_desc`` ...) are filled in or updated.
    :returns: the configuration file path returned by ``write_config``.
    :raises IOError: re-raised from ``build_corpus`` after a usage hint has
        been printed.
    """
    # TODO: remove this code, check if there is an issue and unit test
    # convert to unicode to avoid windows errors
    # args.corpus_path = args.corpus_path

    # config corpus_path
    # process bibtex files
    args.bibtex = args.corpus_path.endswith('.bib')
    if args.bibtex:
        args.bibtex = args.corpus_path
        args.corpus_path = process_bibtex(args.corpus_path, args.quiet)

    # set corpus_name
    args.corpus_name = os.path.basename(args.corpus_path)
    if not args.corpus_name:
        # the path ended with a separator; use the directory name instead
        args.corpus_name = os.path.basename(os.path.dirname(args.corpus_path))

    if not args.corpus_print_name and not args.quiet:
        args.corpus_print_name = prompt("Corpus Name", default=args.corpus_name)

    # configure model-path
    if args.model_path is None:
        if os.path.isdir(args.corpus_path):
            args.model_path = os.path.join(args.corpus_path, '../models/')
        else:
            args.model_path = os.path.dirname(args.corpus_path)
    if args.model_path and not os.path.exists(os.path.abspath(args.model_path)):
        os.makedirs(os.path.abspath(args.model_path))

    args.corpus_filename = get_corpus_filename(
        args.corpus_path, args.model_path, stop_freq=args.stop_freq)
    if not args.rebuild and os.path.exists(os.path.abspath(args.corpus_filename)):
        if args.quiet:
            print("Path exists: {}".format(args.corpus_filename))
            sys.exit(1)
        else:
            args.rebuild = bool_prompt("\nCorpus file found. Rebuild? ",
                                       default=False)
    else:
        args.rebuild = True

    if args.htrc:
        import vsm.extensions.htrc as htrc
        if os.path.isdir(args.corpus_path):
            #htrc.proc_htrc_coll(args.corpus_path)
            ids = [name.replace('.txt', '')
                   for name in listdir_nohidden(args.corpus_path)]

            args.htrc_metapath = os.path.abspath(args.corpus_path + '/../')
            args.htrc_metapath = os.path.join(
                args.htrc_metapath,
                os.path.basename(args.corpus_path) + '.metadata.json')
        else:
            # a plain file is read as a list of volume ids, one per line
            import topicexplorer.extensions.htrc_features as htrc_features
            with open(args.corpus_path) as idfile:
                ids = [row.strip() for row in idfile if row.strip()]

            c = htrc_features.create_corpus(ids, nltk_stop=args.nltk,
                                            freq=args.stop_freq)
            c.save(args.corpus_filename)

    if args.rebuild and (not args.htrc or os.path.isdir(args.corpus_path)):
        try:
            args.corpus_filename = build_corpus(
                args.corpus_path, args.model_path, stop_freq=args.stop_freq,
                decode=args.decode, nltk_stop=args.nltk, simple=args.simple,
                sentences=args.sentences, tokenizer=args.tokenizer)
        except IOError:
            print("ERROR: invalid path, please specify either:")
            print(" * a single plain-text or PDF file,")
            print(" * a single bibtex (.bib) file with 'file' fields,")
            print(" * a folder of plain-text or PDF files, or")
            print(" * a folder of folders of plain-text or PDF files.")
            print("\nExiting...")
            # the error is propagated to the caller, as before
            raise

    args.config_file = write_config(args, args.config_file)

    desc_path = args.config_file + '.md'
    args.corpus_desc = desc_path
    if not args.quiet and os.path.exists(desc_path):
        answer = None
        while answer not in ('y', 'n', ''):
            answer = input(
                "\nExisting corpus description found. Remove? [y/N] ")
            answer = answer.lower().strip()
        # Only an explicit "y" replaces the file.  "n" used to be kept as
        # the (truthy) value of args.corpus_desc, so the description was
        # written to a file literally named "n".
        args.corpus_desc = desc_path if answer == 'y' else False

    if args.corpus_desc:
        with open(args.corpus_desc, 'w') as outfile:
            outfile.write(
                "This is an instance of the [InPhO Topic Explorer]"
                "(http://inphodata.cogs.indiana.edu/). If you would like "
                "to add a custom corpus description, either: "
                "- Modify the contents of the file `{}` "
                "- Change the main:corpus_desc path in `{}` to an existing "
                "Markdown file. ".format(os.path.abspath(args.corpus_desc),
                                         os.path.abspath(args.config_file)))

    return args.config_file
def main(args):
    """Build a corpus from ``args.corpus_path`` and write its config file.

    Steps, in order: expand a ``.bib`` input through ``process_bibtex``,
    derive the corpus name and model path, decide whether an existing corpus
    file has to be rebuilt, build the corpus (or, for a HTRC id file, create
    it through ``htrc_features``), write the configuration through
    ``write_config`` and finally create the default corpus description next
    to the configuration file.

    An existing description is only replaced when the user explicitly
    answers ``y``.  In quiet mode the process exits with status 1 when the
    corpus file already exists and ``args.rebuild`` is not set.

    :param args: argument namespace; many attributes (``bibtex``,
        ``corpus_name``, ``model_path``, ``corpus_filename``, ``rebuild``,
        ``config_file``, ``corpus_desc`` ...) are filled in or updated.
    :returns: the configuration file path returned by ``write_config``.
    :raises IOError: re-raised from ``build_corpus`` after a usage hint has
        been printed.
    """
    # TODO: remove this code, check if there is an issue and unit test
    # convert to unicode to avoid windows errors
    # args.corpus_path = args.corpus_path

    # config corpus_path
    # process bibtex files
    args.bibtex = args.corpus_path.endswith('.bib')
    if args.bibtex:
        args.bibtex = args.corpus_path
        args.corpus_path = process_bibtex(args.corpus_path, args.quiet)

    # set corpus_name
    args.corpus_name = os.path.basename(args.corpus_path)
    if not args.corpus_name:
        # the path ended with a separator; use the directory name instead
        args.corpus_name = os.path.basename(os.path.dirname(args.corpus_path))

    if not args.corpus_print_name and not args.quiet:
        args.corpus_print_name = prompt("Corpus Name", default=args.corpus_name)

    # configure model-path
    if args.model_path is None:
        if os.path.isdir(args.corpus_path):
            args.model_path = os.path.join(args.corpus_path, '../models/')
        else:
            args.model_path = os.path.dirname(args.corpus_path)
    if args.model_path and not os.path.exists(os.path.abspath(
            args.model_path)):
        os.makedirs(os.path.abspath(args.model_path))

    args.corpus_filename = get_corpus_filename(args.corpus_path,
                                               args.model_path,
                                               stop_freq=args.stop_freq)
    if not args.rebuild and os.path.exists(
            os.path.abspath(args.corpus_filename)):
        if args.quiet:
            print("Path exists: {}".format(args.corpus_filename))
            sys.exit(1)
        else:
            args.rebuild = bool_prompt("\nCorpus file found. Rebuild? ",
                                       default=False)
    else:
        args.rebuild = True

    if args.htrc:
        import vsm.extensions.htrc as htrc
        if os.path.isdir(args.corpus_path):
            #htrc.proc_htrc_coll(args.corpus_path)
            ids = [
                name.replace('.txt', '')
                for name in listdir_nohidden(args.corpus_path)
            ]

            args.htrc_metapath = os.path.abspath(args.corpus_path + '/../')
            args.htrc_metapath = os.path.join(
                args.htrc_metapath,
                os.path.basename(args.corpus_path) + '.metadata.json')
        else:
            # a plain file is read as a list of volume ids, one per line
            import topicexplorer.extensions.htrc_features as htrc_features
            with open(args.corpus_path) as idfile:
                ids = [row.strip() for row in idfile if row.strip()]

            c = htrc_features.create_corpus(ids,
                                            nltk_stop=args.nltk,
                                            freq=args.stop_freq)
            c.save(args.corpus_filename)

    if args.rebuild and (not args.htrc or os.path.isdir(args.corpus_path)):
        try:
            args.corpus_filename = build_corpus(args.corpus_path,
                                                args.model_path,
                                                stop_freq=args.stop_freq,
                                                decode=args.decode,
                                                nltk_stop=args.nltk,
                                                simple=args.simple,
                                                sentences=args.sentences,
                                                tokenizer=args.tokenizer)
        except IOError:
            print("ERROR: invalid path, please specify either:")
            print(" * a single plain-text or PDF file,")
            print(" * a single bibtex (.bib) file with 'file' fields,")
            print(" * a folder of plain-text or PDF files, or")
            print(" * a folder of folders of plain-text or PDF files.")
            print("\nExiting...")
            # the error is propagated to the caller, as before
            raise

    args.config_file = write_config(args, args.config_file)

    desc_path = args.config_file + '.md'
    args.corpus_desc = desc_path
    if not args.quiet and os.path.exists(desc_path):
        answer = None
        while answer not in ('y', 'n', ''):
            answer = input(
                "\nExisting corpus description found. Remove? [y/N] ")
            answer = answer.lower().strip()
        # Only an explicit "y" replaces the file.  "n" used to be kept as
        # the (truthy) value of args.corpus_desc, so the description was
        # written to a file literally named "n".
        args.corpus_desc = desc_path if answer == 'y' else False

    if args.corpus_desc:
        with open(args.corpus_desc, 'w') as outfile:
            outfile.write(
                "This is an instance of the [InPhO Topic Explorer]"
                "(http://inphodata.cogs.indiana.edu/). If you would like "
                "to add a custom corpus description, either: "
                "- Modify the contents of the file `{}` "
                "- Change the main:corpus_desc path in `{}` to an existing "
                "Markdown file. ".format(os.path.abspath(args.corpus_desc),
                                         os.path.abspath(args.config_file)))

    return args.config_file
def main(args): config = ConfigParser() config.read(args.config_file) corpus_filename = config.get("main", "corpus_file") model_path = config.get("main", "path") if args.k is None: try: if config.get("main", "topics"): default = ' '.join(map(str, eval(config.get("main", "topics")))) else: raise NoOptionError except NoOptionError: default = ' '.join(map(str, range(20, 100, 20))) while args.k is None: ks = raw_input( "Number of Topics [Default '{0}']: ".format(default)) try: if ks: args.k = [int(n) for n in ks.split()] elif not ks.strip(): args.k = [int(n) for n in default.split()] if args.k: print "\nTIP: number of topics can be specified with argument '-k N N N ...':" print " vsm train %s -k %s\n" %\ (args.config_file, ' '.join(map(str, args.k))) except ValueError: print "Enter valid integers, separated by spaces!" if args.processes < 0: args.processes = multiprocessing.cpu_count() + args.processes corpus = Corpus.load(corpus_filename) try: model_pattern = config.get("main", "model_pattern") except NoOptionError: model_pattern = None if model_pattern is not None and\ bool_prompt("Existing models found. 
Continue training?", default=True): m = LDA.load(model_pattern.format(args.k[0]), multiprocessing=args.processes > 1, n_proc=args.processes) if args.iter is None: args.iter = int_prompt("Total number of training iterations:", default=int(m.iteration * 1.5), min=m.iteration) print "\nTIP: number of training iterations can be specified with argument '--iter N':" print " vsm train --iter %d %s\n" % (args.iter, args.config_file) del m # if the set changes, build some new models and continue some old ones config_topics = eval(config.get("main", "topics")) if args.k != config_topics: new_models = set(args.k) - set(config_topics) continuing_models = set(args.k) & set(config_topics) build_models(corpus, corpus_filename, model_path, config.get("main", "context_type"), new_models, n_iterations=args.iter, n_proc=args.processes, seed=args.seed) model_pattern = continue_training(model_pattern, continuing_models, args.iter, n_proc=args.processes) else: model_pattern = continue_training(model_pattern, args.k, args.iter, n_proc=args.processes) else: # build a new model if args.iter is None: args.iter = int_prompt("Number of training iterations:", default=200) print "\nTIP: number of training iterations can be specified with argument '--iter N':" print " vsm train --iter %d %s\n" % (args.iter, args.config_file) ctxs = corpus.context_types ctxs = sorted(ctxs, key=lambda ctx: len(corpus.view_contexts(ctx))) if args.context_type not in ctxs: while args.context_type not in ctxs: contexts = ctxs[:] contexts[0] = contexts[0].upper() contexts = '/'.join(contexts) args.context_type = raw_input("Select a context type [%s] : " % contexts) if args.context_type.strip() == '': args.context_type = ctxs[0] if args.context_type == ctxs[0].upper(): args.context_type = ctxs[0] print "\nTIP: context type can be specified with argument '--context-type TYPE':" print " vsm train --context-type %s %s\n" % ( args.context_type, args.config_file) print "\nTIP: This configuration can be automated as:" print 
" vsm train %s --iter %d --context-type %s -k %s\n" %\ (args.config_file, args.iter, args.context_type, ' '.join(map(str, args.k))) model_pattern = build_models(corpus, corpus_filename, model_path, args.context_type, args.k, n_iterations=args.iter, n_proc=args.processes, seed=args.seed) config.set("main", "model_pattern", model_pattern) if args.context_type: # test for presence, since continuing doesn't require context_type config.set("main", "context_type", args.context_type) args.k.sort() config.set("main", "topics", str(args.k)) with open(args.config_file, "wb") as configfh: config.write(configfh)
def main(args): # CONFIGURATION PARSING # load in the configuration file config = ConfigParser({ 'certfile': None, 'keyfile': None, 'ca_certs': None, 'ssl': False, 'port': '8000', 'host': '0.0.0.0', 'icons': 'link', 'corpus_link': None, 'doc_title_format': None, 'doc_url_format': None, 'topic_range': None, 'topics': None }) config.read(args.config_file) if config.get('main', 'topic_range'): topic_range = map(int, config.get('main', 'topic_range').split(',')) topic_range = range(*topic_range) if config.get('main', 'topics'): topic_range = eval(config.get('main', 'topics')) print topic_range # LAUNCHING SERVERS # Cross-platform compatability def get_log_file(k): if config.has_section('logging'): path = config.get('logging', 'path') path = path.format(k) if not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) return open(path, 'a') else: return subprocess.PIPE def test_baseport(baseport, topic_range): try: host = config.get("www", "host") if host == '0.0.0.0': host = 'localhost' for k in topic_range: port = baseport + k try: s = socket.create_connection((host, port), 2) s.close() raise IOError( "Socket connectable on port {0}".format(port)) except socket.error: pass return baseport except IOError: baseport = int_prompt( "Conflict on port {0}. 
Enter new base port: [CURRENT: {1}]"\ .format(port, baseport)) return test_baseport(baseport, topic_range) baseport = test_baseport(int(config.get("www", "port").format(0)), topic_range) # prompt to save if int(config.get("www", "port").format(0)) != baseport: if bool_prompt("Change default baseport to {0}?".format(baseport), default=True): config.set("www", "port", baseport) # create deep copy of configuration # see http://stackoverflow.com/a/24343297 config_string = StringIO() config.write(config_string) # skip DEFAULT section config_string.seek(0) idx = config_string.getvalue().index("[main]") config_string.seek(idx) # read deep copy new_config = ConfigParser() new_config.readfp(config_string) # write deep copy without DEFAULT section # this preserves DEFAULT for rest of program with open(args.config_file, 'wb') as configfh: new_config.write(configfh) try: grp_fn = os.setsid except AttributeError: grp_fn = None procs = [ subprocess.Popen("vsm serve -k {k} -p {port} {config_file}".format( k=k, port=(baseport + k), config_file=args.config_file), shell=True, stdout=get_log_file(k), stderr=subprocess.STDOUT, preexec_fn=grp_fn) for k in topic_range ] print "pid", "port" for proc, k in zip(procs, topic_range): port = baseport + k host = config.get("www", "host") print proc.pid, "http://{host}:{port}/".format(host=host, port=port) # CLEAN EXIT AND SHUTDOWN OF SERVERS def signal_handler(signal, frame): print "\n" for p in procs: print "killing", p.pid # Cross-Platform Compatability try: os.killpg(p.pid, signal) except AttributeError: subprocess.call(['taskkill', '/F', '/T', '/PID', str(p.pid)]) sys.exit() signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) port = baseport + topic_range[0] host = config.get("www", "host") if host == '0.0.0.0': host = 'localhost' url = "http://{host}:{port}/".format(host=host, port=port) # TODO: Add enhanced port checking while True: try: urllib.urlopen(url) print "Server successfully started" break 
except: time.sleep(1) if args.browser: webbrowser.open(url) print "TIP: Browser launch can be disabled with the '--no-browser' argument:" print "vsm launch --no-browser", args.config_file, "\n" print "Press Ctrl+C to shutdown the Topic Explorer server" # Cross-platform Compatability try: signal.pause() except AttributeError: # Windows hack while True: time.sleep(1)
def main(args):
    """Train LDA topic models for the corpus named in ``args.config_file``.

    Either continues training the models recorded under the ``[main]``
    option ``model_pattern`` or builds new ones.  Afterwards the
    ``model_pattern``, ``context_type`` and ``topics`` options of ``[main]``
    are updated and, unless ``args.dry_run`` is set, the configuration is
    written back to ``args.config_file``.

    Attributes read from ``args``: ``cluster``, ``config_file``, ``k``,
    ``quiet``, ``processes``, ``rebuild``, ``cont``, ``iter``, ``seed``,
    ``dry_run`` and ``context_type``.  ``k``, ``processes``, ``iter`` and
    ``context_type`` may be filled in or normalized in place.

    Returns ``None``.
    """
    if args.cluster:
        cluster(args.cluster, args.config_file)
        return

    config = topicexplorer.config.read(args.config_file)
    corpus_filename = config.get("main", "corpus_file")
    model_path = config.get("main", "path")

    if config.getboolean("main", "sentences"):
        from vsm.extensions.ldasentences import CorpusSent as Corpus
    else:
        from vsm.corpus import Corpus

    if args.k is None:
        try:
            configured_topics = config.get("main", "topics")
        except NoOptionError:
            configured_topics = None

        if configured_topics:
            # NOTE(security): eval() executes whatever the config file
            # contains; only use configuration files from trusted sources.
            default = ' '.join(map(str, eval(configured_topics)))
            if args.quiet:
                args.k = [int(n) for n in default.split()]
        else:
            default = ' '.join(map(str, range(20, 100, 20)))

        while args.k is None:
            ks = input("Number of Topics [Default '{0}']: ".format(default))
            try:
                # Blank *or whitespace-only* input selects the default.
                # Previously "   " produced an empty list, which ended the
                # loop and crashed later on args.k[0].
                chosen = [int(n) for n in (ks.strip() or default).split()]
            except ValueError:
                print("Enter valid integers, separated by spaces!")
                continue

            if chosen:
                args.k = chosen
                print("\nTIP: number of topics can be specified with argument '-k N N N ...':")
                print(" topicexplorer train %s -k %s\n" %
                      (args.config_file, ' '.join(map(str, args.k))))

    if args.processes < 0:
        import multiprocessing
        # A negative value is added to the CPU count; never go below one
        # worker when the offset exceeds the number of CPUs.
        args.processes = max(1, multiprocessing.cpu_count() + args.processes)

    print("Loading corpus... ")
    corpus = Corpus.load(corpus_filename)

    try:
        model_pattern = config.get("main", "model_pattern")
    except NoOptionError:
        model_pattern = None

    # Short-circuit order matters: the prompt is only shown when models
    # exist, no rebuild was requested, and neither --quiet nor --cont
    # already answered the question.
    continue_existing = (
        model_pattern is not None and not args.rebuild and
        (args.quiet or args.cont or bool_prompt(
            "Existing topic models found. You can continue training or "
            "start a new model. Do you want to continue training your "
            "existing models? \n",
            default=True)))

    if continue_existing:
        from vsm.model.lda import LDA
        # NOTE(review): loads the model for args.k[0] to read its iteration
        # count; presumably that model must already exist on disk -- confirm.
        m = LDA.load(model_pattern.format(args.k[0]),
                     multiprocessing=args.processes > 1,
                     n_proc=args.processes)

        if args.iter is None and not args.quiet:  # pragma: no cover
            args.iter = int_prompt("Total number of training iterations:",
                                   default=int(m.iteration * 1.5),
                                   min=m.iteration)

            print("\nTIP: number of training iterations can be specified with argument '--iter N':")
            print(" topicexplorer train --iter %d %s\n" %
                  (args.iter, args.config_file))
        elif args.iter is None and args.quiet:  # pragma: no cover
            args.iter = int(m.iteration * 1.5)

        del m

        # if the set changes, build some new models and continue some old ones
        # NOTE(security): eval() on config contents, see above.
        config_topics = eval(config.get("main", "topics"))
        if args.k != config_topics:
            new_models = set(args.k) - set(config_topics)
            continuing_models = set(args.k) & set(config_topics)

            build_models(corpus, corpus_filename, model_path,
                         config.get("main", "context_type"),
                         new_models, n_iterations=args.iter,
                         n_proc=args.processes, seed=args.seed,
                         dry_run=args.dry_run)

            model_pattern = continue_training(model_pattern, continuing_models,
                                              args.iter, n_proc=args.processes,
                                              dry_run=args.dry_run)
        else:
            model_pattern = continue_training(model_pattern, args.k, args.iter,
                                              n_proc=args.processes,
                                              dry_run=args.dry_run)
    else:
        # build a new model
        if args.iter is None and not args.quiet:  # pragma: no cover
            args.iter = int_prompt("Number of training iterations:", default=200)

            print("\nTIP: number of training iterations can be specified with argument '--iter N':")
            print(" topicexplorer train --iter %d %s\n" %
                  (args.iter, args.config_file))
        elif args.iter is None and args.quiet:  # pragma: no cover
            args.iter = 200

        ctxs = corpus.context_types
        if len(ctxs) == 1:
            # Only one context type available: nothing to choose.
            args.context_type = ctxs[0]
        else:
            # Order by number of contexts; the first entry is the default
            # and is shown upper-cased in the prompt.
            ctxs = sorted(ctxs, key=lambda ctx: len(corpus.view_contexts(ctx)))
            if args.context_type not in ctxs:
                while args.context_type not in ctxs:
                    contexts = ctxs[:]
                    contexts[0] = contexts[0].upper()
                    contexts = '/'.join(contexts)
                    args.context_type = input("Select a context type [%s] : " % contexts)
                    if args.context_type.strip() == '':
                        args.context_type = ctxs[0]
                    if args.context_type == ctxs[0].upper():
                        args.context_type = ctxs[0]

                print("\nTIP: context type can be specified with argument '--context-type TYPE':")
                print(" topicexplorer train --context-type %s %s\n" %
                      (args.context_type, args.config_file))

        print("\nTIP: This configuration can be automated as:")
        print(" topicexplorer train %s --iter %d --context-type %s -k %s\n" %
              (args.config_file, args.iter, args.context_type,
               ' '.join(map(str, args.k))))

        model_pattern = build_models(corpus, corpus_filename, model_path,
                                     args.context_type, args.k,
                                     n_iterations=args.iter,
                                     n_proc=args.processes, seed=args.seed,
                                     dry_run=args.dry_run)

    config.set("main", "model_pattern", model_pattern)
    if args.context_type:
        # test for presence, since continuing doesn't require context_type
        config.set("main", "context_type", args.context_type)
    args.k.sort()
    config.set("main", "topics", str(args.k))

    if not args.dry_run:
        if config.has_option("main", "cluster"):
            # The stored cluster file is removed together with its option
            # once the models have been (re)trained.
            cluster_path = config.get("main", "cluster", fallback=None)
            config.remove_option("main", "cluster")
            try:
                if cluster_path:
                    os.remove(cluster_path)
            except (OSError, IOError):
                # fail silently on IOError
                pass

        with open(args.config_file, "w") as configfh:
            config.write(configfh)
def main(args):
    """Train LDA topic models for the corpus named in ``args.config_file``.

    Either continues training the models recorded under the ``[main]``
    option ``model_pattern`` (after asking the user) or builds new ones.
    Afterwards ``model_pattern``, ``context_type`` and ``topics`` are stored
    in ``[main]`` and, unless ``args.dry_run`` is set, the configuration is
    written back to ``args.config_file``.

    Attributes read from ``args``: ``config_file``, ``k``, ``processes``,
    ``iter``, ``seed``, ``dry_run`` and ``context_type``.  ``k``,
    ``processes``, ``iter`` and ``context_type`` may be filled in place.

    Output uses single-argument ``print(...)`` calls, which behave the same
    whether or not ``print_function`` is in effect.

    Returns ``None``.
    """
    from vsm.corpus import Corpus
    from vsm.model.lda import LDA

    config = ConfigParser()
    config.read(args.config_file)
    corpus_filename = config.get("main", "corpus_file")
    model_path = config.get("main", "path")

    if args.k is None:
        # The original raised a bare ``NoOptionError`` for an empty value;
        # that class requires (option, section) arguments, so the raise
        # itself failed with TypeError.  Avoid the raise altogether.
        try:
            configured_topics = config.get("main", "topics")
        except NoOptionError:
            configured_topics = None

        if configured_topics:
            # NOTE(security): eval() executes whatever the config file
            # contains; only use configuration files from trusted sources.
            default = ' '.join(map(str, eval(configured_topics)))
        else:
            default = ' '.join(map(str, range(20, 100, 20)))

        while args.k is None:
            ks = raw_input("Number of Topics [Default '{0}']: ".format(default))
            try:
                # Blank *or whitespace-only* input selects the default.
                # Previously "   " produced an empty list, which ended the
                # loop and crashed later on args.k[0].
                chosen = [int(n) for n in (ks.strip() or default).split()]
            except ValueError:
                print("Enter valid integers, separated by spaces!")
                continue

            if chosen:
                args.k = chosen
                print("\nTIP: number of topics can be specified with argument '-k N N N ...':")
                print(" vsm train %s -k %s\n" %
                      (args.config_file, ' '.join(map(str, args.k))))

    if args.processes < 0:
        # A negative value is added to the CPU count.
        args.processes = multiprocessing.cpu_count() + args.processes

    print("Loading corpus... ")
    corpus = Corpus.load(corpus_filename)

    try:
        model_pattern = config.get("main", "model_pattern")
    except NoOptionError:
        model_pattern = None

    if model_pattern is not None and \
            bool_prompt("Existing models found. Continue training?", default=True):
        # Load one model to learn how many iterations were already run.
        m = LDA.load(model_pattern.format(args.k[0]),
                     multiprocessing=args.processes > 1,
                     n_proc=args.processes)

        if args.iter is None:
            args.iter = int_prompt("Total number of training iterations:",
                                   default=int(m.iteration * 1.5),
                                   min=m.iteration)

            print("\nTIP: number of training iterations can be specified with argument '--iter N':")
            print(" vsm train --iter %d %s\n" % (args.iter, args.config_file))

        del m

        # if the set changes, build some new models and continue some old ones
        # NOTE(security): eval() on config contents, see above.
        config_topics = eval(config.get("main", "topics"))
        if args.k != config_topics:
            new_models = set(args.k) - set(config_topics)
            continuing_models = set(args.k) & set(config_topics)

            build_models(corpus, corpus_filename, model_path,
                         config.get("main", "context_type"),
                         new_models, n_iterations=args.iter,
                         n_proc=args.processes, seed=args.seed)

            model_pattern = continue_training(model_pattern, continuing_models,
                                              args.iter, n_proc=args.processes)
        else:
            model_pattern = continue_training(model_pattern, args.k, args.iter,
                                              n_proc=args.processes)
    else:
        # build a new model
        if args.iter is None:
            args.iter = int_prompt("Number of training iterations:", default=200)

            print("\nTIP: number of training iterations can be specified with argument '--iter N':")
            print(" vsm train --iter %d %s\n" % (args.iter, args.config_file))

        # Order by number of contexts; the first entry is the default and is
        # shown upper-cased in the prompt.
        ctxs = corpus.context_types
        ctxs = sorted(ctxs, key=lambda ctx: len(corpus.view_contexts(ctx)))
        if args.context_type not in ctxs:
            while args.context_type not in ctxs:
                contexts = ctxs[:]
                contexts[0] = contexts[0].upper()
                contexts = '/'.join(contexts)
                args.context_type = raw_input("Select a context type [%s] : " % contexts)
                if args.context_type.strip() == '':
                    args.context_type = ctxs[0]
                if args.context_type == ctxs[0].upper():
                    args.context_type = ctxs[0]

            print("\nTIP: context type can be specified with argument '--context-type TYPE':")
            print(" vsm train --context-type %s %s\n" %
                  (args.context_type, args.config_file))

        print("\nTIP: This configuration can be automated as:")
        print(" vsm train %s --iter %d --context-type %s -k %s\n" %
              (args.config_file, args.iter, args.context_type,
               ' '.join(map(str, args.k))))

        model_pattern = build_models(corpus, corpus_filename, model_path,
                                     args.context_type, args.k,
                                     n_iterations=args.iter,
                                     n_proc=args.processes, seed=args.seed,
                                     dry_run=args.dry_run)

    config.set("main", "model_pattern", model_pattern)
    if args.context_type:
        # test for presence, since continuing doesn't require context_type
        config.set("main", "context_type", args.context_type)
    args.k.sort()
    config.set("main", "topics", str(args.k))

    if not args.dry_run:
        with open(args.config_file, "wb") as configfh:
            config.write(configfh)
def main(args):
    """Load one topic model and serve the Topic Explorer web interface.

    Reads the configuration file ``args.config``, picks a free port
    (prompting on conflicts and optionally saving the new base port),
    loads the corpus and the ``args.k``-topic model into the module-level
    globals, registers the ``/icons.js``, ``/`` and static-file routes and
    finally starts the server via ``run`` (with ``SSLWSGIRefServer`` when
    SSL is requested).

    Attributes read from ``args``: ``config``, ``k``, ``host``, ``port``,
    ``ssl``, ``certfile``, ``keyfile`` and ``ca_certs``.

    Side effects: sets the globals ``context_type``, ``lda_c``, ``lda_m``,
    ``lda_v``, ``label`` and ``id_fn``; may rewrite ``args.config``.
    """
    global context_type, lda_c, lda_m, lda_v, label, id_fn

    # load in the configuration file
    config = ConfigParser({
        'certfile': None,
        'keyfile': None,
        'ca_certs': None,
        'ssl': False,
        'port': '8000',
        'host': '0.0.0.0',
        'topic_range': '{0},{1},1'.format(args.k, args.k + 1),
        'icons': 'link',
        'corpus_link': None,
        'doc_title_format': None,
        'doc_url_format': None,
        'topics': None,
    })
    config.read(args.config)

    # path variables
    path = config.get('main', 'path')  # unused below; kept so a missing option still fails here
    context_type = config.get('main', 'context_type')
    corpus_file = config.get('main', 'corpus_file')
    model_pattern = config.get('main', 'model_pattern')

    # automatic port assignment
    def port_in_use(candidate):
        """Return True when something accepts connections on *candidate*."""
        probe_host = args.host or config.get("www", "host")
        if probe_host == '0.0.0.0':
            probe_host = 'localhost'
        # A successful connect means the port is taken.  This is an explicit
        # result rather than an IOError raised inside ``except socket.error``,
        # which only works while the two exception types are unrelated.
        try:
            conn = socket.create_connection((probe_host, candidate), 2)
        except socket.error:
            return False
        conn.close()
        return True

    def test_port(port):
        """Prompt for replacement ports until a free one is found."""
        while port_in_use(port):
            port = int_prompt(
                "Conflict on port {0}. Enter new port:".format(port))
        return port

    port = args.port or int(config.get('www', 'port').format(0)) + args.k
    port = test_port(port)

    # prompt to save
    if (int(config.get("www", "port").format(0)) + args.k) != port:
        if bool_prompt("Change default baseport to {0}?".format(port - args.k),
                       default=True):
            config.set("www", "port", str(port - args.k))

            # create deep copy of configuration
            # see http://stackoverflow.com/a/24343297
            config_string = StringIO()
            config.write(config_string)

            # skip DEFAULT section
            config_string.seek(0)
            idx = config_string.getvalue().index("[main]")
            config_string.seek(idx)

            # read deep copy
            new_config = ConfigParser()
            new_config.readfp(config_string)

            # write deep copy without DEFAULT section
            # this preserves DEFAULT for rest of program
            with open(args.config, 'wb') as configfh:
                new_config.write(configfh)

    # hostname assignment
    host = args.host or config.get('www', 'host')

    # LDA objects
    lda_c = Corpus.load(corpus_file)
    lda_m = None
    lda_v = None

    def load_model(k):
        """Load the k-topic model and its viewer into the module globals."""
        global lda_m, lda_v
        lda_m = LDA.load(model_pattern.format(k))
        lda_v = LDAViewer(lda_c, lda_m)

    load_model(args.k)

    # label function imports
    # ``label_module`` starts as None so the lookups below fail with a plain
    # AttributeError when no module was configured or importable.
    label_module = None
    try:
        label_module = config.get('main', 'label_module')
        label_module = import_module(label_module)
        print("imported label module")
        label_module.init(config.get('main', 'path'), lda_v, context_type)
    except (ImportError, NoOptionError, AttributeError):
        pass

    try:
        label = label_module.label
        print("imported label function")
    except (AttributeError, UnboundLocalError):
        label = lambda x: x
        print("using default label function")

    try:
        id_fn = label_module.id_fn
        print("imported id function")
    except (AttributeError, UnboundLocalError):
        id_fn = def_label_fn
        print("using default id function")

    config_icons = config.get('www', 'icons').split(",")

    @route('/icons.js')
    def icons():
        # The handle gets its own name so it no longer shadows this function.
        with open(resource_filename(__name__, '../www/icons.js')) as icons_file:
            text = '{0}\n var icons = {1};'\
                .format(icons_file.read(), json.dumps(config_icons))
        return text

    # index page parameterization
    corpus_name = config.get('www', 'corpus_name')
    corpus_link = config.get('www', 'corpus_link')
    doc_title_format = config.get('www', 'doc_title_format')
    doc_url_format = config.get('www', 'doc_url_format')

    if config.get('main', 'topic_range'):
        topic_range = map(int, config.get('main', 'topic_range').split(','))
        topic_range = range(*topic_range)
    if config.get('main', 'topics'):
        # An explicit topic list overrides the range.
        # NOTE(security): eval() executes whatever the config file contains;
        # only use configuration files from trusted sources.
        topic_range = eval(config.get('main', 'topics'))
    topic_range = [{
        'k': k,
        'port': int(config.get('www', 'port').format(0)) + k
    } for k in topic_range]

    # Identity escape: template values are inserted verbatim.
    renderer = pystache.Renderer(escape=lambda u: u)

    @route('/')
    def index():
        response.set_header('Expires', _cache_date())

        with open(resource_filename(__name__, '../www/index.mustache.html'),
                  encoding='utf-8') as tmpl_file:
            template = tmpl_file.read()
        return renderer.render(
            template, {
                'corpus_name': corpus_name,
                'corpus_link': corpus_link,
                'context_type': context_type,
                'topic_range': topic_range,
                'doc_title_format': doc_title_format,
                'doc_url_format': doc_url_format
            })

    @route('/<filename:path>')
    @_set_acao_headers
    def send_static(filename):
        return static_file(filename, root=resource_filename(__name__, '../www/'))

    # The config value is a string when it comes from the file, so plain
    # truthiness would treat "false" / "0" / "no" as enabled.  Interpret it
    # as a boolean instead; the built-in default ``False`` maps to disabled.
    ssl_setting = str(config.get('main', 'ssl')).strip().lower()
    use_ssl = args.ssl or ssl_setting in ('1', 'yes', 'true', 'on')

    if use_ssl:
        certfile = args.certfile or config.get('ssl', 'certfile')
        keyfile = args.keyfile or config.get('ssl', 'keyfile')
        ca_certs = args.ca_certs or config.get('ssl', 'ca_certs')

        run(host=host, port=port, server=SSLWSGIRefServer,
            certfile=certfile, keyfile=keyfile, ca_certs=ca_certs)
    else:
        run(host=host, port=port)
def main(args):
    """Launch one ``vsm serve`` process per topic model and wait on them.

    Reads ``args.config_file``, determines the list of topic counts, finds
    a base port such that ``baseport + k`` is free for every ``k``
    (prompting on conflicts and optionally saving the new base port),
    starts the server subprocesses, installs SIGINT/SIGTERM handlers that
    stop them, waits until the first server answers HTTP requests,
    optionally opens a browser, and then blocks until interrupted.

    Attributes read from ``args``: ``config_file``, ``fulltext`` and
    ``browser``.

    Exits the process with status 1 if any server subprocess terminates
    while waiting for startup.
    """
    # CONFIGURATION PARSING
    # load in the configuration file
    config = ConfigParser({
        'certfile': None,
        'keyfile': None,
        'ca_certs': None,
        'ssl': False,
        'port': '8000',
        'host': '0.0.0.0',
        'icons': 'link',
        'corpus_link': None,
        'doc_title_format': None,
        'doc_url_format': None,
        'topic_range': None,
        'fulltext': 'false',
        'raw_corpus': None,
        'topics': None,
    })
    config.read(args.config_file)

    if config.get('main', 'topic_range'):
        topic_range = map(int, config.get('main', 'topic_range').split(','))
        topic_range = range(*topic_range)
    if config.get('main', 'topics'):
        # An explicit topic list overrides the range.
        # NOTE(security): eval() executes whatever the config file contains;
        # only use configuration files from trusted sources.
        topic_range = eval(config.get('main', 'topics'))
    print(topic_range)

    # LAUNCHING SERVERS
    # Cross-platform compatibility
    def get_log_file(k):
        """Return an append-mode log file for model *k*, or ``subprocess.PIPE``."""
        if config.has_section('logging'):
            path = config.get('logging', 'path')
            path = path.format(k)
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))

            return open(path, 'a')
        else:
            return subprocess.PIPE

    def port_in_use(host, port):
        """Return True when something accepts connections on host:port."""
        # A successful connect means the port is taken.  This is an explicit
        # result rather than an IOError raised inside ``except socket.error``,
        # which only works while the two exception types are unrelated.
        try:
            conn = socket.create_connection((host, port), 2)
        except socket.error:
            return False
        conn.close()
        return True

    def test_baseport(host, baseport, topic_range):
        """Prompt for new base ports until every ``baseport + k`` is free."""
        while True:
            conflict = None
            for k in topic_range:
                if port_in_use(host, baseport + k):
                    conflict = baseport + k
                    break
            if conflict is None:
                return baseport
            baseport = int_prompt(
                "Conflict on port {0}. Enter new base port: [CURRENT: {1}]"
                .format(conflict, baseport))

    host = config.get("www", "host")
    if host == '0.0.0.0':
        host = socket.gethostname()

    baseport = int(config.get("www", "port").format(0))
    baseport = test_baseport(host, baseport, topic_range)

    # prompt to save
    if int(config.get("www", "port").format(0)) != baseport:
        if bool_prompt("Change default baseport to {0}?".format(baseport),
                       default=True):
            # Config values are stored as strings, matching what the
            # ``serve`` command does for the same option.
            config.set("www", "port", str(baseport))

            # create deep copy of configuration
            # see http://stackoverflow.com/a/24343297
            config_string = StringIO()
            config.write(config_string)

            # skip DEFAULT section
            config_string.seek(0)
            idx = config_string.getvalue().index("[main]")
            config_string.seek(idx)

            # read deep copy
            new_config = ConfigParser()
            new_config.readfp(config_string)

            # write deep copy without DEFAULT section
            # this preserves DEFAULT for rest of program
            with open(args.config_file, 'wb') as configfh:
                new_config.write(configfh)

    # os.setsid is missing on some platforms; where available each child is
    # started through it so os.killpg can address it by its pid.
    try:
        grp_fn = os.setsid
    except AttributeError:
        grp_fn = None

    fulltext = '--fulltext' if args.fulltext else ''
    procs = [
        subprocess.Popen(
            "vsm serve -k {k} -p {port} {config_file} {fulltext}".format(
                k=k, port=(baseport + k), config_file=args.config_file,
                fulltext=fulltext),
            shell=True, stdout=get_log_file(k), stderr=subprocess.STDOUT,
            preexec_fn=grp_fn)
        for k in topic_range
    ]

    print("pid port")
    for proc, k in zip(procs, topic_range):
        port = baseport + k
        print("{0} http://{1}:{2}/".format(proc.pid, host, port))

    # CLEAN EXIT AND SHUTDOWN OF SERVERS
    def signal_handler(signum, frame):
        """Stop every server subprocess, then exit."""
        print("\n")
        for p, k in zip(procs, topic_range):
            print("Stopping {}-topic model (Process ID: {})".format(k, p.pid))
            # Cross-Platform Compatibility
            if platform.system() == 'Windows':
                # Discard taskkill's output; the sink must be writable and
                # is closed again afterwards.
                with open(os.devnull, 'w') as devnull:
                    subprocess.call(
                        ['taskkill', '/F', '/T', '/PID', str(p.pid)],
                        stdout=devnull, stderr=devnull)
            else:
                os.killpg(p.pid, signum)

        sys.exit()

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    port = baseport + topic_range[0]
    url = "http://{host}:{port}/".format(host=host, port=port)

    # TODO: Add enhanced port checking
    # The counter lives outside the loop; resetting it on every pass meant
    # the 60-second notice below could never be reached.
    wait_count = 0
    while True:
        try:
            urllib.urlopen(url)
            print("Server successfully started")
            break
        except Exception:
            # ``Exception`` rather than a bare except, so the SystemExit
            # raised by signal_handler (Ctrl+C) is not swallowed here.
            time.sleep(1)
            wait_count += 1

        if wait_count == 60:
            print("\nLaunching the server seems to be taking a long time.")
            print("This may be due to loading in a large corpus.")

            print("\nTo test launching a single model, press Ctrl+C to abort launch,")
            print("then use the `serve` command to find the error message:")
            print("\tvsm serve {config} -k {k}".format(
                config=args.config_file, k=topic_range[0]))

        for proc, k in zip(procs, topic_range):
            if proc.poll() is not None:
                print("\nAn error has occurred launching the {}-topic model.".format(k))
                logfile = get_log_file(k)
                # Without a [logging] section there is no file to point to.
                if logfile is not subprocess.PIPE:
                    with logfile:
                        print("A log has been written to: {}\n".format(logfile.name))

                print("Use the `serve` command to debug errors:")
                print("\tvsm serve {config} -k {k}".format(
                    config=args.config_file, k=k))

                # One server died: stop the ones still running and give up.
                for p in procs:
                    if p.poll() is None:
                        try:
                            os.killpg(p.pid, signal.SIGTERM)
                        except AttributeError:
                            # Cross-Platform Compatibility
                            subprocess.call(
                                ['taskkill', '/F', '/T', '/PID', str(p.pid)])

                sys.exit(1)

    if args.browser:
        webbrowser.open(url)
        print("TIP: Browser launch can be disabled with the '--no-browser' argument:")
        print("vsm launch --no-browser {0} \n".format(args.config_file))

    print("Press Ctrl+C to shutdown the Topic Explorer server")
    # Cross-platform Compatibility
    try:
        signal.pause()
    except AttributeError:
        # Windows hack
        while True:
            time.sleep(1)
def main(args):
    """Launch one ``vsm serve`` process per topic model and wait on them.

    Reads ``args.config_file``, determines the list of topic counts, finds
    a base port such that ``baseport + k`` is free for every ``k``
    (prompting on conflicts and optionally saving the new base port),
    starts the server subprocesses, installs SIGINT/SIGTERM handlers that
    stop them, waits until the first server answers HTTP requests,
    optionally opens a browser, and then blocks until interrupted.

    Attributes read from ``args``: ``config_file`` and ``browser``.
    """
    # CONFIGURATION PARSING
    # load in the configuration file
    config = ConfigParser({
        'certfile': None,
        'keyfile': None,
        'ca_certs': None,
        'ssl': False,
        'port': '8000',
        'host': '0.0.0.0',
        'icons': 'link',
        'corpus_link': None,
        'doc_title_format': None,
        'doc_url_format': None,
        'topic_range': None,
        'topics': None,
    })
    config.read(args.config_file)

    if config.get('main', 'topic_range'):
        topic_range = map(int, config.get('main', 'topic_range').split(','))
        topic_range = range(*topic_range)
    if config.get('main', 'topics'):
        # An explicit topic list overrides the range.
        # NOTE(security): eval() executes whatever the config file contains;
        # only use configuration files from trusted sources.
        topic_range = eval(config.get('main', 'topics'))
    print(topic_range)

    # LAUNCHING SERVERS
    # Cross-platform compatibility
    def get_log_file(k):
        """Return an append-mode log file for model *k*, or ``subprocess.PIPE``."""
        if config.has_section('logging'):
            path = config.get('logging', 'path')
            path = path.format(k)
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))

            return open(path, 'a')
        else:
            return subprocess.PIPE

    def port_in_use(host, port):
        """Return True when something accepts connections on host:port."""
        # A successful connect means the port is taken.  This is an explicit
        # result rather than an IOError raised inside ``except socket.error``,
        # which only works while the two exception types are unrelated.
        try:
            conn = socket.create_connection((host, port), 2)
        except socket.error:
            return False
        conn.close()
        return True

    def test_baseport(baseport, topic_range):
        """Prompt for new base ports until every ``baseport + k`` is free.

        Implemented as a loop; the earlier recursive retry dropped the
        ``topic_range`` argument and failed with TypeError on any conflict.
        """
        host = config.get("www", "host")
        if host == '0.0.0.0':
            host = 'localhost'
        while True:
            conflict = None
            for k in topic_range:
                if port_in_use(host, baseport + k):
                    conflict = baseport + k
                    break
            if conflict is None:
                return baseport
            baseport = int_prompt(
                "Conflict on port {0}. Enter new base port: [CURRENT: {1}]"
                .format(conflict, baseport))

    baseport = test_baseport(int(config.get("www", "port").format(0)),
                             topic_range)

    # prompt to save
    if int(config.get("www", "port").format(0)) != baseport:
        if bool_prompt("Change default baseport to {0}?".format(baseport),
                       default=True):
            # Config values are stored as strings.
            config.set("www", "port", str(baseport))

            # create deep copy of configuration
            # see http://stackoverflow.com/a/24343297
            config_string = StringIO()
            config.write(config_string)

            # skip DEFAULT section
            config_string.seek(0)
            idx = config_string.getvalue().index("[main]")
            config_string.seek(idx)

            # read deep copy
            new_config = ConfigParser()
            new_config.readfp(config_string)

            # write deep copy without DEFAULT section
            # this preserves DEFAULT for rest of program
            with open(args.config_file, 'wb') as configfh:
                new_config.write(configfh)

    # os.setsid is missing on some platforms; where available each child is
    # started through it so os.killpg can address it by its pid.
    try:
        grp_fn = os.setsid
    except AttributeError:
        grp_fn = None

    procs = [
        subprocess.Popen(
            "vsm serve -k {k} -p {port} {config_file}".format(
                k=k, port=(baseport + k), config_file=args.config_file),
            shell=True, stdout=get_log_file(k), stderr=subprocess.STDOUT,
            preexec_fn=grp_fn)
        for k in topic_range
    ]

    print("pid port")
    for proc, k in zip(procs, topic_range):
        port = baseport + k
        host = config.get("www", "host")
        print("{0} http://{1}:{2}/".format(proc.pid, host, port))

    # CLEAN EXIT AND SHUTDOWN OF SERVERS
    def signal_handler(signum, frame):
        """Stop every server subprocess, then exit."""
        print("\n")
        for p in procs:
            print("killing {0}".format(p.pid))
            # Cross-Platform Compatibility
            try:
                os.killpg(p.pid, signum)
            except AttributeError:
                subprocess.call(['taskkill', '/F', '/T', '/PID', str(p.pid)])

        sys.exit()

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    port = baseport + topic_range[0]
    host = config.get("www", "host")
    if host == '0.0.0.0':
        host = 'localhost'
    url = "http://{host}:{port}/".format(host=host, port=port)

    # TODO: Add enhanced port checking
    while True:
        try:
            urllib.urlopen(url)
            print("Server successfully started")
            break
        except Exception:
            # ``Exception`` rather than a bare except, so the SystemExit
            # raised by signal_handler (Ctrl+C) is not swallowed here.
            time.sleep(1)

    if args.browser:
        webbrowser.open(url)
        print("TIP: Browser launch can be disabled with the '--no-browser' argument:")
        print("vsm launch --no-browser {0} \n".format(args.config_file))

    print("Press Ctrl+C to shutdown the Topic Explorer server")
    # Cross-platform Compatibility
    try:
        signal.pause()
    except AttributeError:
        # Windows hack
        while True:
            time.sleep(1)