def test_baseport(host, baseport, topic_range): try: for k in topic_range: port = baseport + k try: s = socket.create_connection((host,port), 2) s.close() raise IOError("Socket connectable on port {0}".format(port)) except socket.error: pass return baseport except IOError: baseport = int_prompt( "Conflict on port {0}. Enter new base port: [CURRENT: {1}]"\ .format(port, baseport)) return test_baseport(host, baseport, topic_range)
def test_port(port):
    """Return a port on the configured web host that nothing listens on.

    The host is ``args.host`` when set, otherwise the ``host`` option of
    the ``www`` config section; ``0.0.0.0`` is probed as ``localhost``.
    The port is probed with a TCP connection attempt (2 second timeout).
    While the connection succeeds, the user is asked through
    ``int_prompt`` for another port and the probe is repeated.

    :param port: first candidate port.
    :returns: a port on which the connection attempt failed.
    """
    host = args.host or config.get("www", "host")
    if host == '0.0.0.0':
        host = 'localhost'

    while True:
        try:
            s = socket.create_connection((host, port), 2)
        except socket.error:
            # Nothing accepted the connection, so the port is free.
            return port
        # The connect succeeded: the port is taken.  Raising IOError here
        # next to an ``except socket.error`` would be swallowed on
        # Python 3, where both names are aliases of OSError.
        s.close()
        port = int_prompt(
            "Conflict on port {0}. Enter new port:".format(port))
def test_port(port):
    """Return a port on the configured web host that nothing listens on.

    The host comes from ``args.host`` or, when that is empty, from the
    ``host`` option of the ``www`` config section; ``0.0.0.0`` is probed
    as ``localhost``.  Each candidate is probed with a TCP connection
    attempt (2 second timeout); a successful connection means the port is
    occupied and the user is asked through ``int_prompt`` for a new one.

    :param port: first candidate port.
    :returns: a port on which the connection attempt failed.
    """
    host = args.host or config.get("www", "host")
    if host == '0.0.0.0':
        host = 'localhost'

    while True:
        try:
            s = socket.create_connection((host, port), 2)
        except socket.error:
            # Connection refused / unreachable / timed out: port is free.
            return port
        # Connection accepted: the port is in use.  The conflict is handled
        # directly instead of raising IOError, which on Python 3 is the
        # same class as socket.error and would be caught as "port free".
        s.close()
        port = int_prompt(
            "Conflict on port {0}. Enter new port:".format(port))
def test_baseport(host, baseport, topic_range): try: for k in topic_range: port = baseport + k try: s = socket.create_connection((host, port), 2) s.close() raise IOError( "Socket connectable on port {0}".format(port)) except socket.error: pass return baseport except IOError: baseport = int_prompt( "Conflict on port {0}. Enter new base port: [CURRENT: {1}]"\ .format(port, baseport)) return test_baseport(host, baseport, topic_range)
def main(args):
    """Train new topic models or continue training existing ones.

    Reads the config file named by ``args.config_file``, determines the
    list of topic counts (``args.k``), the iteration count (``args.iter``)
    and, for new models, the context type, prompting interactively for
    anything not supplied on the command line.  It then either continues
    training the models referenced by the config's ``model_pattern`` or
    builds new ones, and finally records ``model_pattern``,
    ``context_type`` and ``topics`` in the config.  Unless
    ``args.dry_run`` is set, a configured cluster file is removed and the
    config file is rewritten.

    When ``args.cluster`` is set, only ``cluster`` is invoked.

    :param args: parsed command-line namespace; several attributes
        (``k``, ``iter``, ``processes``, ``context_type``) are filled in
        or normalised in place.
    """
    if args.cluster:
        cluster(args.cluster, args.config_file)
        return

    config = topicexplorer.config.read(args.config_file)
    corpus_filename = config.get("main", "corpus_file")
    model_path = config.get("main", "path")

    if config.getboolean("main", "sentences"):
        from vsm.extensions.ldasentences import CorpusSent as Corpus
    else:
        from vsm.corpus import Corpus

    if args.k is None:
        # A missing option and an empty option both mean "no configured
        # topics"; handle them with a plain branch instead of raising and
        # catching NoOptionError.
        try:
            configured_topics = config.get("main", "topics")
        except NoOptionError:
            configured_topics = None

        if configured_topics:
            # NOTE(review): eval() executes whatever the config file
            # contains.  ast.literal_eval would be safer if this option is
            # always a plain list literal -- confirm before changing.
            default = ' '.join(map(str, eval(configured_topics)))
            if args.quiet:
                args.k = [int(n) for n in default.split()]
        else:
            default = ' '.join(map(str, range(20, 100, 20)))

        while args.k is None:
            ks = input("Number of Topics [Default '{0}']: ".format(default))
            try:
                # Empty *and* whitespace-only answers select the default.
                # Testing the raw string for truthiness would turn "   "
                # into an empty topic list and leave the loop with no
                # topics at all.
                entries = ks.split()
                if entries:
                    args.k = [int(n) for n in entries]
                else:
                    args.k = [int(n) for n in default.split()]
                if args.k:
                    print("\nTIP: number of topics can be specified with argument '-k N N N ...':")
                    print(" topicexplorer train %s -k %s\n" %
                          (args.config_file, ' '.join(map(str, args.k))))
            except ValueError:
                print("Enter valid integers, separated by spaces!")

    if args.processes < 0:
        # Negative values count back from the number of available CPUs.
        import multiprocessing
        args.processes = multiprocessing.cpu_count() + args.processes

    print("Loading corpus... ")
    corpus = Corpus.load(corpus_filename)

    try:
        model_pattern = config.get("main", "model_pattern")
    except NoOptionError:
        model_pattern = None

    continue_prompt = (
        "Existing topic models found. You can continue training or start "
        "a new model. Do you want to continue training your existing "
        "models? \n")

    # The prompt is only shown when neither --rebuild, quiet mode nor the
    # continue flag already decides the question (short-circuit order).
    if (model_pattern is not None and not args.rebuild and
            (args.quiet or args.cont or
             bool_prompt(continue_prompt, default=True))):
        from vsm.model.lda import LDA
        # Load one existing model only to read its current iteration count.
        m = LDA.load(model_pattern.format(args.k[0]),
                     multiprocessing=args.processes > 1,
                     n_proc=args.processes)

        if args.iter is None and not args.quiet:  # pragma: no cover
            args.iter = int_prompt("Total number of training iterations:",
                                   default=int(m.iteration * 1.5),
                                   min=m.iteration)
            print("\nTIP: number of training iterations can be specified with argument '--iter N':")
            print(" topicexplorer train --iter %d %s\n" %
                  (args.iter, args.config_file))
        elif args.iter is None and args.quiet:  # pragma: no cover
            args.iter = int(m.iteration * 1.5)

        del m

        # if the set changes, build some new models and continue some old ones
        config_topics = eval(config.get("main", "topics"))
        if args.k != config_topics:
            new_models = set(args.k) - set(config_topics)
            continuing_models = set(args.k) & set(config_topics)

            build_models(corpus, corpus_filename, model_path,
                         config.get("main", "context_type"),
                         new_models, n_iterations=args.iter,
                         n_proc=args.processes, seed=args.seed,
                         dry_run=args.dry_run)

            model_pattern = continue_training(model_pattern,
                                              continuing_models,
                                              args.iter,
                                              n_proc=args.processes,
                                              dry_run=args.dry_run)
        else:
            model_pattern = continue_training(model_pattern, args.k,
                                              args.iter,
                                              n_proc=args.processes,
                                              dry_run=args.dry_run)
    else:
        # build a new model
        if args.iter is None and not args.quiet:  # pragma: no cover
            args.iter = int_prompt("Number of training iterations:",
                                   default=200)
            print("\nTIP: number of training iterations can be specified with argument '--iter N':")
            print(" topicexplorer train --iter %d %s\n" %
                  (args.iter, args.config_file))
        elif args.iter is None and args.quiet:  # pragma: no cover
            args.iter = 200

        # TODO: if only one context_type, make it just the one context type.
        ctxs = corpus.context_types
        if len(ctxs) == 1:
            args.context_type = ctxs[0]
        else:
            # Order context types by number of contexts, fewest first; the
            # first one is offered as the default (shown in upper case).
            ctxs = sorted(ctxs, key=lambda ctx: len(corpus.view_contexts(ctx)))
            if args.context_type not in ctxs:
                while args.context_type not in ctxs:
                    contexts = ctxs[:]
                    contexts[0] = contexts[0].upper()
                    contexts = '/'.join(contexts)
                    args.context_type = input(
                        "Select a context type [%s] : " % contexts)
                    if args.context_type.strip() == '':
                        args.context_type = ctxs[0]
                    if args.context_type == ctxs[0].upper():
                        args.context_type = ctxs[0]

                print("\nTIP: context type can be specified with argument '--context-type TYPE':")
                print(" topicexplorer train --context-type %s %s\n" %
                      (args.context_type, args.config_file))

        print("\nTIP: This configuration can be automated as:")
        print(" topicexplorer train %s --iter %d --context-type %s -k %s\n" %
              (args.config_file, args.iter, args.context_type,
               ' '.join(map(str, args.k))))

        model_pattern = build_models(corpus, corpus_filename, model_path,
                                     args.context_type, args.k,
                                     n_iterations=args.iter,
                                     n_proc=args.processes, seed=args.seed,
                                     dry_run=args.dry_run)

    config.set("main", "model_pattern", model_pattern)
    if args.context_type:
        # test for presence, since continuing doesn't require context_type
        config.set("main", "context_type", args.context_type)
    args.k.sort()
    config.set("main", "topics", str(args.k))

    if not args.dry_run:
        if config.has_option("main", "cluster"):
            cluster_path = config.get("main", "cluster", fallback=None)
            config.remove_option("main", "cluster")
            try:
                if cluster_path:
                    os.remove(cluster_path)
            except (OSError, IOError):
                # fail silently on IOError
                pass

        with open(args.config_file, "w") as configfh:
            config.write(configfh)
def main(args):
    """Train new topic models or continue training existing ones.

    Reads the config file named by ``args.config_file``, determines the
    list of topic counts (``args.k``), the iteration count (``args.iter``)
    and, for new models, the context type, prompting interactively for
    anything not supplied on the command line.  It then either continues
    training the models referenced by the config's ``model_pattern`` or
    builds new ones, and writes ``model_pattern``, ``context_type`` and
    ``topics`` back to the config file.

    :param args: parsed command-line namespace; several attributes
        (``k``, ``iter``, ``processes``, ``context_type``) are filled in
        or normalised in place.
    """
    config = ConfigParser()
    config.read(args.config_file)
    corpus_filename = config.get("main", "corpus_file")
    model_path = config.get("main", "path")

    if args.k is None:
        # A missing option and an empty option both mean "no configured
        # topics".  A bare ``raise NoOptionError`` cannot be used for the
        # empty case: the class requires (option, section) arguments, so
        # instantiating it without them raises TypeError instead.
        try:
            configured_topics = config.get("main", "topics")
        except NoOptionError:
            configured_topics = None

        if configured_topics:
            # NOTE(review): eval() executes whatever the config file
            # contains.  ast.literal_eval would be safer if this option is
            # always a plain list literal -- confirm before changing.
            default = ' '.join(map(str, eval(configured_topics)))
        else:
            default = ' '.join(map(str, range(20, 100, 20)))

        while args.k is None:
            ks = raw_input(
                "Number of Topics [Default '{0}']: ".format(default))
            try:
                # Empty *and* whitespace-only answers select the default.
                # Testing the raw string for truthiness would turn "   "
                # into an empty topic list and leave the loop with no
                # topics at all.
                entries = ks.split()
                if entries:
                    args.k = [int(n) for n in entries]
                else:
                    args.k = [int(n) for n in default.split()]
                if args.k:
                    print("\nTIP: number of topics can be specified with argument '-k N N N ...':")
                    print(" vsm train %s -k %s\n" %
                          (args.config_file, ' '.join(map(str, args.k))))
            except ValueError:
                print("Enter valid integers, separated by spaces!")

    if args.processes < 0:
        # Negative values count back from the number of available CPUs.
        args.processes = multiprocessing.cpu_count() + args.processes

    corpus = Corpus.load(corpus_filename)

    try:
        model_pattern = config.get("main", "model_pattern")
    except NoOptionError:
        model_pattern = None

    # The prompt is only shown when a model pattern is configured.
    if model_pattern is not None and \
            bool_prompt("Existing models found. Continue training?",
                        default=True):
        # Load one existing model only to read its current iteration count.
        m = LDA.load(model_pattern.format(args.k[0]),
                     multiprocessing=args.processes > 1,
                     n_proc=args.processes)

        if args.iter is None:
            args.iter = int_prompt("Total number of training iterations:",
                                   default=int(m.iteration * 1.5),
                                   min=m.iteration)
            print("\nTIP: number of training iterations can be specified with argument '--iter N':")
            print(" vsm train --iter %d %s\n" % (args.iter, args.config_file))

        del m

        # if the set changes, build some new models and continue some old ones
        config_topics = eval(config.get("main", "topics"))
        if args.k != config_topics:
            new_models = set(args.k) - set(config_topics)
            continuing_models = set(args.k) & set(config_topics)

            build_models(corpus, corpus_filename, model_path,
                         config.get("main", "context_type"),
                         new_models, n_iterations=args.iter,
                         n_proc=args.processes, seed=args.seed)

            model_pattern = continue_training(model_pattern,
                                              continuing_models,
                                              args.iter,
                                              n_proc=args.processes)
        else:
            model_pattern = continue_training(model_pattern, args.k,
                                              args.iter,
                                              n_proc=args.processes)
    else:
        # build a new model
        if args.iter is None:
            args.iter = int_prompt("Number of training iterations:",
                                   default=200)
            print("\nTIP: number of training iterations can be specified with argument '--iter N':")
            print(" vsm train --iter %d %s\n" % (args.iter, args.config_file))

        # Order context types by number of contexts, fewest first; the
        # first one is offered as the default (shown in upper case).
        ctxs = corpus.context_types
        ctxs = sorted(ctxs, key=lambda ctx: len(corpus.view_contexts(ctx)))
        if args.context_type not in ctxs:
            while args.context_type not in ctxs:
                contexts = ctxs[:]
                contexts[0] = contexts[0].upper()
                contexts = '/'.join(contexts)
                args.context_type = raw_input(
                    "Select a context type [%s] : " % contexts)
                if args.context_type.strip() == '':
                    args.context_type = ctxs[0]
                if args.context_type == ctxs[0].upper():
                    args.context_type = ctxs[0]

            print("\nTIP: context type can be specified with argument '--context-type TYPE':")
            print(" vsm train --context-type %s %s\n" %
                  (args.context_type, args.config_file))

        print("\nTIP: This configuration can be automated as:")
        print(" vsm train %s --iter %d --context-type %s -k %s\n" %
              (args.config_file, args.iter, args.context_type,
               ' '.join(map(str, args.k))))

        model_pattern = build_models(corpus, corpus_filename, model_path,
                                     args.context_type, args.k,
                                     n_iterations=args.iter,
                                     n_proc=args.processes, seed=args.seed)

    config.set("main", "model_pattern", model_pattern)
    if args.context_type:
        # test for presence, since continuing doesn't require context_type
        config.set("main", "context_type", args.context_type)
    args.k.sort()
    config.set("main", "topics", str(args.k))

    with open(args.config_file, "wb") as configfh:
        config.write(configfh)
def main(args):
    """Train new topic models or continue training existing ones.

    Reads the config file named by ``args.config_file``, determines the
    list of topic counts (``args.k``), the iteration count (``args.iter``)
    and, for new models, the context type, prompting interactively for
    anything not supplied on the command line.  It then either continues
    training the models referenced by the config's ``model_pattern`` or
    builds new ones, and finally records ``model_pattern``,
    ``context_type`` and ``topics`` in the config.  Unless
    ``args.dry_run`` is set, a configured cluster file is removed and the
    config file is rewritten.

    When ``args.cluster`` is set, only ``cluster`` is invoked.

    :param args: parsed command-line namespace; several attributes
        (``k``, ``iter``, ``processes``, ``context_type``) are filled in
        or normalised in place.
    """
    if args.cluster:
        cluster(args.cluster, args.config_file)
        return

    config = topicexplorer.config.read(args.config_file)
    corpus_filename = config.get("main", "corpus_file")
    model_path = config.get("main", "path")

    if config.getboolean("main", "sentences"):
        from vsm.extensions.ldasentences import CorpusSent as Corpus
    else:
        from vsm.corpus import Corpus

    if args.k is None:
        # A missing option and an empty option both mean "no configured
        # topics"; handle them with a plain branch instead of raising and
        # catching NoOptionError.
        try:
            configured_topics = config.get("main", "topics")
        except NoOptionError:
            configured_topics = None

        if configured_topics:
            # NOTE(review): eval() executes whatever the config file
            # contains.  ast.literal_eval would be safer if this option is
            # always a plain list literal -- confirm before changing.
            default = ' '.join(map(str, eval(configured_topics)))
            if args.quiet:
                args.k = [int(n) for n in default.split()]
        else:
            default = ' '.join(map(str, range(20, 100, 20)))

        while args.k is None:
            ks = input("Number of Topics [Default '{0}']: ".format(default))
            try:
                # Empty *and* whitespace-only answers select the default.
                # Testing the raw string for truthiness would turn "   "
                # into an empty topic list and leave the loop with no
                # topics at all.
                entries = ks.split()
                if entries:
                    args.k = [int(n) for n in entries]
                else:
                    args.k = [int(n) for n in default.split()]
                if args.k:
                    print(
                        "\nTIP: number of topics can be specified with argument '-k N N N ...':"
                    )
                    print(" topicexplorer train %s -k %s\n" %
                          (args.config_file, ' '.join(map(str, args.k))))
            except ValueError:
                print("Enter valid integers, separated by spaces!")

    if args.processes < 0:
        # Negative values count back from the number of available CPUs.
        import multiprocessing
        args.processes = multiprocessing.cpu_count() + args.processes

    print("Loading corpus... ")
    corpus = Corpus.load(corpus_filename)

    try:
        model_pattern = config.get("main", "model_pattern")
    except NoOptionError:
        model_pattern = None

    continue_prompt = (
        "Existing topic models found. You can continue training or start "
        "a new model. Do you want to continue training your existing "
        "models? \n")

    # The prompt is only shown when neither --rebuild, quiet mode nor the
    # continue flag already decides the question (short-circuit order).
    if (model_pattern is not None and not args.rebuild and
            (args.quiet or args.cont or
             bool_prompt(continue_prompt, default=True))):
        from vsm.model.lda import LDA
        # Load one existing model only to read its current iteration count.
        m = LDA.load(model_pattern.format(args.k[0]),
                     multiprocessing=args.processes > 1,
                     n_proc=args.processes)

        if args.iter is None and not args.quiet:  # pragma: no cover
            args.iter = int_prompt("Total number of training iterations:",
                                   default=int(m.iteration * 1.5),
                                   min=m.iteration)
            print(
                "\nTIP: number of training iterations can be specified with argument '--iter N':"
            )
            print(" topicexplorer train --iter %d %s\n" %
                  (args.iter, args.config_file))
        elif args.iter is None and args.quiet:  # pragma: no cover
            args.iter = int(m.iteration * 1.5)

        del m

        # if the set changes, build some new models and continue some old ones
        config_topics = eval(config.get("main", "topics"))
        if args.k != config_topics:
            new_models = set(args.k) - set(config_topics)
            continuing_models = set(args.k) & set(config_topics)

            build_models(corpus, corpus_filename, model_path,
                         config.get("main", "context_type"),
                         new_models, n_iterations=args.iter,
                         n_proc=args.processes, seed=args.seed,
                         dry_run=args.dry_run)

            model_pattern = continue_training(model_pattern,
                                              continuing_models,
                                              args.iter,
                                              n_proc=args.processes,
                                              dry_run=args.dry_run)
        else:
            model_pattern = continue_training(model_pattern, args.k,
                                              args.iter,
                                              n_proc=args.processes,
                                              dry_run=args.dry_run)
    else:
        # build a new model
        if args.iter is None and not args.quiet:  # pragma: no cover
            args.iter = int_prompt("Number of training iterations:",
                                   default=200)
            print(
                "\nTIP: number of training iterations can be specified with argument '--iter N':"
            )
            print(" topicexplorer train --iter %d %s\n" %
                  (args.iter, args.config_file))
        elif args.iter is None and args.quiet:  # pragma: no cover
            args.iter = 200

        # TODO: if only one context_type, make it just the one context type.
        ctxs = corpus.context_types
        if len(ctxs) == 1:
            args.context_type = ctxs[0]
        else:
            # Order context types by number of contexts, fewest first; the
            # first one is offered as the default (shown in upper case).
            ctxs = sorted(ctxs, key=lambda ctx: len(corpus.view_contexts(ctx)))
            if args.context_type not in ctxs:
                while args.context_type not in ctxs:
                    contexts = ctxs[:]
                    contexts[0] = contexts[0].upper()
                    contexts = '/'.join(contexts)
                    args.context_type = input(
                        "Select a context type [%s] : " % contexts)
                    if args.context_type.strip() == '':
                        args.context_type = ctxs[0]
                    if args.context_type == ctxs[0].upper():
                        args.context_type = ctxs[0]

                print(
                    "\nTIP: context type can be specified with argument '--context-type TYPE':"
                )
                print(" topicexplorer train --context-type %s %s\n" %
                      (args.context_type, args.config_file))

        print("\nTIP: This configuration can be automated as:")
        print(" topicexplorer train %s --iter %d --context-type %s -k %s\n" %
              (args.config_file, args.iter, args.context_type,
               ' '.join(map(str, args.k))))

        model_pattern = build_models(corpus, corpus_filename, model_path,
                                     args.context_type, args.k,
                                     n_iterations=args.iter,
                                     n_proc=args.processes, seed=args.seed,
                                     dry_run=args.dry_run)

    config.set("main", "model_pattern", model_pattern)
    if args.context_type:
        # test for presence, since continuing doesn't require context_type
        config.set("main", "context_type", args.context_type)
    args.k.sort()
    config.set("main", "topics", str(args.k))

    if not args.dry_run:
        if config.has_option("main", "cluster"):
            cluster_path = config.get("main", "cluster", fallback=None)
            config.remove_option("main", "cluster")
            try:
                if cluster_path:
                    os.remove(cluster_path)
            except (OSError, IOError):
                # fail silently on IOError
                pass

        with open(args.config_file, "w") as configfh:
            config.write(configfh)
def main(args):
    """Train new topic models or continue training existing ones.

    Reads the config file named by ``args.config_file``, determines the
    list of topic counts (``args.k``), the iteration count (``args.iter``)
    and, for new models, the context type, prompting interactively for
    anything not supplied on the command line.  It then either continues
    training the models referenced by the config's ``model_pattern`` or
    builds new ones, and records ``model_pattern``, ``context_type`` and
    ``topics`` in the config.  The config file is rewritten unless
    ``args.dry_run`` is set.

    :param args: parsed command-line namespace; several attributes
        (``k``, ``iter``, ``processes``, ``context_type``) are filled in
        or normalised in place.
    """
    from vsm.corpus import Corpus
    from vsm.model.lda import LDA

    config = ConfigParser()
    config.read(args.config_file)
    corpus_filename = config.get("main", "corpus_file")
    model_path = config.get("main", "path")

    if args.k is None:
        # A missing option and an empty option both mean "no configured
        # topics".  A bare ``raise NoOptionError`` cannot be used for the
        # empty case: the class requires (option, section) arguments, so
        # instantiating it without them raises TypeError instead.
        try:
            configured_topics = config.get("main", "topics")
        except NoOptionError:
            configured_topics = None

        if configured_topics:
            # NOTE(review): eval() executes whatever the config file
            # contains.  ast.literal_eval would be safer if this option is
            # always a plain list literal -- confirm before changing.
            default = ' '.join(map(str, eval(configured_topics)))
        else:
            default = ' '.join(map(str, range(20, 100, 20)))

        while args.k is None:
            ks = raw_input(
                "Number of Topics [Default '{0}']: ".format(default))
            try:
                # Empty *and* whitespace-only answers select the default.
                # Testing the raw string for truthiness would turn "   "
                # into an empty topic list and leave the loop with no
                # topics at all.
                entries = ks.split()
                if entries:
                    args.k = [int(n) for n in entries]
                else:
                    args.k = [int(n) for n in default.split()]
                if args.k:
                    print("\nTIP: number of topics can be specified with argument '-k N N N ...':")
                    print(" vsm train %s -k %s\n" %
                          (args.config_file, ' '.join(map(str, args.k))))
            except ValueError:
                print("Enter valid integers, separated by spaces!")

    if args.processes < 0:
        # Negative values count back from the number of available CPUs.
        args.processes = multiprocessing.cpu_count() + args.processes

    print("Loading corpus... ")
    corpus = Corpus.load(corpus_filename)

    try:
        model_pattern = config.get("main", "model_pattern")
    except NoOptionError:
        model_pattern = None

    # The prompt is only shown when a model pattern is configured.
    if model_pattern is not None and \
            bool_prompt("Existing models found. Continue training?",
                        default=True):
        # Load one existing model only to read its current iteration count.
        m = LDA.load(model_pattern.format(args.k[0]),
                     multiprocessing=args.processes > 1,
                     n_proc=args.processes)

        if args.iter is None:
            args.iter = int_prompt("Total number of training iterations:",
                                   default=int(m.iteration * 1.5),
                                   min=m.iteration)
            print("\nTIP: number of training iterations can be specified with argument '--iter N':")
            print(" vsm train --iter %d %s\n" % (args.iter, args.config_file))

        del m

        # if the set changes, build some new models and continue some old ones
        config_topics = eval(config.get("main", "topics"))
        if args.k != config_topics:
            new_models = set(args.k) - set(config_topics)
            continuing_models = set(args.k) & set(config_topics)

            build_models(corpus, corpus_filename, model_path,
                         config.get("main", "context_type"),
                         new_models, n_iterations=args.iter,
                         n_proc=args.processes, seed=args.seed)

            model_pattern = continue_training(model_pattern,
                                              continuing_models,
                                              args.iter,
                                              n_proc=args.processes)
        else:
            model_pattern = continue_training(model_pattern, args.k,
                                              args.iter,
                                              n_proc=args.processes)
    else:
        # build a new model
        if args.iter is None:
            args.iter = int_prompt("Number of training iterations:",
                                   default=200)
            print("\nTIP: number of training iterations can be specified with argument '--iter N':")
            print(" vsm train --iter %d %s\n" % (args.iter, args.config_file))

        # Order context types by number of contexts, fewest first; the
        # first one is offered as the default (shown in upper case).
        ctxs = corpus.context_types
        ctxs = sorted(ctxs, key=lambda ctx: len(corpus.view_contexts(ctx)))
        if args.context_type not in ctxs:
            while args.context_type not in ctxs:
                contexts = ctxs[:]
                contexts[0] = contexts[0].upper()
                contexts = '/'.join(contexts)
                args.context_type = raw_input(
                    "Select a context type [%s] : " % contexts)
                if args.context_type.strip() == '':
                    args.context_type = ctxs[0]
                if args.context_type == ctxs[0].upper():
                    args.context_type = ctxs[0]

            print("\nTIP: context type can be specified with argument '--context-type TYPE':")
            print(" vsm train --context-type %s %s\n" %
                  (args.context_type, args.config_file))

        print("\nTIP: This configuration can be automated as:")
        print(" vsm train %s --iter %d --context-type %s -k %s\n" %
              (args.config_file, args.iter, args.context_type,
               ' '.join(map(str, args.k))))

        model_pattern = build_models(corpus, corpus_filename, model_path,
                                     args.context_type, args.k,
                                     n_iterations=args.iter,
                                     n_proc=args.processes, seed=args.seed,
                                     dry_run=args.dry_run)

    config.set("main", "model_pattern", model_pattern)
    if args.context_type:
        # test for presence, since continuing doesn't require context_type
        config.set("main", "context_type", args.context_type)
    args.k.sort()
    config.set("main", "topics", str(args.k))

    if not args.dry_run:
        with open(args.config_file, "wb") as configfh:
            config.write(configfh)