def main() -> None:
    parser = argparse.ArgumentParser(
        description="Runs Neural Monkey as a web server.")
    parser.add_argument("--port", type=int, default=5000)
    parser.add_argument("--host", type=str, default="127.0.0.1")
    parser.add_argument("--configuration", type=str, required=True)
    parser.add_argument("--preprocess", type=str, required=False,
                        default=None)
    args = parser.parse_args()

    print("")

    if args.preprocess is not None:
        preprocessing = Configuration()
        preprocessing.add_argument("preprocess")
        preprocessing.load_file(args.preprocess)
        preprocessing.build_model()
        APP.config["preprocess"] = preprocessing.model.preprocess
    else:
        APP.config["preprocess"] = []

    exp = Experiment(config_path=args.configuration)
    exp.build_model()
    APP.config["experiment"] = exp

    APP.run(port=args.port, host=args.host)
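# A minimal usage sketch for the server entry point above. The module path
# and INI file name are assumptions, not taken from this listing:
#
#     python -m neuralmonkey.server --configuration experiment.ini \
#         --port 5000 --host 0.0.0.0
#
# With the default host of 127.0.0.1 the server only accepts local
# connections; passing 0.0.0.0 exposes it on all interfaces.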
def main() -> None:
    # pylint: disable=no-member,broad-except
    if len(sys.argv) != 3:
        print("Usage: run.py <run_ini_file> <test_datasets>")
        exit(1)

    test_datasets = Configuration()
    test_datasets.add_argument('test_datasets')
    test_datasets.add_argument('variables')

    CONFIG.load_file(sys.argv[1])
    CONFIG.build_model()

    test_datasets.load_file(sys.argv[2])
    test_datasets.build_model()
    datasets_model = test_datasets.model

    initialize_for_running(CONFIG.model.output, CONFIG.model.tf_manager,
                           datasets_model.variables)

    print("")

    evaluators = [(e[0], e[0], e[1]) if len(e) == 2 else e
                  for e in CONFIG.model.evaluation]

    for dataset in datasets_model.test_datasets:
        execution_results, output_data = run_on_dataset(
            CONFIG.model.tf_manager, CONFIG.model.runners,
            dataset, CONFIG.model.postprocess, write_out=True)
        # TODO what if there is no ground truth
        eval_result = evaluation(evaluators, dataset, CONFIG.model.runners,
                                 execution_results, output_data)
        if eval_result:
            print_final_evaluation(dataset.name, eval_result)
def main():
    # pylint: disable=no-member,broad-except
    if len(sys.argv) != 3:
        print("Usage: run.py <run_ini_file> <test_datasets>")
        exit(1)

    test_datasets = Configuration()
    test_datasets.add_argument('test_datasets')

    args, sess = initialize_for_running(sys.argv[1])
    datasets_args = test_datasets.load_file(sys.argv[2])

    print("")

    try:
        for dataset in datasets_args.test_datasets:
            check_dataset_and_coders(dataset, args.encoders)
    except Exception as exc:
        # Python 3 exceptions have no `.message` attribute; use str(exc)
        log(str(exc), color='red')
        exit(1)

    for dataset in datasets_args.test_datasets:
        _, _, evaluation = run_on_dataset(
            sess, args.runner, args.encoders + [args.decoder], args.decoder,
            dataset, args.evaluation, args.postprocess, write_out=True)

        if evaluation:
            print_dataset_evaluation(dataset.name, evaluation)
def main():
    # pylint: disable=no-member,broad-except
    if len(sys.argv) != 3:
        print("Usage: run.py <run_ini_file> <test_datasets>")
        exit(1)

    test_datasets = Configuration()
    test_datasets.add_argument('test_datasets')

    args, sess = initialize_for_running(sys.argv[1])
    datasets_args = test_datasets.load_file(sys.argv[2])

    print("")

    try:
        for dataset in datasets_args.test_datasets:
            check_dataset_and_coders(dataset, args.encoders)
    except Exception as exc:
        log(str(exc), color='red')
        exit(1)

    for dataset in datasets_args.test_datasets:
        _, _, evaluation = run_on_dataset(
            sess, args.runner, args.encoders + [args.decoder], args.decoder,
            dataset, args.evaluation, args.postprocess, write_out=True)

        if evaluation:
            print_dataset_evaluation(dataset.name, evaluation)
def main() -> None:
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config", metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("datasets", metavar="INI-TEST-DATASETS",
                        help="the configuration of the test datasets")
    parser.add_argument("-g", "--grid", dest="grid", action="store_true",
                        help="look at the SGE variables for slicing the data")
    args = parser.parse_args()

    test_datasets = Configuration()
    test_datasets.add_argument("test_datasets")
    test_datasets.add_argument("variables",
                               cond=lambda x: isinstance(x, list))
    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model

    exp = Experiment(config_path=args.config)
    exp.build_model()
    exp.load_variables(datasets_model.variables)

    if args.grid and len(datasets_model.test_datasets) > 1:
        raise ValueError("Only one test dataset supported when using --grid")

    for dataset in datasets_model.test_datasets:
        if args.grid:
            if ("SGE_TASK_FIRST" not in os.environ
                    or "SGE_TASK_LAST" not in os.environ
                    or "SGE_TASK_STEPSIZE" not in os.environ
                    or "SGE_TASK_ID" not in os.environ):
                raise EnvironmentError(
                    "Some SGE environment variables are missing")

            length = int(os.environ["SGE_TASK_STEPSIZE"])
            start = int(os.environ["SGE_TASK_ID"]) - 1
            end = int(os.environ["SGE_TASK_LAST"]) - 1

            if start + length > end:
                length = end - start + 1

            log("Running grid task {} starting at {} with step {}"
                .format(start // length, start, length))

            dataset = dataset.subset(start, length)

        if exp.config.args.evaluation is None:
            exp.run_model(dataset, write_out=True)
        else:
            exp.evaluate(dataset, write_out=True)

    for session in exp.config.model.tf_manager.sessions:
        session.close()
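# A self-contained sketch of the SGE slicing arithmetic used above, so the
# subset computation can be checked without a grid engine. The function name
# and the example values are made up; the logic mirrors the code above.
def sge_slice(task_id: int, task_last: int, stepsize: int) -> tuple:
    """Return (start, length) of the dataset slice for one grid task."""
    length = stepsize
    start = task_id - 1          # SGE task ids are 1-based
    end = task_last - 1
    if start + length > end:     # the last task may get a shorter slice
        length = end - start + 1
    return start, length

assert sge_slice(task_id=1, task_last=100, stepsize=25) == (0, 25)
assert sge_slice(task_id=76, task_last=90, stepsize=25) == (75, 15)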
def load_runtime_config(config_path: str) -> argparse.Namespace:
    """Load a runtime configuration file."""
    cfg = Configuration()
    cfg.add_argument("test_datasets")
    cfg.add_argument("variables", cond=lambda x: isinstance(x, list))

    cfg.load_file(config_path)
    cfg.build_model()
    return cfg.model
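# A hedged usage sketch for load_runtime_config, following the pattern of
# the surrounding entry points ("runtime.ini" is a hypothetical file name):
#
#     runtime = load_runtime_config("runtime.ini")
#     for dataset in runtime.test_datasets:
#         print(dataset.name)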
def main() -> None:
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config", metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("datasets", metavar="INI-FILE",
                        help="the configuration file of the test datasets")
    parser.add_argument("--beam", metavar="BEAM_SIZE", type=int, default=10,
                        help="Beam size.")
    parser.add_argument("--kenlm", type=str,
                        help="Path to a KenLM model arpa file.")
    parser.add_argument("--prefix", type=str,
                        help="Path used as a prefix of stored checkpoints.")
    parser.add_argument("--lm-weight", type=float,
                        help="Default weight of the language model.")
    parser.add_argument("--null-trail-weight", type=float,
                        help="Default weight of the null-trailing feature.")
    parser.add_argument("--nt-ratio-weight", type=float,
                        help="Default weight of the null-token ratio feature.")
    args = parser.parse_args()

    test_datasets = Configuration()
    test_datasets.add_argument("test_datasets")
    test_datasets.add_argument("batch_size", cond=lambda x: x > 0)
    test_datasets.add_argument("variables",
                               cond=lambda x: isinstance(x, list))

    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model

    exp = Experiment(config_path=args.config)
    exp.build_model()
    exp.load_variables(datasets_model.variables)

    weights = {}
    if args.lm_weight is not None:
        weights['lm_score'] = args.lm_weight
    if args.null_trail_weight is not None:
        weights['null_trailing'] = args.null_trail_weight
    if args.nt_ratio_weight is not None:
        weights['null_token_ratio'] = args.nt_ratio_weight

    if not weights:
        raise ValueError("No default weights specified, nothing to train.")

    ctc_decoder = None
    for runner in exp.model.runners:
        if (isinstance(runner, PlainRunner)
                and isinstance(runner.decoder, CTCDecoder)):
            ctc_decoder = runner.decoder
            break

    if ctc_decoder is None:
        raise ValueError(
            "Was not able to detect CTC decoder in the configuration.")

    print("Loading language model")
    lm = NGramModel(args.kenlm)
    print("LM loaded")

    logits_runner = RepresentationRunner(
        output_series="logits", encoder=ctc_decoder, attribute="logits")
    exp.model.runners = [logits_runner]

    dataset = datasets_model.test_datasets[0]
    singleton_batches = dataset.batches(BatchingScheme(1))

    DATASET_SIZE = dataset.length
    CHECKPOINTS = 5
    CHECKPOINT_ITERS = int(DATASET_SIZE / CHECKPOINTS)

    print("{} sentences in the dataset, checkpoint every {} sentences "
          "({} checkpoints in total)."
          .format(DATASET_SIZE, CHECKPOINT_ITERS, CHECKPOINTS))

    for i, sent_dataset in enumerate(singleton_batches):
        ctc_model_result = exp.run_model(
            sent_dataset, write_out=False, batch_size=1)

        logits = np.squeeze(ctc_model_result[1]['logits'], axis=1)
        target = ctc_model_result[2]['target'][0]

        train_weights(logits, args.beam, ctc_decoder.vocabulary,
                      target, weights, lm)

        print("[{}] Weights:".format(i + 1),
              ", ".join(["{}: {:.3f}".format(key, value)
                         for key, value in weights.items()]))

        if i != 0 and (i + 1) % CHECKPOINT_ITERS == 0:
            with open("{}.{}".format(args.prefix,
                                     int(i / CHECKPOINT_ITERS)), "w") as f:
                for key, value in weights.items():
                    f.write("{}={:.3f}\n".format(key.upper(), value))
            print("\nCheckpoint saved.\n")

    for session in exp.config.model.tf_manager.sessions:
        session.close()
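# The checkpoint files written above contain one "KEY=VALUE" line per
# feature weight (keys upper-cased, values with three decimals). A minimal
# reader sketch for that format; lower-casing the keys assumes the training
# loop above is the only writer:
def read_weight_checkpoint(path: str) -> dict:
    """Parse a weight checkpoint file back into a weights dictionary."""
    weights = {}
    with open(path) as f:
        for line in f:
            key, value = line.strip().split("=")
            weights[key.lower()] = float(value)
    return weights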
def create_config() -> Configuration:
    config = Configuration()

    # training loop arguments
    config.add_argument('tf_manager')
    config.add_argument('epochs', cond=lambda x: x >= 0)
    config.add_argument('trainer')
    config.add_argument('batch_size', cond=lambda x: x > 0)
    config.add_argument('train_dataset')
    config.add_argument('val_dataset')
    config.add_argument('output')
    config.add_argument('evaluation')
    config.add_argument('runners')
    config.add_argument('test_datasets', required=False, default=[])
    config.add_argument('logging_period', required=False, default=20)
    config.add_argument('validation_period', required=False, default=500)
    config.add_argument('val_preview_input_series', required=False,
                        default=None)
    config.add_argument('val_preview_output_series', required=False,
                        default=None)
    config.add_argument('val_preview_num_examples', required=False,
                        default=15)
    config.add_argument('train_start_offset', required=False, default=0)
    config.add_argument('runners_batch_size', required=False, default=None)
    config.add_argument('minimize', required=False, default=False)
    config.add_argument('postprocess')
    config.add_argument('name')
    config.add_argument('random_seed', required=False)
    config.add_argument('initial_variables', required=False, default=None)
    config.add_argument('overwrite_output_dir', required=False, default=False)

    return config
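# Typical use of create_config, mirroring the load/build sequence seen in
# the entry points above ("experiment.ini" is a hypothetical path):
#
#     config = create_config()
#     config.load_file("experiment.ini")
#     config.build_model()
#     model = config.model   # namespace with tf_manager, runners, etc.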
def create_config(train_mode: bool = True) -> Configuration:
    config = Configuration()
    config.add_argument("tf_manager", required=False, default=None)
    config.add_argument("batch_size", required=False, default=None,
                        cond=lambda x: x is None or x > 0)
    config.add_argument("output")
    config.add_argument("postprocess", required=False, default=None)
    config.add_argument("runners")

    if train_mode:
        config.add_argument("epochs", cond=lambda x: x >= 0)
        config.add_argument("trainer")
        config.add_argument("train_dataset")
        config.add_argument("val_dataset", required=False, default=[])
        config.add_argument("evaluation")
        config.add_argument("test_datasets", required=False, default=[])
        config.add_argument("logging_period", required=False, default=20)
        config.add_argument("validation_period", required=False, default=500)
        config.add_argument("visualize_embeddings", required=False,
                            default=None)
        config.add_argument("val_preview_input_series", required=False,
                            default=None)
        config.add_argument("val_preview_output_series", required=False,
                            default=None)
        config.add_argument("val_preview_num_examples", required=False,
                            default=15)
        config.add_argument("train_start_offset", required=False, default=0)
        config.add_argument("name", required=False,
                            default="Neural Monkey Experiment")
        config.add_argument("random_seed", required=False, default=2574600)
        config.add_argument("initial_variables", required=False, default=None)
        config.add_argument("overwrite_output_dir", required=False,
                            default=False)
    else:
        config.add_argument("evaluation", required=False, default=None)
        for argument in _TRAIN_ARGS:
            config.ignore_argument(argument)

    return config
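# The train_mode flag selects between the full training interface and a
# leaner run-time interface; _TRAIN_ARGS (defined elsewhere in the module)
# lists the training-only keys that are parsed but discarded when running.
# Assumed usage for inference:
#
#     config = create_config(train_mode=False)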
import os
import argparse

from neuralmonkey.logging import log, log_print
from neuralmonkey.config.configuration import Configuration
from neuralmonkey.learning_utils import (evaluation, run_on_dataset,
                                         print_final_evaluation)

CONFIG = Configuration()
CONFIG.add_argument("tf_manager")
CONFIG.add_argument("output")
CONFIG.add_argument("postprocess")
CONFIG.add_argument("evaluation")
CONFIG.add_argument("runners")
CONFIG.add_argument("batch_size")
CONFIG.add_argument("threads", required=False, default=4)
CONFIG.add_argument("runners_batch_size", required=False, default=None)

# ignore arguments which are just for training
CONFIG.ignore_argument("val_dataset")
CONFIG.ignore_argument("trainer")
CONFIG.ignore_argument("name")
CONFIG.ignore_argument("train_dataset")
CONFIG.ignore_argument("epochs")
CONFIG.ignore_argument("test_datasets")
CONFIG.ignore_argument("initial_variables")
CONFIG.ignore_argument("validation_period")
CONFIG.ignore_argument("val_preview_input_series")
CONFIG.ignore_argument("val_preview_output_series")
CONFIG.ignore_argument("val_preview_num_examples")
CONFIG.ignore_argument("logging_period")
CONFIG.ignore_argument("visualize_embeddings")
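# Design note: the ignore_argument calls let this run-time script share one
# INI file with training; training-only keys are parsed and discarded rather
# than rejected as unknown arguments.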
def create_config(config_file):
    config = Configuration()

    config.add_argument('name', str)
    config.add_argument('random_seed', int, required=False)
    config.add_argument('output', str)
    config.add_argument('epochs', int, cond=lambda x: x >= 0)
    config.add_argument('trainer')
    config.add_argument('encoders', list)
    config.add_argument('decoder')
    config.add_argument('batch_size', int, cond=lambda x: x > 0)
    config.add_argument('train_dataset', Dataset)
    config.add_argument('val_dataset', Dataset)
    config.add_argument('postprocess')
    config.add_argument('evaluation', cond=list)
    config.add_argument('runner')
    config.add_argument('test_datasets', list, required=False, default=[])
    config.add_argument('initial_variables', str, required=False, default=[])
    config.add_argument('validation_period', int, required=False, default=500)
    config.add_argument('logging_period', int, required=False, default=20)
    config.add_argument('threads', int, required=False, default=4)
    config.add_argument('minimize', bool, required=False, default=False)
    config.add_argument('save_n_best', int, required=False, default=1)
    config.add_argument('overwrite_output_dir', bool, required=False,
                        default=False)

    return config.load_file(config_file)
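# Unlike the later create_config variants, this version takes the INI path
# directly and returns the result of load_file, so construction and loading
# happen in one call:
#
#     args = create_config("experiment.ini")   # hypothetical path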
def create_config(train_mode: bool = True) -> Configuration:
    config = Configuration()
    config.add_argument("tf_manager", required=False, default=None)
    config.add_argument("batch_size", required=False, default=None,
                        cond=lambda x: x is None or x > 0)
    config.add_argument("output")
    config.add_argument("postprocess", required=False, default=None)
    config.add_argument("runners")
    config.add_argument("random_seed", required=False, default=2574600)

    if train_mode:
        config.add_argument("epochs", cond=lambda x: x >= 0)
        config.add_argument("trainer")
        config.add_argument("train_dataset")
        config.add_argument("val_dataset", required=False, default=[])
        config.add_argument("evaluation")
        config.add_argument("test_datasets", required=False, default=[])
        config.add_argument("logging_period", required=False, default=20)
        config.add_argument("validation_period", required=False, default=500)
        config.add_argument("visualize_embeddings", required=False,
                            default=None)
        config.add_argument("val_preview_input_series", required=False,
                            default=None)
        config.add_argument("val_preview_output_series", required=False,
                            default=None)
        config.add_argument("val_preview_num_examples", required=False,
                            default=15)
        config.add_argument("train_start_offset", required=False, default=0)
        config.add_argument("name", required=False,
                            default="Neural Monkey Experiment")
        config.add_argument("initial_variables", required=False, default=None)
        config.add_argument("overwrite_output_dir", required=False,
                            default=False)
    else:
        config.add_argument("evaluation", required=False, default=None)
        for argument in _TRAIN_ARGS:
            config.ignore_argument(argument)

    return config
import os
import argparse

from neuralmonkey.logging import log, log_print
from neuralmonkey.config.configuration import Configuration
from neuralmonkey.learning_utils import (evaluation, run_on_dataset,
                                         print_final_evaluation)

CONFIG = Configuration()
CONFIG.add_argument('tf_manager')
CONFIG.add_argument('output')
CONFIG.add_argument('postprocess')
CONFIG.add_argument('evaluation')
CONFIG.add_argument('runners')
CONFIG.add_argument('batch_size')
CONFIG.add_argument('threads', required=False, default=4)
CONFIG.add_argument('runners_batch_size', required=False, default=None)

# ignore arguments which are just for training
CONFIG.ignore_argument('val_dataset')
CONFIG.ignore_argument('trainer')
CONFIG.ignore_argument('name')
CONFIG.ignore_argument('train_dataset')
CONFIG.ignore_argument('epochs')
CONFIG.ignore_argument('test_datasets')
CONFIG.ignore_argument('initial_variables')
CONFIG.ignore_argument('validation_period')
CONFIG.ignore_argument('val_preview_input_series')
CONFIG.ignore_argument('val_preview_output_series')
CONFIG.ignore_argument('val_preview_num_examples')
CONFIG.ignore_argument('logging_period')
CONFIG.ignore_argument('visualize_embeddings')
def main() -> None:
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config", metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument('datasets', metavar='INI-TEST-DATASETS',
                        help="the configuration of the test datasets")
    parser.add_argument("-g", "--grid", dest="grid", action="store_true",
                        help="look at the SGE variables for slicing the data")
    args = parser.parse_args()

    test_datasets = Configuration()
    test_datasets.add_argument('test_datasets')
    test_datasets.add_argument('variables')

    CONFIG.load_file(args.config)
    CONFIG.build_model()

    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model

    initialize_for_running(CONFIG.model.output, CONFIG.model.tf_manager,
                           datasets_model.variables)

    print("")

    evaluators = [(e[0], e[0], e[1]) if len(e) == 2 else e
                  for e in CONFIG.model.evaluation]

    if args.grid and len(datasets_model.test_datasets) > 1:
        raise ValueError("Only one test dataset supported when using --grid")

    for dataset in datasets_model.test_datasets:
        if args.grid:
            if ("SGE_TASK_FIRST" not in os.environ
                    or "SGE_TASK_LAST" not in os.environ
                    or "SGE_TASK_STEPSIZE" not in os.environ
                    or "SGE_TASK_ID" not in os.environ):
                raise EnvironmentError(
                    "Some SGE environment variables are missing")

            length = int(os.environ["SGE_TASK_STEPSIZE"])
            start = int(os.environ["SGE_TASK_ID"]) - 1
            end = int(os.environ["SGE_TASK_LAST"]) - 1

            if start + length > end:
                length = end - start + 1

            log("Running grid task {} starting at {} with step {}"
                .format(start // length, start, length))

            dataset = dataset.subset(start, length)

        if CONFIG.model.runners_batch_size is None:
            runners_batch_size = CONFIG.model.batch_size
        else:
            runners_batch_size = CONFIG.model.runners_batch_size

        execution_results, output_data = run_on_dataset(
            CONFIG.model.tf_manager, CONFIG.model.runners,
            dataset, CONFIG.model.postprocess,
            write_out=True, batch_size=runners_batch_size, log_progress=60)
        # TODO what if there is no ground truth
        eval_result = evaluation(evaluators, dataset, CONFIG.model.runners,
                                 execution_results, output_data)
        if eval_result:
            print_final_evaluation(dataset.name, eval_result)
# tests: lint, mypy

import sys
import os

from neuralmonkey.logging import log
from neuralmonkey.config.configuration import Configuration
from neuralmonkey.checking import check_dataset_and_coders
from neuralmonkey.learning_utils import (initialize_tf, run_on_dataset,
                                         print_dataset_evaluation)

CONFIG = Configuration()
CONFIG.add_argument('output', str)
CONFIG.add_argument('encoders', list, cond=lambda l: len(l) > 0)
CONFIG.add_argument('decoder')
CONFIG.add_argument('postprocess')
CONFIG.add_argument('evaluation', cond=list)
CONFIG.add_argument('runner')
CONFIG.add_argument('threads', int, required=False, default=4)

# ignore arguments which are just for training
CONFIG.ignore_argument('val_dataset')
CONFIG.ignore_argument('trainer')
CONFIG.ignore_argument('name')
CONFIG.ignore_argument('train_dataset')
CONFIG.ignore_argument('random_seed')
CONFIG.ignore_argument('epochs')
CONFIG.ignore_argument('batch_size')
CONFIG.ignore_argument('test_datasets')
CONFIG.ignore_argument('initial_variables')
CONFIG.ignore_argument('validation_period')
def create_config() -> Configuration:
    config = Configuration()

    # training loop arguments
    config.add_argument("tf_manager")
    config.add_argument("epochs", cond=lambda x: x >= 0)
    config.add_argument("trainer")
    config.add_argument("batch_size", cond=lambda x: x > 0)
    config.add_argument("train_dataset")
    config.add_argument("val_dataset")
    config.add_argument("output")
    config.add_argument("evaluation")
    config.add_argument("runners")
    config.add_argument("test_datasets", required=False, default=[])
    config.add_argument("logging_period", required=False, default=20)
    config.add_argument("validation_period", required=False, default=500)
    config.add_argument("visualize_embeddings", required=False, default=None)
    config.add_argument("val_preview_input_series", required=False,
                        default=None)
    config.add_argument("val_preview_output_series", required=False,
                        default=None)
    config.add_argument("val_preview_num_examples", required=False,
                        default=15)
    config.add_argument("train_start_offset", required=False, default=0)
    config.add_argument("runners_batch_size", required=False, default=None)
    config.add_argument("postprocess")
    config.add_argument("name")
    config.add_argument("random_seed", required=False)
    config.add_argument("initial_variables", required=False, default=None)
    config.add_argument("overwrite_output_dir", required=False, default=False)

    return config
# tests: lint, mypy

import sys
import os

from neuralmonkey.logging import log, log_print
from neuralmonkey.config.configuration import Configuration
from neuralmonkey.learning_utils import (evaluation, run_on_dataset,
                                         print_final_evaluation)
from neuralmonkey.tf_manager import TensorFlowManager

CONFIG = Configuration()
CONFIG.add_argument('tf_manager', TensorFlowManager)
CONFIG.add_argument('output', str)
CONFIG.add_argument('postprocess')
CONFIG.add_argument('evaluation', list)
CONFIG.add_argument('runners', list)
CONFIG.add_argument('threads', int, required=False, default=4)
CONFIG.add_argument('runners_batch_size', int, required=False, default=None)

# ignore arguments which are just for training
CONFIG.ignore_argument('val_dataset')
CONFIG.ignore_argument('trainer')
CONFIG.ignore_argument('name')
CONFIG.ignore_argument('train_dataset')
CONFIG.ignore_argument('epochs')
CONFIG.ignore_argument('batch_size')
CONFIG.ignore_argument('test_datasets')
CONFIG.ignore_argument('initial_variables')
CONFIG.ignore_argument('validation_period')
CONFIG.ignore_argument('logging_period')
CONFIG.ignore_argument('minimize')
def main() -> None:
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config", metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("datasets", metavar="INI-FILE",
                        help="the configuration file of the test datasets")
    parser.add_argument("--beam", metavar="BEAM_SIZE", type=int, default=10,
                        help="Beam size.")
    parser.add_argument("--kenlm", type=str, default=None,
                        help="Path to a KenLM model arpa file.")
    parser.add_argument("--lm-weight", type=float,
                        help="Weight of the language model.")
    parser.add_argument("--null-trail-weight", type=float,
                        help="Weight of the null-trailing feature.")
    parser.add_argument("--nt-ratio-weight", type=float,
                        help="Weight of the null-token ratio feature.")
    parser.add_argument("--out", type=str, help="Path to the output file.")
    args = parser.parse_args()

    test_datasets = Configuration()
    test_datasets.add_argument("test_datasets")
    test_datasets.add_argument("batch_size", cond=lambda x: x > 0)
    test_datasets.add_argument("variables",
                               cond=lambda x: isinstance(x, list))

    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model

    exp = Experiment(config_path=args.config)
    exp.build_model()
    exp.load_variables(datasets_model.variables)

    ctc_decoder = None
    for runner in exp.model.runners:
        if (isinstance(runner, PlainRunner)
                and isinstance(runner.decoder, CTCDecoder)):
            ctc_decoder = runner.decoder
            break

    if ctc_decoder is None:
        raise ValueError(
            "Was not able to detect CTC decoder in the configuration.")

    logits_runner = RepresentationRunner(
        output_series="logits", encoder=ctc_decoder, attribute="logits")
    exp.model.runners = [logits_runner]

    dataset = datasets_model.test_datasets[0]
    singleton_batches = dataset.batches(BatchingScheme(1))

    print("Loading language model")
    lm = NGramModel(args.kenlm)
    print("LM loaded")

    weights = {}
    if args.lm_weight:
        weights['lm_score'] = args.lm_weight
    if args.null_trail_weight:
        weights['null_trailing'] = args.null_trail_weight
    if args.nt_ratio_weight:
        weights['null_token_ratio'] = args.nt_ratio_weight

    print("Weights:", weights)

    stats = []
    with open(args.out, 'w') as out_file:
        for i, sent_dataset in enumerate(singleton_batches):
            t1 = timeit.default_timer()
            ctc_model_result = exp.run_model(
                sent_dataset, write_out=False, batch_size=1)
            t2 = timeit.default_timer()

            logits = np.squeeze(ctc_model_result[1]['logits'], axis=1)

            t3 = timeit.default_timer()
            best_hyp = decode_beam(logits, args.beam, ctc_decoder.vocabulary,
                                   lm=lm, weights=weights)
            t4 = timeit.default_timer()

            stats.append([len(best_hyp.tokens), t2 - t1, t4 - t3])

            output = " ".join(best_hyp.tokens)
            out_file.write(output + "\n")
            if i % 10 == 0:
                print("[{}] {}".format(i, output))

    with open(args.out + ".stats", 'w') as stats_file:
        for line in stats:
            stats_file.write("{} {:.3f} {:.3f}\n".format(*line))

    for session in exp.config.model.tf_manager.sessions:
        session.close()
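# Each line of the ".stats" file written above holds the hypothesis length
# in tokens, the model run time, and the beam-search time. A minimal
# aggregation sketch (pure Python; the function name is hypothetical):
def summarize_stats(path: str) -> tuple:
    """Return (total_tokens, total_model_time, total_decode_time)."""
    totals = [0.0, 0.0, 0.0]
    with open(path) as f:
        for line in f:
            for i, field in enumerate(line.split()):
                totals[i] += float(field)
    return tuple(totals)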