Esempio n. 1
0
def main() -> None:
    """Parse the command line, build the experiment and start the web app.

    The optional ``--preprocess`` file is read through a ``Configuration``
    that registers a single ``preprocess`` argument; the resulting value (or
    an empty list when the option is absent) is stored in ``APP.config``.
    The experiment built from ``--configuration`` is stored there too, and
    the application is then run on the requested host and port.
    """
    cli = argparse.ArgumentParser(
        description="Runs Neural Monkey as a web server.")
    cli.add_argument("--port", type=int, default=5000)
    cli.add_argument("--host", type=str, default="127.0.0.1")
    cli.add_argument("--configuration", type=str, required=True)
    cli.add_argument("--preprocess", type=str, required=False, default=None)
    options = cli.parse_args()

    print("")

    # Without a preprocessing file the application gets an empty list.
    preprocess_steps = []
    if options.preprocess is not None:
        preprocess_cfg = Configuration()
        preprocess_cfg.add_argument("preprocess")
        preprocess_cfg.load_file(options.preprocess)
        preprocess_cfg.build_model()
        preprocess_steps = preprocess_cfg.model.preprocess
    APP.config["preprocess"] = preprocess_steps

    experiment = Experiment(config_path=options.configuration)
    experiment.build_model()
    APP.config["experiment"] = experiment

    APP.run(port=options.port, host=options.host)
Esempio n. 2
0
def main() -> None:
    """Run the configured runners on every test dataset and print results.

    Expects exactly two command-line arguments: the INI file of the run and
    the INI file with the ``test_datasets`` and ``variables`` entries. With
    any other argument count a usage line is printed and the process exits
    with status 1.
    """
    # pylint: disable=no-member,broad-except
    if len(sys.argv) != 3:
        print("Usage: run.py <run_ini_file> <test_datasets>")
        # ``exit`` is a helper injected by ``site`` for interactive sessions
        # (absent under ``python -S``); ``sys.exit`` is the program-safe form.
        sys.exit(1)

    test_datasets = Configuration()
    test_datasets.add_argument('test_datasets')
    test_datasets.add_argument('variables')

    CONFIG.load_file(sys.argv[1])
    CONFIG.build_model()
    test_datasets.load_file(sys.argv[2])
    test_datasets.build_model()
    datasets_model = test_datasets.model
    initialize_for_running(CONFIG.model.output, CONFIG.model.tf_manager,
                           datasets_model.variables)

    print("")

    # Two-element evaluation entries ``(a, b)`` are expanded to ``(a, a, b)``;
    # entries of any other length are passed through unchanged.
    evaluators = [(e[0], e[0], e[1]) if len(e) == 2 else e
                  for e in CONFIG.model.evaluation]

    for dataset in datasets_model.test_datasets:
        execution_results, output_data = run_on_dataset(
            CONFIG.model.tf_manager,
            CONFIG.model.runners,
            dataset,
            CONFIG.model.postprocess,
            write_out=True)
        # TODO what if there is no ground truth
        eval_result = evaluation(evaluators, dataset, CONFIG.model.runners,
                                 execution_results, output_data)
        # Only report datasets for which an evaluation was produced.
        if eval_result:
            print_final_evaluation(dataset.name, eval_result)
Esempio n. 3
0
def main():
    """Run the configured model on every test dataset and print evaluations.

    Expects two command-line arguments: the INI file of the run and the INI
    file listing ``test_datasets``. With any other argument count a usage
    line is printed and the process exits with status 1. Every dataset is
    checked against the encoders before anything is run; a failed check is
    logged in red and also ends the process with status 1.
    """
    # pylint: disable=no-member,broad-except
    if len(sys.argv) != 3:
        print("Usage: run.py <run_ini_file> <test_datasets>")
        # ``sys.exit`` rather than the interactive ``exit`` helper from
        # ``site``, which is not guaranteed to exist in every interpreter.
        sys.exit(1)

    test_datasets = Configuration()
    test_datasets.add_argument('test_datasets')

    args, sess = initialize_for_running(sys.argv[1])

    datasets_args = test_datasets.load_file(sys.argv[2])
    print("")

    try:
        for dataset in datasets_args.test_datasets:
            check_dataset_and_coders(dataset, args.encoders)
    except Exception as exc:
        # ``exc.message`` does not exist on Python 3 exceptions and would
        # raise AttributeError inside this handler; ``str(exc)`` is portable.
        log(str(exc), color='red')
        sys.exit(1)

    for dataset in datasets_args.test_datasets:
        _, _, evaluation = run_on_dataset(
            sess, args.runner, args.encoders + [args.decoder], args.decoder,
            dataset, args.evaluation, args.postprocess, write_out=True)
        # Only report datasets for which an evaluation was produced.
        if evaluation:
            print_dataset_evaluation(dataset.name, evaluation)
Esempio n. 4
0
def main():
    """Check all test datasets, then run the model on each and report.

    Command line: ``run.py <run_ini_file> <test_datasets>``. A wrong number
    of arguments prints the usage line and exits with status 1. A dataset
    that fails ``check_dataset_and_coders`` is reported through ``log`` in
    red and the process exits with status 1 before any dataset is run.
    """
    # pylint: disable=no-member,broad-except
    if len(sys.argv) != 3:
        print("Usage: run.py <run_ini_file> <test_datasets>")
        # ``exit`` is only installed by ``site`` for interactive use;
        # ``sys.exit`` works in every interpreter configuration.
        sys.exit(1)

    test_datasets = Configuration()
    test_datasets.add_argument('test_datasets')

    args, sess = initialize_for_running(sys.argv[1])

    datasets_args = test_datasets.load_file(sys.argv[2])
    print("")

    # Validate every dataset up front so that a late failure cannot waste
    # the work already done on earlier datasets.
    try:
        for dataset in datasets_args.test_datasets:
            check_dataset_and_coders(dataset, args.encoders)
    except Exception as exc:
        log(str(exc), color='red')
        sys.exit(1)

    for dataset in datasets_args.test_datasets:
        _, _, evaluation = run_on_dataset(sess,
                                          args.runner,
                                          args.encoders + [args.decoder],
                                          args.decoder,
                                          dataset,
                                          args.evaluation,
                                          args.postprocess,
                                          write_out=True)
        # Only report datasets for which an evaluation was produced.
        if evaluation:
            print_dataset_evaluation(dataset.name, evaluation)
Esempio n. 5
0
def main() -> None:
    """Run (or evaluate) an experiment on the configured test datasets.

    Positional arguments are the experiment INI file and the INI file with
    ``test_datasets`` and ``variables``. With ``-g``/``--grid`` exactly one
    test dataset is allowed and it is sliced according to the
    ``SGE_TASK_*`` environment variables before being processed.

    Raises:
        ValueError: ``--grid`` was given together with several datasets.
        EnvironmentError: ``--grid`` was given but one of the SGE variables
            is not set.
    """
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config",
                        metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("datasets",
                        metavar="INI-TEST-DATASETS",
                        help="the configuration of the test datasets")
    parser.add_argument("-g",
                        "--grid",
                        dest="grid",
                        action="store_true",
                        help="look at the SGE variables for slicing the data")
    args = parser.parse_args()

    test_datasets = Configuration()
    test_datasets.add_argument("test_datasets")
    test_datasets.add_argument("variables", cond=lambda x: isinstance(x, list))

    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model

    exp = Experiment(config_path=args.config)
    exp.build_model()

    # Everything after the model is built runs under ``try`` so that the
    # sessions are closed even when a run or evaluation raises.
    try:
        exp.load_variables(datasets_model.variables)

        if args.grid and len(datasets_model.test_datasets) > 1:
            raise ValueError(
                "Only one test dataset supported when using --grid")

        for dataset in datasets_model.test_datasets:
            if args.grid:
                required_vars = ("SGE_TASK_FIRST", "SGE_TASK_LAST",
                                 "SGE_TASK_STEPSIZE", "SGE_TASK_ID")
                if any(var not in os.environ for var in required_vars):
                    raise EnvironmentError(
                        "Some SGE environment variables are missing")

                step = int(os.environ["SGE_TASK_STEPSIZE"])
                start = int(os.environ["SGE_TASK_ID"]) - 1
                end = int(os.environ["SGE_TASK_LAST"]) - 1

                # The task number must come from the full step size; using
                # the truncated length of the last slice would report a
                # wrong (too large) task number.
                task_index = start // step

                length = step
                if start + length > end:
                    length = end - start + 1

                log("Running grid task {} starting at {} with step {}".format(
                    task_index, start, length))

                dataset = dataset.subset(start, length)

            if exp.config.args.evaluation is None:
                exp.run_model(dataset, write_out=True)
            else:
                exp.evaluate(dataset, write_out=True)
    finally:
        for session in exp.config.model.tf_manager.sessions:
            session.close()
Esempio n. 6
0
def load_runtime_config(config_path: str) -> argparse.Namespace:
    """Read a runtime configuration file and return the model built from it.

    Two arguments are registered before loading: ``test_datasets`` and
    ``variables``, the latter with a condition requiring a ``list``.

    Args:
        config_path: Path of the configuration file to load.

    Returns:
        The ``model`` attribute of the configuration after ``build_model``.
    """
    def _is_list(value) -> bool:
        return isinstance(value, list)

    runtime_cfg = Configuration()
    runtime_cfg.add_argument("test_datasets")
    runtime_cfg.add_argument("variables", cond=_is_list)

    runtime_cfg.load_file(config_path)
    runtime_cfg.build_model()

    built_model = runtime_cfg.model
    return built_model
Esempio n. 7
0
def main() -> None:
    """Start the web application for an experiment given on the command line.

    ``--configuration`` (required) names the experiment configuration;
    ``--preprocess`` optionally names a file whose ``preprocess`` entry is
    handed to the application. ``--host`` and ``--port`` select where the
    application listens.
    """
    option_table = (
        ("--port", dict(type=int, default=5000)),
        ("--host", dict(type=str, default="127.0.0.1")),
        ("--configuration", dict(type=str, required=True)),
        ("--preprocess", dict(type=str, required=False, default=None)),
    )
    parser = argparse.ArgumentParser(
        description="Runs Neural Monkey as a web server.")
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()

    print("")

    if args.preprocess is None:
        # No preprocessing file: the application gets an empty list.
        APP.config["preprocess"] = []
    else:
        loader = Configuration()
        loader.add_argument("preprocess")
        loader.load_file(args.preprocess)
        loader.build_model()
        APP.config["preprocess"] = loader.model.preprocess

    exp = Experiment(config_path=args.configuration)
    exp.build_model()
    APP.config["experiment"] = exp
    APP.run(port=args.port, host=args.host)
def main() -> None:
    """Train beam-search feature weights on the first test dataset.

    The experiment must contain a ``PlainRunner`` whose decoder is a
    ``CTCDecoder``; its logits are obtained sentence by sentence and passed
    to ``train_weights`` together with the language model. The current
    weights are printed after every sentence and written to
    ``<prefix>.<k>`` at regular intervals.

    Raises:
        ValueError: none of the weight options was given, or no CTC decoder
            was found among the runners.
    """
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config",
                        metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("datasets",
                        metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("--beam",
                        metavar="BEAM_SIZE",
                        type=int,
                        default=10,
                        help="Beam size.")
    parser.add_argument("--kenlm",
                        type=str,
                        help="Path to a KenLM model arpa file.")
    parser.add_argument("--prefix",
                        type=str,
                        help="Path used as a prefix of stored checkpoints.")
    parser.add_argument("--lm-weight",
                        type=float,
                        help="Default weight of the language model.")
    parser.add_argument("--null-trail-weight",
                        type=float,
                        help="Default weight of the null-trailing feature.")
    parser.add_argument("--nt-ratio-weight",
                        type=float,
                        help="Default weight of the null-token ratio feature.")

    args = parser.parse_args()
    # NOTE(review): --kenlm and --prefix are optional but used
    # unconditionally below; without --prefix checkpoints go to "None.<k>".
    # Consider making both required.

    test_datasets = Configuration()
    test_datasets.add_argument("test_datasets")
    test_datasets.add_argument("batch_size", cond=lambda x: x > 0)
    test_datasets.add_argument("variables", cond=lambda x: isinstance(x, list))

    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model

    exp = Experiment(config_path=args.config)
    exp.build_model()

    # From here on the sessions exist, so close them on every exit path.
    try:
        exp.load_variables(datasets_model.variables)

        # Only the weights given on the command line are trained; an
        # explicit 0.0 counts as given.
        weights = {}

        if args.lm_weight is not None:
            weights['lm_score'] = args.lm_weight

        if args.null_trail_weight is not None:
            weights['null_trailing'] = args.null_trail_weight

        if args.nt_ratio_weight is not None:
            weights['null_token_ratio'] = args.nt_ratio_weight

        if not weights:
            raise ValueError(
                "No default weights specified, nothing to train.")

        ctc_decoder = None
        for runner in exp.model.runners:
            if (isinstance(runner, PlainRunner)
                    and isinstance(runner.decoder, CTCDecoder)):
                ctc_decoder = runner.decoder
                break

        if ctc_decoder is None:
            raise ValueError(
                "Was not able to detect CTC decoder in the configuration.")

        print("Loading language model")
        lm = NGramModel(args.kenlm)
        print("LM loaded")

        # Replace the runners so that the model only outputs the logits.
        logits_runner = RepresentationRunner(output_series="logits",
                                             encoder=ctc_decoder,
                                             attribute="logits")
        exp.model.runners = [logits_runner]

        dataset = datasets_model.test_datasets[0]
        singleton_batches = dataset.batches(BatchingScheme(1))

        dataset_size = dataset.length
        checkpoints = 5
        # At least 1: a dataset with fewer sentences than checkpoints would
        # otherwise give an interval of 0 and a ZeroDivisionError below.
        checkpoint_iters = max(1, dataset_size // checkpoints)

        print(
            "{} sentences in the dataset, checkpoint every {} sentences ({} checkpoints in total)."
            .format(dataset_size, checkpoint_iters, checkpoints))

        for i, sent_dataset in enumerate(singleton_batches):
            ctc_model_result = exp.run_model(sent_dataset,
                                             write_out=False,
                                             batch_size=1)

            logits = np.squeeze(ctc_model_result[1]['logits'], axis=1)
            target = ctc_model_result[2]['target'][0]

            train_weights(logits, args.beam, ctc_decoder.vocabulary, target,
                          weights, lm)

            print(
                "[{}] Weights:".format(i + 1), ", ".join([
                    "{}: {:.3f}".format(key, value)
                    for key, value in weights.items()
                ]))

            if i != 0 and (i + 1) % checkpoint_iters == 0:
                checkpoint_path = "{}.{}".format(args.prefix,
                                                 i // checkpoint_iters)
                with open(checkpoint_path, "w") as f:
                    for key, value in weights.items():
                        f.write("{}={:.3f}\n".format(key.upper(), value))

                print("\nCheckpoint saved.\n")
    finally:
        for session in exp.config.model.tf_manager.sessions:
            session.close()
Esempio n. 9
0
def create_config() -> Configuration:
    """Return a ``Configuration`` with the training-loop arguments registered.

    The arguments are registered in a fixed order from a table of
    ``(name, keyword options)`` pairs; nothing is loaded from disk here.
    """
    # The table is built on every call so that list defaults are never
    # shared between two configurations.
    argument_table = [
        # training loop arguments
        ('tf_manager', {}),
        ('epochs', {'cond': lambda x: x >= 0}),
        ('trainer', {}),
        ('batch_size', {'cond': lambda x: x > 0}),
        ('train_dataset', {}),
        ('val_dataset', {}),
        ('output', {}),
        ('evaluation', {}),
        ('runners', {}),
        ('test_datasets', {'required': False, 'default': []}),
        ('logging_period', {'required': False, 'default': 20}),
        ('validation_period', {'required': False, 'default': 500}),
        ('val_preview_input_series', {'required': False, 'default': None}),
        ('val_preview_output_series', {'required': False, 'default': None}),
        ('val_preview_num_examples', {'required': False, 'default': 15}),
        ('train_start_offset', {'required': False, 'default': 0}),
        ('runners_batch_size', {'required': False, 'default': None}),
        ('minimize', {'required': False, 'default': False}),
        ('postprocess', {}),
        ('name', {}),
        # no explicit default is passed for the seed
        ('random_seed', {'required': False}),
        ('initial_variables', {'required': False, 'default': None}),
        ('overwrite_output_dir', {'required': False, 'default': False}),
    ]

    config = Configuration()
    for arg_name, arg_options in argument_table:
        config.add_argument(arg_name, **arg_options)
    return config
Esempio n. 10
0
def create_config(train_mode: bool = True) -> Configuration:
    """Create the experiment configuration for training or for running.

    Args:
        train_mode: When true, the training arguments are registered.
            Otherwise ``evaluation`` becomes optional and every name in
            ``_TRAIN_ARGS`` is marked as ignored.

    Returns:
        The configuration object with all arguments registered.
    """
    config = Configuration()

    def optional(arg_name, default_value):
        # Register an argument that may be missing from the file.
        config.add_argument(arg_name, required=False, default=default_value)

    # Arguments shared by both modes.
    optional("tf_manager", None)
    config.add_argument("batch_size",
                        required=False,
                        default=None,
                        cond=lambda x: x is None or x > 0)
    config.add_argument("output")
    optional("postprocess", None)
    config.add_argument("runners")

    if not train_mode:
        optional("evaluation", None)
        for train_only in _TRAIN_ARGS:
            config.ignore_argument(train_only)
        return config

    config.add_argument("epochs", cond=lambda x: x >= 0)
    config.add_argument("trainer")
    config.add_argument("train_dataset")
    optional("val_dataset", [])
    config.add_argument("evaluation")
    optional("test_datasets", [])
    optional("logging_period", 20)
    optional("validation_period", 500)
    optional("visualize_embeddings", None)
    optional("val_preview_input_series", None)
    optional("val_preview_output_series", None)
    optional("val_preview_num_examples", 15)
    optional("train_start_offset", 0)
    optional("name", "Neural Monkey Experiment")
    optional("random_seed", 2574600)
    optional("initial_variables", None)
    optional("overwrite_output_dir", False)
    return config
Esempio n. 11
0
import os
import argparse

from neuralmonkey.logging import log, log_print
from neuralmonkey.config.configuration import Configuration
from neuralmonkey.learning_utils import (evaluation, run_on_dataset,
                                         print_final_evaluation)

CONFIG = Configuration()

# Arguments registered with the default settings of ``add_argument``.
for _arg_name in ("tf_manager", "output", "postprocess", "evaluation",
                  "runners", "batch_size"):
    CONFIG.add_argument(_arg_name)

# Arguments that may be omitted from the configuration file.
CONFIG.add_argument("threads", required=False, default=4)
CONFIG.add_argument("runners_batch_size", required=False, default=None)

# ignore arguments which are just for training
for _arg_name in ("val_dataset", "trainer", "name", "train_dataset",
                  "epochs", "test_datasets", "initial_variables",
                  "validation_period", "val_preview_input_series",
                  "val_preview_output_series", "val_preview_num_examples",
                  "logging_period", "visualize_embeddings"):
    CONFIG.ignore_argument(_arg_name)

# Keep the module namespace free of the loop helper.
del _arg_name
Esempio n. 12
0
def create_config(config_file):
    """Register the experiment arguments and load ``config_file``.

    Every entry of the table below is a pair of positional arguments (the
    name, optionally followed by the expected type) and keyword options for
    ``Configuration.add_argument``.

    Returns whatever ``Configuration.load_file`` returns for the file.
    """
    # Built on every call so that list defaults are never shared.
    argument_table = (
        (('name', str), {}),
        (('random_seed', int), {'required': False}),
        (('output', str), {}),
        (('epochs', int), {'cond': lambda x: x >= 0}),
        (('trainer',), {}),
        (('encoders', list), {}),
        (('decoder',), {}),
        (('batch_size', int), {'cond': lambda x: x > 0}),
        (('train_dataset', Dataset), {}),
        (('val_dataset', Dataset), {}),
        (('postprocess',), {}),
        (('evaluation',), {'cond': list}),
        (('runner',), {}),
        (('test_datasets', list), {'required': False, 'default': []}),
        (('initial_variables', str), {'required': False, 'default': []}),
        (('validation_period', int), {'required': False, 'default': 500}),
        (('logging_period', int), {'required': False, 'default': 20}),
        (('threads', int), {'required': False, 'default': 4}),
        (('minimize', bool), {'required': False, 'default': False}),
        (('save_n_best', int), {'required': False, 'default': 1}),
        (('overwrite_output_dir', bool),
         {'required': False, 'default': False}),
    )

    config = Configuration()
    for positional, keywords in argument_table:
        config.add_argument(*positional, **keywords)

    return config.load_file(config_file)
Esempio n. 13
0
def create_config(train_mode: bool = True) -> Configuration:
    """Build the argument schema of an experiment configuration.

    Args:
        train_mode: Selects the training schema when true. When false,
            ``evaluation`` is optional and all names in ``_TRAIN_ARGS`` are
            ignored instead of registered.

    Returns:
        A ``Configuration`` with the arguments registered; no file is
        loaded here.
    """
    config = Configuration()

    def register(entries):
        # Apply a sequence of (name, keyword options) pairs in order.
        for entry_name, entry_options in entries:
            config.add_argument(entry_name, **entry_options)

    register((
        ("tf_manager", {"required": False, "default": None}),
        ("batch_size", {"required": False, "default": None,
                        "cond": lambda x: x is None or x > 0}),
        ("output", {}),
        ("postprocess", {"required": False, "default": None}),
        ("runners", {}),
        ("random_seed", {"required": False, "default": 2574600}),
    ))

    if train_mode:
        register((
            ("epochs", {"cond": lambda x: x >= 0}),
            ("trainer", {}),
            ("train_dataset", {}),
            ("val_dataset", {"required": False, "default": []}),
            ("evaluation", {}),
            ("test_datasets", {"required": False, "default": []}),
            ("logging_period", {"required": False, "default": 20}),
            ("validation_period", {"required": False, "default": 500}),
            ("visualize_embeddings", {"required": False, "default": None}),
            ("val_preview_input_series",
             {"required": False, "default": None}),
            ("val_preview_output_series",
             {"required": False, "default": None}),
            ("val_preview_num_examples", {"required": False, "default": 15}),
            ("train_start_offset", {"required": False, "default": 0}),
            ("name", {"required": False,
                      "default": "Neural Monkey Experiment"}),
            ("initial_variables", {"required": False, "default": None}),
            ("overwrite_output_dir", {"required": False, "default": False}),
        ))
    else:
        register((("evaluation", {"required": False, "default": None}),))
        for train_only in _TRAIN_ARGS:
            config.ignore_argument(train_only)

    return config
Esempio n. 14
0
def create_config(config_file):
    """Declare the experiment arguments, then load ``config_file``.

    Arguments without ``required=False`` are registered with the default
    settings of ``Configuration.add_argument``. The return value is the
    result of ``Configuration.load_file``.
    """
    config = Configuration()
    add = config.add_argument
    # Keyword shared by all arguments that may be left out of the file.
    optional = {'required': False}

    add('name', str)
    add('random_seed', int, **optional)
    add('output', str)
    add('epochs', int, cond=lambda x: x >= 0)
    add('trainer')
    add('encoders', list)
    add('decoder')
    add('batch_size', int, cond=lambda x: x > 0)
    add('train_dataset', Dataset)
    add('val_dataset', Dataset)
    add('postprocess')
    add('evaluation', cond=list)
    add('runner')
    add('test_datasets', list, default=[], **optional)
    add('initial_variables', str, default=[], **optional)
    add('validation_period', int, default=500, **optional)
    add('logging_period', int, default=20, **optional)
    add('threads', int, default=4, **optional)
    add('minimize', bool, default=False, **optional)
    add('save_n_best', int, default=1, **optional)
    add('overwrite_output_dir', bool, default=False, **optional)

    loaded = config.load_file(config_file)
    return loaded
Esempio n. 15
0
import os
import argparse

from neuralmonkey.logging import log, log_print
from neuralmonkey.config.configuration import Configuration
from neuralmonkey.learning_utils import (evaluation, run_on_dataset,
                                         print_final_evaluation)

CONFIG = Configuration()

# Arguments read from the run configuration file.
CONFIG.add_argument('tf_manager')
CONFIG.add_argument('output')
CONFIG.add_argument('postprocess')
CONFIG.add_argument('evaluation')
CONFIG.add_argument('runners')
CONFIG.add_argument('batch_size')
CONFIG.add_argument('threads', required=False, default=4)
CONFIG.add_argument('runners_batch_size', required=False, default=None)

# ignore arguments which are just for training
for _training_only in """
        val_dataset trainer name train_dataset epochs test_datasets
        initial_variables validation_period val_preview_input_series
        val_preview_output_series val_preview_num_examples logging_period
        visualize_embeddings
        """.split():
    CONFIG.ignore_argument(_training_only)

# The loop variable is not part of the module's interface.
del _training_only
Esempio n. 16
0
def main() -> None:
    """Run the configured runners on the test datasets and print evaluation.

    Positional arguments are the experiment INI file and the INI file with
    ``test_datasets`` and ``variables``. With ``-g``/``--grid`` exactly one
    dataset is allowed and it is sliced according to the ``SGE_TASK_*``
    environment variables before being processed.

    Raises:
        ValueError: ``--grid`` was given together with several datasets.
        EnvironmentError: ``--grid`` was given but one of the SGE variables
            is not set.
    """
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config",
                        metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument('datasets',
                        metavar='INI-TEST-DATASETS',
                        help="the configuration of the test datasets")
    parser.add_argument("-g",
                        "--grid",
                        dest="grid",
                        action="store_true",
                        help="look at the SGE variables for slicing the data")
    args = parser.parse_args()

    test_datasets = Configuration()
    test_datasets.add_argument('test_datasets')
    test_datasets.add_argument('variables')

    CONFIG.load_file(args.config)
    CONFIG.build_model()
    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model
    initialize_for_running(CONFIG.model.output, CONFIG.model.tf_manager,
                           datasets_model.variables)

    print("")

    # Two-element evaluation entries ``(a, b)`` are expanded to ``(a, a, b)``;
    # entries of any other length are passed through unchanged.
    evaluators = [(e[0], e[0], e[1]) if len(e) == 2 else e
                  for e in CONFIG.model.evaluation]

    if args.grid and len(datasets_model.test_datasets) > 1:
        raise ValueError("Only one test dataset supported when using --grid")

    # The batch size does not depend on the dataset, so pick it once:
    # ``runners_batch_size`` wins, ``batch_size`` is the fallback.
    if CONFIG.model.runners_batch_size is None:
        runners_batch_size = CONFIG.model.batch_size
    else:
        runners_batch_size = CONFIG.model.runners_batch_size

    for dataset in datasets_model.test_datasets:
        if args.grid:
            required_vars = ("SGE_TASK_FIRST", "SGE_TASK_LAST",
                             "SGE_TASK_STEPSIZE", "SGE_TASK_ID")
            if any(var not in os.environ for var in required_vars):
                raise EnvironmentError(
                    "Some SGE environment variables are missing")

            step = int(os.environ["SGE_TASK_STEPSIZE"])
            start = int(os.environ["SGE_TASK_ID"]) - 1
            end = int(os.environ["SGE_TASK_LAST"]) - 1

            # The task number must come from the full step size; using the
            # truncated length of the last slice would report a wrong
            # (too large) task number.
            task_index = start // step

            length = step
            if start + length > end:
                length = end - start + 1

            log("Running grid task {} starting at {} with step {}".format(
                task_index, start, length))

            dataset = dataset.subset(start, length)

        execution_results, output_data = run_on_dataset(
            CONFIG.model.tf_manager,
            CONFIG.model.runners,
            dataset,
            CONFIG.model.postprocess,
            write_out=True,
            batch_size=runners_batch_size,
            log_progress=60)
        # TODO what if there is no ground truth
        eval_result = evaluation(evaluators, dataset, CONFIG.model.runners,
                                 execution_results, output_data)
        # Only report datasets for which an evaluation was produced.
        if eval_result:
            print_final_evaluation(dataset.name, eval_result)
Esempio n. 17
0
# tests: lint, mypy

import sys
import os

from neuralmonkey.logging import log
from neuralmonkey.config.configuration import Configuration
from neuralmonkey.checking import check_dataset_and_coders
from neuralmonkey.learning_utils import initialize_tf, run_on_dataset, \
    print_dataset_evaluation

CONFIG = Configuration()
# Arguments read from the run configuration file.
CONFIG.add_argument('output', str)
CONFIG.add_argument('encoders', list, cond=lambda l: len(l) > 0)
CONFIG.add_argument('decoder')
CONFIG.add_argument('postprocess')
# NOTE(review): ``cond=list`` passes the ``list`` constructor as the
# condition; it looks like a type was meant - confirm against Configuration.
CONFIG.add_argument('evaluation', cond=list)
CONFIG.add_argument('runner')
CONFIG.add_argument('threads', int, required=False, default=4)

# ignore arguments which are just for training
CONFIG.ignore_argument('val_dataset')
CONFIG.ignore_argument('trainer')
CONFIG.ignore_argument('name')
CONFIG.ignore_argument('train_dataset')
CONFIG.ignore_argument('random_seed')
CONFIG.ignore_argument('epochs')
CONFIG.ignore_argument('batch_size')
# The training argument is spelled ``test_datasets``; the former
# ``tests_datasets`` never matched it.
CONFIG.ignore_argument('test_datasets')
CONFIG.ignore_argument('initial_variables')
CONFIG.ignore_argument('validation_period')
Esempio n. 18
0
def create_config() -> Configuration:
    """Create a ``Configuration`` describing the training-loop arguments.

    Only the schema is set up; loading a file is left to the caller.
    """
    config = Configuration()

    def plain(*arg_names):
        # Register arguments with the default settings of ``add_argument``.
        for arg_name in arg_names:
            config.add_argument(arg_name)

    def optional(arg_name, default_value):
        # Register an argument that may be missing from the file.
        config.add_argument(arg_name, required=False, default=default_value)

    # training loop arguments
    plain("tf_manager")
    config.add_argument("epochs", cond=lambda x: x >= 0)
    plain("trainer")
    config.add_argument("batch_size", cond=lambda x: x > 0)
    plain("train_dataset", "val_dataset", "output", "evaluation", "runners")
    optional("test_datasets", [])
    optional("logging_period", 20)
    optional("validation_period", 500)
    optional("visualize_embeddings", None)
    optional("val_preview_input_series", None)
    optional("val_preview_output_series", None)
    optional("val_preview_num_examples", 15)
    optional("train_start_offset", 0)
    optional("runners_batch_size", None)
    plain("postprocess", "name")
    # The seed is optional but gets no explicit default.
    config.add_argument("random_seed", required=False)
    optional("initial_variables", None)
    optional("overwrite_output_dir", False)

    return config
Esempio n. 19
0
# tests: lint, mypy

import sys
import os

from neuralmonkey.logging import log, log_print
from neuralmonkey.config.configuration import Configuration
from neuralmonkey.learning_utils import (evaluation, run_on_dataset,
                                         print_final_evaluation)
from neuralmonkey.tf_manager import TensorFlowManager

CONFIG = Configuration()

# Arguments read from the run configuration, with their expected types.
CONFIG.add_argument('tf_manager', TensorFlowManager)
CONFIG.add_argument('output', str)
CONFIG.add_argument('postprocess')
CONFIG.add_argument('evaluation', list)
CONFIG.add_argument('runners', list)
CONFIG.add_argument('threads', int, required=False, default=4)
CONFIG.add_argument('runners_batch_size', int, required=False, default=None)

# ignore arguments which are just for training
for _unused_when_running in [
        'val_dataset',
        'trainer',
        'name',
        'train_dataset',
        'epochs',
        'batch_size',
        'test_datasets',
        'initial_variables',
        'validation_period',
        'logging_period',
        'minimize',
]:
    CONFIG.ignore_argument(_unused_when_running)

# The loop variable is not part of the module's interface.
del _unused_when_running
def main() -> None:
    """Decode the first test dataset with beam search over CTC logits.

    The experiment must contain a ``PlainRunner`` whose decoder is a
    ``CTCDecoder``. Each sentence is run through the model on its own, the
    logits are passed to ``decode_beam`` together with the language model
    and the feature weights, and the best hypothesis is written to the
    ``--out`` file. Per-sentence statistics (hypothesis length, model time,
    decoding time) are written to ``<out>.stats``.

    Raises:
        ValueError: no CTC decoder was found among the runners.
    """
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config",
                        metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("datasets",
                        metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("--beam",
                        metavar="BEAM_SIZE",
                        type=int,
                        default=10,
                        help="Beam size.")
    parser.add_argument("--kenlm",
                        type=str,
                        default=None,
                        help="Path to a KenLM model arpa file.")
    parser.add_argument("--lm-weight",
                        type=float,
                        help="Weight of the language model.")
    parser.add_argument("--null-trail-weight",
                        type=float,
                        help="Weight of the null-trailing feature.")
    parser.add_argument("--nt-ratio-weight",
                        type=float,
                        help="Weight of the null-token ratio feature.")
    parser.add_argument("--out", type=str, help="Path to the output file.")
    args = parser.parse_args()

    # The output path is always opened below; fail right away instead of
    # with a TypeError after the model and the language model were loaded.
    if args.out is None:
        parser.error("the --out option is required")

    test_datasets = Configuration()
    test_datasets.add_argument("test_datasets")
    test_datasets.add_argument("batch_size", cond=lambda x: x > 0)
    test_datasets.add_argument("variables", cond=lambda x: isinstance(x, list))

    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model

    exp = Experiment(config_path=args.config)
    exp.build_model()

    # From here on the sessions exist, so close them on every exit path.
    try:
        exp.load_variables(datasets_model.variables)

        ctc_decoder = None
        for runner in exp.model.runners:
            if (isinstance(runner, PlainRunner)
                    and isinstance(runner.decoder, CTCDecoder)):
                ctc_decoder = runner.decoder
                break

        if ctc_decoder is None:
            raise ValueError(
                "Was not able to detect CTC decoder in the configuration.")

        # Replace the runners so that the model only outputs the logits.
        logits_runner = RepresentationRunner(output_series="logits",
                                             encoder=ctc_decoder,
                                             attribute="logits")
        exp.model.runners = [logits_runner]

        dataset = datasets_model.test_datasets[0]
        singleton_batches = dataset.batches(BatchingScheme(1))
        print("Loading language model")
        lm = NGramModel(args.kenlm)
        print("LM loaded")

        # ``is not None`` instead of truthiness: an explicit weight of 0.0
        # must reach the decoder rather than being silently dropped.
        weights = {}

        if args.lm_weight is not None:
            weights['lm_score'] = args.lm_weight

        if args.null_trail_weight is not None:
            weights['null_trailing'] = args.null_trail_weight

        if args.nt_ratio_weight is not None:
            weights['null_token_ratio'] = args.nt_ratio_weight

        print("Weights:", weights)

        stats = []

        with open(args.out, 'w') as out_file:
            for i, sent_dataset in enumerate(singleton_batches):
                model_start = timeit.default_timer()
                ctc_model_result = exp.run_model(sent_dataset,
                                                 write_out=False,
                                                 batch_size=1)
                model_end = timeit.default_timer()

                logits = np.squeeze(ctc_model_result[1]['logits'], axis=1)

                decode_start = timeit.default_timer()
                best_hyp = decode_beam(logits,
                                       args.beam,
                                       ctc_decoder.vocabulary,
                                       lm=lm,
                                       weights=weights)
                decode_end = timeit.default_timer()

                stats.append([len(best_hyp.tokens),
                              model_end - model_start,
                              decode_end - decode_start])

                output = " ".join(best_hyp.tokens)
                out_file.write(output + "\n")

                # Show every tenth sentence as a progress indicator.
                if i % 10 == 0:
                    print("[{}] {}".format(i, output))

        with open(args.out + ".stats", 'w') as stats_file:
            for line in stats:
                stats_file.write("{} {:.3f} {:.3f}\n".format(*line))
    finally:
        for session in exp.config.model.tf_manager.sessions:
            session.close()
Esempio n. 21
0
# tests: lint, mypy

import sys
import os

from neuralmonkey.logging import log
from neuralmonkey.config.configuration import Configuration
from neuralmonkey.checking import check_dataset_and_coders
from neuralmonkey.learning_utils import initialize_tf, run_on_dataset, \
    print_dataset_evaluation

CONFIG = Configuration()
# Arguments read from the run configuration file.
CONFIG.add_argument('output', str)
CONFIG.add_argument('encoders', list, cond=lambda l: len(l) > 0)
CONFIG.add_argument('decoder')
CONFIG.add_argument('postprocess')
# NOTE(review): ``cond=list`` passes the ``list`` constructor as the
# condition; it looks like a type was meant - confirm against Configuration.
CONFIG.add_argument('evaluation', cond=list)
CONFIG.add_argument('runner')
CONFIG.add_argument('threads', int, required=False, default=4)

# ignore arguments which are just for training
# ('test_datasets' replaces the misspelled 'tests_datasets', which never
# matched the name of the training argument)
for _training_only in ('val_dataset', 'trainer', 'name', 'train_dataset',
                       'random_seed', 'epochs', 'batch_size',
                       'test_datasets', 'initial_variables',
                       'validation_period'):
    CONFIG.ignore_argument(_training_only)

# The loop variable is not part of the module's interface.
del _training_only