Example #1
    def __iter__(self):
        """A generator for iterating through the lattices of this job.
        """

        file_type = TextFileType('r')

        for path in self._lattices:
            logging.info("Reading lattice file '%s'.", path)
            lattice_file = file_type(path)
            if self._lattice_format == 'slf':
                yield SLFLattice(lattice_file)
            else:
                assert self._lattice_format == 'kaldi'
                lattice_lines = []
                id_to_word = self.kaldi_id_to_word
                while True:
                    line = lattice_file.readline()
                    if not line:
                        # end of file
                        if lattice_lines:
                            yield KaldiLattice(lattice_lines, id_to_word)
                        break
                    line = line.strip()
                    if not line:
                        # empty line
                        if lattice_lines:
                            yield KaldiLattice(lattice_lines, id_to_word)
                        lattice_lines = []
                        continue
                    lattice_lines.append(line)
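
A short consumption sketch, assuming a hypothetical DecodingJob class that provides the __iter__ above (the class name and constructor arguments are assumptions for illustration, not from the example):

# Hypothetical wiring; only __iter__ above is from the example.
job = DecodingJob(lattice_paths, lattice_format='kaldi',
                  kaldi_vocabulary=vocabulary_file)
for lattice in job:
    # Each item is an SLFLattice or a KaldiLattice, depending on the format.
    process(lattice)  # process() is a placeholder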
Example #2
def add_arguments(parser):
    """Specifies the command line arguments supported by the "theanolm sample"
    command.

    :type parser: argparse.ArgumentParser
    :param parser: a command line argument parser
    """

    argument_group = parser.add_argument_group("files")
    argument_group.add_argument(
        'model_path',
        metavar='MODEL-FILE',
        type=str,
        help='the model file that will be used to generate text')
    argument_group.add_argument(
        '--output-file',
        metavar='FILE',
        type=TextFileType('w'),
        default='-',
        help='where to write the generated sentences (default stdout, will be '
        'compressed if the name ends in ".gz")')

    argument_group = parser.add_argument_group("sampling")
    argument_group.add_argument('--num-sentences',
                                metavar='N',
                                type=int,
                                default=10,
                                help='generate N sentences')
    argument_group.add_argument(
        '--random-seed',
        metavar='N',
        type=int,
        default=None,
        help='seed to initialize the random state (default is to seed from a '
        'random source provided by the operating system)')
    argument_group.add_argument('--sentence-length',
                                metavar='N',
                                type=int,
                                default=30,
                                help='generate sentences of N tokens')
    argument_group.add_argument(
        '--seed-sequence',
        metavar='SEQUENCE',
        type=str,
        help='use SEQUENCE as the seed; i.e. first compute forward passes '
        'with the sequence, then generate')

    argument_group = parser.add_argument_group("configuration")
    argument_group.add_argument(
        '--default-device',
        metavar='DEVICE',
        type=str,
        default=None,
        help='when multiple GPUs are present, use DEVICE as default')

    argument_group = parser.add_argument_group("debugging")
    argument_group.add_argument('--debug',
                                action="store_true",
                                help='enables debugging Theano errors')
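
A minimal sketch of how this add_arguments function can be wired into a parser; it assumes `from theanolm.backend import TextFileType` is in scope, and the argument values are illustrative:

import argparse

# Hypothetical wiring of the add_arguments function above.
parser = argparse.ArgumentParser(prog='theanolm sample')
add_arguments(parser)
args = parser.parse_args(['model.h5', '--num-sentences', '5'])
print(args.num_sentences)  # 5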
Example #3
def add_arguments(parser):
    """Specifies the command line arguments supported by the "theanolm score"
    command.

    :type parser: argparse.ArgumentParser
    :param parser: a command line argument parser
    """

    argument_group = parser.add_argument_group("files")
    argument_group.add_argument(
        'model_path',
        metavar='MODEL-FILE',
        type=str,
        help='the model file that will be used to score text')
    argument_group.add_argument(
        'input_file',
        metavar='TEXT-FILE',
        type=TextFileType('r'),
        help='text file containing text to be scored (UTF-8, one sentence per '
        'line, assumed to be compressed if the name ends in ".gz")')
    argument_group.add_argument(
        '--output-file',
        metavar='FILE',
        type=TextFileType('w'),
        default='-',
        help='where to write the statistics (default stdout, will be '
        'compressed if the name ends in ".gz")')

    argument_group = parser.add_argument_group("scoring")
    argument_group.add_argument(
        '--output',
        metavar='DETAIL',
        type=str,
        default='perplexity',
        choices=['perplexity', 'utterance-scores', 'word-scores'],
        help='what to output, one of "perplexity", "utterance-scores", '
        '"word-scores" (default "perplexity")')
    argument_group.add_argument(
        '--log-base',
        metavar='B',
        type=int,
        default=None,
        help='convert output log probabilities to base B (default is the '
        'natural logarithm)')
    argument_group.add_argument(
        '--exclude-unk',
        action="store_true",
        help="exclude <unk> tokens from perplexity computation")
    argument_group.add_argument(
        '--subwords',
        metavar='MARKING',
        type=str,
        default=None,
        choices=['word-boundary', 'prefix-affix', None],
        help='the subword vocabulary uses MARKING to indicate how words are '
        'formed from subwords; one of "word-boundary" (<w> token '
        'separates words), "prefix-affix" (subwords that can be '
        'concatenated are prefixed or affixed with +, e.g. "cat+ +s")')
    argument_group.add_argument(
        '--shortlist',
        action="store_true",
        help='distribute <unk> token probability among the out-of-shortlist '
        'words according to their unigram frequencies in the training '
        'data')

    argument_group = parser.add_argument_group("configuration")
    argument_group.add_argument(
        '--default-device',
        metavar='DEVICE',
        type=str,
        default=None,
        help='when multiple GPUs are present, use DEVICE as default')

    argument_group = parser.add_argument_group("logging and debugging")
    argument_group.add_argument(
        '--log-file',
        metavar='FILE',
        type=str,
        default='-',
        help='path where to write log file (default is standard output)')
    argument_group.add_argument(
        '--log-level',
        metavar='LEVEL',
        type=str,
        default='info',
        choices=['debug', 'info', 'warn'],
        help='minimum level of events to log, one of "debug", "info", "warn" '
        '(default "info")')
    argument_group.add_argument(
        '--debug',
        action="store_true",
        help='use test values to get better error messages from Theano')
    argument_group.add_argument('--profile',
                                action="store_true",
                                help='enable profiling Theano functions')
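
Because every subcommand exposes the same add_arguments hook, the functions compose naturally with argparse subparsers; a minimal sketch (the module names sample and score are assumptions for illustration):

import argparse
import sample, score  # hypothetical modules holding the add_arguments above

parser = argparse.ArgumentParser(prog='theanolm')
subparsers = parser.add_subparsers(dest='command')
sample.add_arguments(subparsers.add_parser('sample'))
score.add_arguments(subparsers.add_parser('score'))
args = parser.parse_args()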
Example #4
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Reads a class definitions file and an n-gram counts file, and corrects the
# class expansion probabilities according to the unigram counts of the words.

import argparse
import sys
from theanolm.backend import TextFileType
from wordclasses import WordClasses, WordsToClasses
from ngramcounts import NGramCounts

parser = argparse.ArgumentParser()
parser.add_argument('classes',
                    type=TextFileType('r'),
                    help='input class definitions file')
parser.add_argument('counts',
                    type=TextFileType('r'),
                    help='n-gram counts file')
args = parser.parse_args()

classes = WordClasses()
classes.read(args.classes)

word_counts = NGramCounts()
word_counts.read(args.counts)

for cls in classes:
    counts = dict()
    for word, prob in cls:
        unigram = tuple([word])
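        # The example is truncated at this point. A plausible completion,
        # assuming NGramCounts supports membership tests and item lookup
        # (both assumptions), collects the unigram count of each word:
        counts[word] = word_counts[unigram] if unigram in word_counts else 0

    # Correct the class expansion probabilities in proportion to the
    # unigram counts; the printed format mimics srilm-classes and is an
    # assumption, not part of the original script.
    total = sum(counts.values())
    for word, count in counts.items():
        corrected_prob = count / total if total > 0 else 0.0
        print(cls, corrected_prob, word)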
Example #5
def add_arguments(parser):
    """Specifies the command line arguments supported by the "theanolm train"
    command.

    :type parser: argparse.ArgumentParser
    :param parser: a command line argument parser
    """

    argument_group = parser.add_argument_group("data")
    argument_group.add_argument(
        'model_path',
        metavar='MODEL-FILE',
        type=str,
        help='path where the best model state will be saved in HDF5 binary '
        'data format')
    argument_group.add_argument(
        '--training-set',
        metavar='FILE',
        type=TextFileType('r'),
        nargs='+',
        required=True,
        help='text files containing training data (UTF-8, one sentence per '
        'line, assumed to be compressed if the name ends in ".gz")')
    argument_group.add_argument(
        '--validation-file',
        metavar='VALID-FILE',
        type=TextFileType('r'),
        default=None,
        help='text file containing validation data for early stopping (UTF-8, '
        'one sentence per line, assumed to be compressed if the name ends '
        'in ".gz")')

    argument_group = parser.add_argument_group("vocabulary")
    argument_group.add_argument(
        '--vocabulary',
        metavar='FILE',
        type=str,
        default=None,
        help='word or class vocabulary to be used in the neural network input '
        'and output, in the format specified by the --vocabulary-format '
        'argument (UTF-8 text, default is to use all the words from the '
        'training data)')
    argument_group.add_argument(
        '--vocabulary-format',
        metavar='FORMAT',
        type=str,
        default='words',
        choices=['words', 'classes', 'srilm-classes'],
        help='format of the file specified with --vocabulary argument, one of '
        '"words" (one word per line, default), "classes" (word and class '
        'ID per line), "srilm-classes" (class name, membership '
        'probability, and word per line)')
    argument_group.add_argument(
        '--num-classes',
        metavar='N',
        type=int,
        default=None,
        help='generate N classes using a simple word frequency based algorithm '
        'when --vocabulary argument is not given (default is to not use '
        'word classes)')

    argument_group = parser.add_argument_group("network architecture")
    argument_group.add_argument(
        '--architecture',
        metavar='FILE',
        type=str,
        default='lstm300',
        help='path to neural network architecture description, or a standard '
        'architecture name, "lstm300" or "lstm1500" (default "lstm300")')

    argument_group = parser.add_argument_group("training process")
    argument_group.add_argument(
        '--sampling',
        metavar='FRACTION',
        type=float,
        nargs='*',
        default=[],
        help='randomly sample only FRACTION of each training file on each '
        'epoch (list the fractions in the same order as the training '
        'files)')
    argument_group.add_argument(
        '--sequence-length',
        metavar='N',
        type=int,
        default=100,
        help='ignore sentences longer than N words (default 100)')
    argument_group.add_argument(
        '--batch-size',
        metavar='N',
        type=int,
        default=16,
        help='each mini-batch will contain N sentences (default 16)')
    argument_group.add_argument(
        '--validation-frequency',
        metavar='N',
        type=int,
        default=5,
        help='cross-validate for reducing learning rate or early stopping N '
        'times per training epoch (default 5)')
    argument_group.add_argument(
        '--patience',
        metavar='N',
        type=int,
        default=4,
        help='allow perplexity to increase for N consecutive '
        'cross-validations before decreasing the learning rate; if less '
        'than zero, never decrease the learning rate (default 4)')
    argument_group.add_argument(
        '--random-seed',
        metavar='N',
        type=int,
        default=None,
        help='seed to initialize the random state (default is to seed from a '
        'random source provided by the operating system)')

    argument_group = parser.add_argument_group("optimization")
    argument_group.add_argument(
        '--optimization-method',
        metavar='NAME',
        type=str,
        default='adagrad',
        choices=[
            'sgd', 'nesterov', 'adagrad', 'adadelta', 'rmsprop-sgd',
            'rmsprop-nesterov', 'adam'
        ],
        help='optimization method, one of "sgd", "nesterov", "adagrad", '
        '"adadelta", "rmsprop-sgd", "rmsprop-nesterov", "adam" '
        '(default "adagrad")')
    argument_group.add_argument('--learning-rate',
                                metavar='ALPHA',
                                type=float,
                                default=0.1,
                                help='initial learning rate (default 0.1)')
    argument_group.add_argument(
        '--l1-regularization',
        metavar='LAMBDA',
        type=float,
        default=None,
        help='add L1 regularization term with weight LAMBDA to the cost')
    argument_group.add_argument(
        '--l2-regularization',
        metavar='LAMBDA',
        type=float,
        default=None,
        help='add L2 regularization term with weight LAMBDA to the cost')
    argument_group.add_argument(
        '--momentum',
        metavar='BETA',
        type=float,
        default=0.9,
        help='momentum coefficient for momentum optimization methods (default '
        '0.9)')
    argument_group.add_argument(
        '--gradient-decay-rate',
        metavar='GAMMA',
        type=float,
        default=0.9,
        help='geometric rate for averaging gradients (default 0.9)')
    argument_group.add_argument(
        '--sqr-gradient-decay-rate',
        metavar='GAMMA',
        type=float,
        default=0.999,
        help='geometric rate for averaging squared gradients in Adam optimizer '
        '(default 0.999)')
    argument_group.add_argument(
        '--numerical-stability-term',
        metavar='EPSILON',
        type=float,
        default=1e-6,
        help='a value that is used to prevent instability when dividing by '
        'very small numbers (default 1e-6)')
    argument_group.add_argument(
        '--gradient-normalization',
        metavar='THRESHOLD',
        type=float,
        default=5,
        help='scale down the gradients if necessary to make sure their norm '
        '(normalized by mini-batch size) will not exceed THRESHOLD '
        '(default 5)')
    argument_group.add_argument(
        '--cost',
        metavar='NAME',
        type=str,
        default='cross-entropy',
        choices=['cross-entropy', 'nce', 'blackout'],
        help='cost function, one of "cross-entropy" (default), "nce" '
        '(noise-contrastive estimation), or "blackout"')
    argument_group.add_argument(
        '--num-noise-samples',
        metavar='K',
        type=int,
        default=5,
        help='sampling-based costs sample K noise words per training word '
        '(default 5)')
    argument_group.add_argument(
        '--noise-distribution',
        metavar='DIST',
        type=str,
        default='uniform',
        choices=['uniform', 'log-uniform', 'unigram'],
        help='sample noise from DIST; one of "uniform" (default, but less '
        'accurate), "log-uniform" (the vocabulary should be ordered by '
        'decreasing frequency), "unigram" (unigram distribution of words '
        'in training data, slow)')
    argument_group.add_argument(
        '--noise-dampening',
        metavar='ALPHA',
        type=float,
        default=0.5,
        help='the empirical unigram distribution is raised to the power ALPHA '
        'before sampling noise words; 0.0 corresponds to the uniform '
        'distribution and 1.0 corresponds to the unigram distribution '
        '(only applicable with --noise-distribution=unigram, default 0.5)')
    argument_group.add_argument(
        '--noise-sharing',
        metavar='SHARING',
        type=str,
        default=None,
        choices=['seq', 'batch', None],
        help='can be "seq" for sharing noise samples between mini-batch '
        'sequences, or "batch" for sharing noise samples across the entire '
        'mini-batch for improved speed (default is no sharing, which is '
        'very slow)')
    argument_group.add_argument(
        '--exclude-unk',
        action="store_true",
        help="exclude <unk> tokens from cost and perplexity computations")
    argument_group.add_argument(
        '--weights',
        metavar='LAMBDA',
        type=float,
        nargs='*',
        default=[],
        help='scale a mini-batch update by LAMBDA if the data is from the '
        'corresponding training file (list the weights in the same order '
        'as the training files)')

    argument_group = parser.add_argument_group("early stopping")
    argument_group.add_argument(
        '--stopping-criterion',
        metavar='NAME',
        type=str,
        default='annealing-count',
        choices=['epoch-count', 'no-improvement', 'annealing-count'],
        help='selects a criterion for early stopping, one of "epoch-count" '
        '(fixed number of epochs), "no-improvement" (no improvement since '
        'learning rate was decreased), "annealing-count" (default, '
        'learning rate is decreased a fixed number of times)')
    argument_group.add_argument(
        '--min-epochs',
        metavar='N',
        type=int,
        default=1,
        help='perform at least N training epochs (default 1)')
    argument_group.add_argument(
        '--max-epochs',
        metavar='N',
        type=int,
        default=100,
        help='perform at most N training epochs (default 100)')
    argument_group.add_argument(
        '--max-annealing-count',
        metavar='N',
        type=int,
        default=0,
        help='when using annealing-count stopping criterion, continue training '
        'after decreasing learning rate at most N times (default 0)')

    argument_group = parser.add_argument_group("configuration")
    argument_group.add_argument(
        '--default-device',
        metavar='DEVICE',
        type=str,
        default=None,
        help='when multiple GPUs are present, use DEVICE as default')

    argument_group = parser.add_argument_group("logging and debugging")
    argument_group.add_argument(
        '--log-file',
        metavar='FILE',
        type=str,
        default='-',
        help='path where to write log file (default is standard output)')
    argument_group.add_argument(
        '--log-level',
        metavar='LEVEL',
        type=str,
        default='info',
        choices=['debug', 'info', 'warn'],
        help='minimum level of events to log, one of "debug", "info", "warn" '
        '(default "info")')
    argument_group.add_argument(
        '--log-interval',
        metavar='N',
        type=int,
        default=1000,
        help='print statistics for every Nth mini-batch update; quiet if less '
        'than one (default 1000)')
    argument_group.add_argument(
        '--debug',
        action="store_true",
        help='use test values to get better error messages from Theano')
    argument_group.add_argument('--print-graph',
                                action="store_true",
                                help='print Theano computation graph')
    argument_group.add_argument('--profile',
                                action="store_true",
                                help='enable profiling Theano functions')
    argument_group.add_argument(
        '--load-and-train',
        action="store_true",
        help='load the weight matrices from MODEL-FILE and retrain')
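
One detail worth a sketch: --sampling and --weights accept one value per training file, paired by position with --training-set. A hedged validation helper (hypothetical, not from theanolm) makes the contract explicit:

def check_per_file_options(args):
    # Hypothetical helper: when given, each fractions/weights list must
    # contain exactly one value per --training-set file.
    num_files = len(args.training_set)
    for name, values in (('--sampling', args.sampling),
                         ('--weights', args.weights)):
        if values and len(values) != num_files:
            raise ValueError('{} expects one value per training file '
                             '({} given, {} files)'.format(
                                 name, len(values), num_files))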
Example #6
def main():
    parser = argparse.ArgumentParser(prog='wctool')

    argument_group = parser.add_argument_group("files")
    argument_group.add_argument(
        '--training-set',
        metavar='FILE',
        type=TextFileType('r'),
        nargs='+',
        required=True,
        help='text or .gz files containing training data (one sentence per '
        'line)')
    argument_group.add_argument(
        '--vocabulary',
        metavar='FILE',
        type=TextFileType('r'),
        default=None,
        help='text or .gz file containing a list of words to include in class '
        'forming, and possibly their initial classes')
    argument_group.add_argument(
        '--vocabulary-format',
        metavar='FORMAT',
        type=str,
        default='words',
        help='vocabulary format, one of "words" (one word per line, default), '
        '"classes" (word and class ID per line), "srilm-classes" (class '
        'name, membership probability, and word per line)')
    argument_group.add_argument(
        '--output-file',
        metavar='FILE',
        type=TextFileType('w'),
        default='-',
        help='where to write the word classes (default stdout)')
    argument_group.add_argument(
        '--output-format',
        metavar='FORMAT',
        type=str,
        default='srilm-classes',
        help='format of the output file, one of "classes" (word and class ID '
        'per line), "srilm-classes" (default; class name, membership '
        'probability, and word per line)')
    argument_group.add_argument(
        '--output-frequency',
        metavar='N',
        type=int,
        default=1,
        help='save classes N times per optimization iteration (default 1)')

    argument_group = parser.add_argument_group("optimization")
    argument_group.add_argument(
        '--num-classes',
        metavar='N',
        type=int,
        default=2000,
        help='number of classes to form, if vocabulary is not specified '
        '(default 2000)')
    argument_group.add_argument(
        '--method',
        metavar='NAME',
        type=str,
        default='bigram-theano',
        help='method for creating word classes, one of "bigram-theano", '
        '"bigram-numpy" (default "bigram-theano")')

    argument_group = parser.add_argument_group("logging and debugging")
    argument_group.add_argument(
        '--log-file',
        metavar='FILE',
        type=str,
        default='-',
        help='path where to write log file (default is standard output)')
    argument_group.add_argument(
        '--log-level',
        metavar='LEVEL',
        type=str,
        default='info',
        help='minimum level of events to log, one of "debug", "info", "warn" '
        '(default "info")')
    argument_group.add_argument(
        '--log-interval',
        metavar='N',
        type=int,
        default=1000,
        help='print statistics after every Nth word; quiet if less than one '
        '(default 1000)')

    args = parser.parse_args()

    log_file = args.log_file
    log_level = getattr(logging, args.log_level.upper(), None)
    if not isinstance(log_level, int):
        raise ValueError("Invalid logging level requested: " + args.log_level)
    log_format = '%(asctime)s %(funcName)s: %(message)s'
    if args.log_file == '-':
        logging.basicConfig(stream=sys.stdout,
                            format=log_format,
                            level=log_level)
    else:
        logging.basicConfig(filename=log_file,
                            format=log_format,
                            level=log_level)

    if args.vocabulary is None:
        word_counts = compute_word_counts(args.training_set)
        vocabulary = Vocabulary.from_word_counts(word_counts, args.num_classes)
        for subset_file in args.training_set:
            subset_file.seek(0)
    else:
        vocabulary = Vocabulary.from_file(args.vocabulary,
                                          args.vocabulary_format)

    print("Number of words in vocabulary:", vocabulary.num_shortlist_words())
    print("Number of word classes:", vocabulary.num_classes())
    print("Number of normal word classes:", vocabulary.num_normal_classes)

    logging.info("Reading word unigram and bigram statistics.")
    statistics = BigramStatistics(args.training_set, vocabulary)

    if args.method == 'bigram-theano':
        optimizer = TheanoBigramOptimizer(statistics, vocabulary)
    elif args.method == 'bigram-numpy':
        optimizer = NumpyBigramOptimizer(statistics, vocabulary)
    else:
        raise ValueError("Invalid method requested: " + args.method)

    iteration = 1
    while True:
        logging.info("Starting iteration %d.", iteration)
        num_words = 0
        num_moves = 0
        for word in vocabulary.words():
            start_time = time()
            num_words += 1
            if optimizer.move_to_best_class(word):
                num_moves += 1
            duration = time() - start_time
            if (args.log_interval >= 1) and \
               (num_words % args.log_interval == 0):
                logging.info(
                    "[%d] (%.1f %%) of iteration %d -- moves = %d, cost = %.2f, duration = %.1f ms",
                    num_words,
                    num_words / vocabulary.num_shortlist_words() * 100,
                    iteration, num_moves, optimizer.log_likelihood(),
                    duration * 1000)
            if is_scheduled(num_words, args.output_frequency,
                            vocabulary.num_shortlist_words()):
                save(optimizer, args.output_file, args.output_format)

        if num_moves == 0:
            break
        iteration += 1

    logging.info("Optimization finished.")
    save(optimizer, args.output_file, args.output_format)
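
main() relies on is_scheduled() and save(), which are not shown. A minimal sketch of what is_scheduled could look like, given that --output-frequency requests N saves per optimization iteration (an assumption about the actual implementation):

def is_scheduled(num_words, frequency, total_words):
    # Hypothetical implementation: trigger when num_words reaches one of
    # `frequency` evenly spaced checkpoints within the iteration.
    if frequency < 1 or total_words < 1:
        return False
    interval = max(total_words // frequency, 1)
    return num_words % interval == 0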
Example #7
def add_arguments(parser):
    """Specifies the command line arguments supported by the "theanolm decode"
    command.

    :type parser: argparse.ArgumentParser
    :param parser: a command line argument parser
    """

    argument_group = parser.add_argument_group("files")
    argument_group.add_argument(
        'model_path', metavar='MODEL-FILE', type=str,
        help='the model file that will be used to compute new word scores')
    argument_group.add_argument(
        '--lattices', metavar='FILE', type=str, nargs='*', default=[],
        help='word lattices to be decoded (default stdin, assumed to be '
             'compressed if the name ends in ".gz")')
    argument_group.add_argument(
        '--lattice-list', metavar='FILE', type=TextFileType('r'),
        help='text file containing a list of word lattices to be decoded (one '
             'path per line, the list and the lattice files are assumed to be '
             'compressed if the name ends in ".gz")')
    argument_group.add_argument(
        '--lattice-format', metavar='FORMAT', type=str, default='slf',
        choices=['slf', 'kaldi'],
        help='format of the lattice files, either "slf" (HTK format, default) '
             'or "kaldi" (a Kaldi lattice archive containing text '
             'CompactLattices)')
    argument_group.add_argument(
        '--kaldi-vocabulary', metavar='FILE', type=TextFileType('r'),
        default=None,
        help='mapping of words to word IDs in Kaldi lattices (usually '
             'named words.txt)')
    argument_group.add_argument(
        '--output-file', metavar='FILE', type=TextFileType('w'), default='-',
        help='where to write the best paths through the lattices or the '
             'rescored lattice (default stdout, will be compressed if the name '
             'ends in ".gz")')
    argument_group.add_argument(
        '--num-jobs', metavar='J', type=int, default=1,
        help='divide the set of lattice files into J distinct batches, and '
             'process only batch I')
    argument_group.add_argument(
        '--job', metavar='I', type=int, default=0,
        help='the index of the batch that this job should process, between 0 '
             'and J-1')

    argument_group = parser.add_argument_group("decoding")
    argument_group.add_argument(
        '--output', metavar='FORMAT', type=str, default='ref',
        choices=['ref', 'trn', 'full', 'slf', 'kaldi'],
        help='format of the output, one of "ref" (default, utterance ID '
             'followed by words), "trn" (words followed by utterance ID in '
             'parentheses), "full" (utterance ID, acoustic score, language '
             'score, and number of words, followed by words), "slf" (rescored '
             'lattice in HTK format), "kaldi" (rescored lattice in Kaldi '
             'format)')
    argument_group.add_argument(
        '--n-best', metavar='N', type=int, default=1,
        help='print N best paths of each lattice (default 1)')
    argument_group.add_argument(
        '--nnlm-weight', metavar='LAMBDA', type=float, default=1.0,
        help="language model probabilities given by the model read from "
             "MODEL-FILE will be weighted by LAMBDA, when interpolating with "
             "the language model probabilities in the lattice (default is 1.0, "
             "meaning that the LM probabilities in the lattice will be "
             "ignored)")
    argument_group.add_argument(
        '--lm-scale', metavar='LMSCALE', type=float, default=None,
        help="scale language model log probabilities by LMSCALE when computing "
             "the total probability of a path (default is to use the LM scale "
             "specified in the lattice file, or 1.0 if not specified)")
    argument_group.add_argument(
        '--wi-penalty', metavar='WIP', type=float, default=None,
        help="penalize word insertion by adding WIP to the total log "
             "probability as many times as there are words in the path "
             "(without scaling WIP by LMSCALE)")
    argument_group.add_argument(
        '--log-base', metavar='B', type=int, default=None,
        help="convert output log probabilities to base B and WIP from base B "
             "(default is natural logarithm; this does not affect reading "
             "lattices, since they specify their internal log base)")
    argument_group.add_argument(
        '--unk-penalty', metavar='LOGPROB', type=float, default=None,
        help="use constant LOGPROB as <unk> token score (default is to use the "
             "network to predict <unk> probability)")
    argument_group.add_argument(
        '--shortlist', action="store_true",
        help='distribute <unk> token probability among the out-of-shortlist '
             'words according to their unigram frequencies in the training '
             'data')
    argument_group.add_argument(
        '--unk-from-lattice', action="store_true",
        help='use only the probability from the lattice for <unk> tokens')
    argument_group.add_argument(
        '--linear-interpolation', action="store_true",
        help="use linear interpolation of language model probabilities, "
             "instead of (pseudo) log-linear")

    argument_group = parser.add_argument_group("pruning")
    argument_group.add_argument(
        '--max-tokens-per-node', metavar='T', type=int, default=None,
        help="keep only at most T tokens at each node when decoding a lattice "
             "(default is no limit)")
    argument_group.add_argument(
        '--beam', metavar='B', type=float, default=None,
        help="prune tokens whose log probability is at least B smaller than "
             "the log probability of the best token at any given time (default "
             "is no beam pruning)")
    argument_group.add_argument(
        '--recombination-order', metavar='O', type=int, default=None,
        help="keep only the best token, when at least O previous words are "
             "identical (default is to recombine tokens only if the entire "
             "word history matches)")
    argument_group.add_argument(
        '--prune-relative', metavar='R', type=int, default=None,
        help="if set, tighten the beam and the max-tokens-per-node pruning "
             "linearly in the number of tokens in a node; those parameters "
             "will be divided by the number of tokens and multiplied by R")
    argument_group.add_argument(
        '--abs-min-max-tokens', metavar='T', type=float, default=30,
        help="if prune-extra-limit is used, do not tighten max-tokens-per-node "
             "further than this (default is 30)")
    argument_group.add_argument(
        '--abs-min-beam', metavar='B', type=float, default=150,
        help="if prune-extra-limit is used, do not tighten the beam further "
             "than this (default is 150)")

    argument_group = parser.add_argument_group("configuration")
    argument_group.add_argument(
        '--default-device', metavar='DEVICE', type=str, default=None,
        help='when multiple GPUs are present, use DEVICE as default')

    argument_group = parser.add_argument_group("logging and debugging")
    argument_group.add_argument(
        '--log-file', metavar='FILE', type=str, default='-',
        help='path where to write log file (default is standard output)')
    argument_group.add_argument(
        '--log-level', metavar='LEVEL', type=str, default='info',
        help='minimum level of events to log, one of "debug", "info", "warn" '
             '(default "info")')
    argument_group.add_argument(
        '--debug', action="store_true",
        help='enables debugging Theano errors')
    argument_group.add_argument(
        '--profile', action="store_true",
        help='enables profiling Theano functions')
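
The --num-jobs/--job pair implies a deterministic split of the lattice files into J batches. A minimal sketch of one such split (round-robin assignment is an assumption; the actual partitioning may differ):

def select_batch(lattice_paths, num_jobs, job):
    # Hypothetical helper: assign files round-robin to J batches and keep
    # those belonging to batch `job`, where 0 <= job <= J-1.
    if not 0 <= job < num_jobs:
        raise ValueError('--job must be between 0 and J-1')
    return [path for index, path in enumerate(lattice_paths)
            if index % num_jobs == job]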