Ejemplo n.º 1
0
def get_args():
    """Build the option parser, parse the command line and post-process it.

    Mandatory inputs are declared as named options (not positionals) for
    readability.  Options shared across the training scripts are inherited
    from libs.nnet3.train.common.CommonParser.parser; see
    steps/libs/nnet3/train/common.py.

    Returns:
        The two-element list [args, run_opts] obtained from process_args().
    """
    # conflict_handler='resolve' allows an option declared below to replace
    # an inherited option that uses the same option string.
    shared_opts = common_train_lib.CommonParser(
        include_chunk_context=False).parser
    cli = argparse.ArgumentParser(
        parents=[shared_opts],
        conflict_handler='resolve',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""Trains a feed forward DNN acoustic model using the
        cross-entropy objective.  DNNs include simple DNNs, TDNNs and CNNs.""")
    add_opt = cli.add_argument

    # ---- egs extraction ----
    add_opt("--egs.frames-per-eg",
            dest='frames_per_eg', type=int, default=8,
            help="Number of output labels per example")

    # ---- trainer ----
    add_opt("--trainer.prior-subset-size",
            dest='prior_subset_size', type=int, default=20000,
            help="Number of samples for computing priors")
    add_opt("--trainer.num-jobs-compute-prior",
            dest='num_jobs_compute_prior', type=int, default=10,
            help="The prior computation jobs are single threaded "
                 "and run on the CPU")

    # ---- optimization ----
    add_opt("--trainer.optimization.minibatch-size",
            dest='minibatch_size', type=str, default='512',
            help="""Size of the minibatch used in SGD training
                        (argument to nnet3-merge-egs); may be a more general
                        rule as accepted by the --minibatch-size option of
                        nnet3-merge-egs; run that program without args to see
                        the format.""")

    # ---- general ----
    add_opt("--feat-dir", type=str, required=False,
            help="Directory with features used for training the "
                 "neural network.")
    add_opt("--lang", type=str, required=False,
            help="Language directory")
    add_opt("--ali-dir", type=str, required=True,
            help="Directory with alignments used for training the "
                 "neural network.")
    add_opt("--dir", type=str, required=True,
            help="Directory to store the models and all other files.")

    # Echo the invocation on stderr: once as a single string, once as the
    # raw argv list.
    invocation = ' '.join(sys.argv)
    print(invocation, file=sys.stderr)
    print(sys.argv, file=sys.stderr)

    parsed = cli.parse_args()
    args, run_opts = process_args(parsed)
    return [args, run_opts]
Ejemplo n.º 2
0
def get_args():
    """Build the option parser, parse the command line and post-process it.

    Mandatory inputs are declared as named options (not positionals) for
    readability.  Options shared across the training scripts are inherited
    from libs.nnet3.train.common.CommonParser.parser; see
    subtools/kaldi/steps_multitask/libs/nnet3/train/common.py.

    Returns:
        The two-element list [args, run_opts] obtained from process_args().
    """
    # conflict_handler='resolve' allows an option declared below to replace
    # an inherited option that uses the same option string.
    shared_opts = common_train_lib.CommonParser(
        include_chunk_context=False).parser
    cli = argparse.ArgumentParser(
        parents=[shared_opts],
        conflict_handler='resolve',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""Trains a feed forward DNN acoustic model using the
        cross-entropy objective.  DNNs include simple DNNs, TDNNs and CNNs.""")
    add_opt = cli.add_argument

    # ---- optimization ----
    add_opt("--trainer.optimization.minibatch-size",
            dest='minibatch_size', type=str, default='512',
            help="""Size of the minibatch used in SGD training
                        (argument to nnet3-merge-egs); may be a more general
                        rule as accepted by the --minibatch-size option of
                        nnet3-merge-egs; run that program without args to see
                        the format.""")
    add_opt("--trainer.num-jobs-compute-prior",
            dest='num_jobs_compute_prior', type=int, default=10,
            help="The prior computation jobs are single threaded "
                 "and run on the CPU")

    # ---- general: output nodes, their weights and their egs ----
    add_opt("--am-output-name", type=str, required=True,
            help="The name of am output-node")
    add_opt("--xvec-output-name", type=str, required=True,
            help="The name of xvec output-node")
    add_opt("--am-weight", type=float, default=1.0,
            help="The am weight")
    add_opt("--xvec-weight", type=float, default=1.0,
            help="The xvec weight")
    add_opt("--am-egs-dir", type=str, required=True,
            help="Directory with am egs for training")
    add_opt("--xvec-egs-dir", type=str, required=True,
            help="Directory with xvector egs for training")
    add_opt("--dir", type=str, required=True,
            help="Directory to store the models and all other files.")

    # Echo the invocation on stderr: once as a single string, once as the
    # raw argv list.
    invocation = ' '.join(sys.argv)
    print(invocation, file=sys.stderr)
    print(sys.argv, file=sys.stderr)

    parsed = cli.parse_args()
    args, run_opts = process_args(parsed)
    return [args, run_opts]
Ejemplo n.º 3
0
def get_args():
    """Build the option parser, parse the command line and post-process it.

    Options shared across the training scripts are inherited from
    libs.nnet3.train.common.CommonParser.parser; see
    steps/libs/nnet3/train/common.py.

    Returns:
        The two-element list [args, run_opts] obtained from process_args().
    """
    # conflict_handler='resolve' allows an option declared below to replace
    # an inherited option that uses the same option string.
    shared_opts = common_train_lib.CommonParser(
        include_chunk_context=False).parser
    cli = argparse.ArgumentParser(
        parents=[shared_opts],
        conflict_handler='resolve',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""Trains a feed forward raw DNN (without transition model)
        using frame-level objectives like cross-entropy and mean-squared-error.
        DNNs include simple DNNs, TDNNs and CNNs.""")
    add_opt = cli.add_argument

    # ---- egs extraction ----
    add_opt("--egs.frames-per-eg",
            dest='frames_per_eg', type=int, default=8,
            help="Number of output labels per example")
    add_opt("--image.augmentation-opts",
            dest='image_augmentation_opts', type=str, default=None,
            help="Image augmentation options")

    # ---- trainer ----
    add_opt("--trainer.prior-subset-size",
            dest='prior_subset_size', type=int, default=20000,
            help="Number of samples for computing priors")
    add_opt("--trainer.num-jobs-compute-prior",
            dest='num_jobs_compute_prior', type=int, default=10,
            help="The prior computation jobs are single threaded "
                 "and run on the CPU")

    # ---- optimization ----
    add_opt("--trainer.optimization.minibatch-size",
            dest='minibatch_size', type=str, default='512',
            help="""Size of the minibatch used in SGD training
                        (argument to nnet3-merge-egs); may be a more general
                        rule as accepted by the --minibatch-size option of
                        nnet3-merge-egs; run that program without args to see
                        the format.""")
    add_opt("--compute-average-posteriors",
            type=str, action=common_lib.StrToBoolAction,
            choices=["true", "false"], default=False,
            help="""If true, then the average output of the
                        network is computed and dumped as post.final.vec""")

    # ---- general ----
    add_opt("--nj", type=int, default=4,
            help="Number of parallel jobs")
    add_opt("--use-dense-targets",
            type=str, action=common_lib.StrToBoolAction,
            choices=["true", "false"], default=True,
            help="Train neural network using dense targets")
    add_opt("--feat-dir", type=str, required=False,
            help="Directory with features used for training the "
                 "neural network.")
    add_opt("--targets-scp", type=str, required=False,
            help="Targets for training neural network.")
    add_opt("--dir", type=str, required=True,
            help="Directory to store the models and all other files.")

    # Echo the invocation on stdout: once as a single string, once as the
    # raw argv list.
    invocation = ' '.join(sys.argv)
    print(invocation)
    print(sys.argv)

    parsed = cli.parse_args()
    args, run_opts = process_args(parsed)
    return [args, run_opts]
Ejemplo n.º 4
0
def get_args():
    """Build the option parser, parse the command line and post-process it.

    Options shared across the training scripts are inherited from
    libs.nnet3.train.common.CommonParser.parser; see
    subtools/kaldi/steps/libs/nnet3/train/common.py.

    Returns:
        The two-element list [args, run_opts] obtained from process_args().
    """
    # conflict_handler='resolve' allows an option declared below to replace
    # an inherited option that uses the same option string.
    shared_opts = common_train_lib.CommonParser(
        default_chunk_left_context=40).parser
    cli = argparse.ArgumentParser(
        parents=[shared_opts],
        conflict_handler='resolve',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""Trains a raw RNN (without transition model) using
        frame-level objectives like cross-entropy and mean-squared-error.
        RNNs include LSTMs, BLSTMs and GRUs.
        RNN acoustic model training differs from feed-forward DNN training in
        the following ways
            1. RNN acoustic models train on output chunks rather than
               individual outputs
            2. The training includes additional stage of shrinkage, where the
               parameters of the model are scaled when the derivative averages
               at the non-linearities are below a threshold.
            3. RNNs can also be trained with state preservation training""")
    add_opt = cli.add_argument

    # ---- egs extraction ----
    add_opt("--egs.chunk-width",
            dest='chunk_width', type=str, default="20",
            help="""Number of frames per chunk in the examples
                        used to train the RNN.   Caution: if you double this you
                        should halve --trainer.samples-per-iter.  May be
                        a comma-separated list of alternatives: first width
                        is the 'principal' chunk-width, used preferentially""")

    # ---- trainer ----
    add_opt("--trainer.input-model",
            dest='input_model', type=str, default=None,
            action=common_lib.NullstrToNoneAction,
            help="""If specified, this model is used as initial
                        raw model (0.raw in the script) instead of initializing
                        the model from xconfig. Configs dir is not expected to
                        exist and left/right context is computed from this
                        model.""")
    add_opt("--trainer.samples-per-iter",
            dest='samples_per_iter', type=int, default=20000,
            help="""This is really the number of egs in each
                        archive.  Each eg has 'chunk_width' frames in it--
                        for chunk_width=20, this value (20k) is equivalent
                        to the 400k number that we use as a default in
                        regular DNN training.
                        Overrides the default value in CommonParser.""")
    add_opt("--trainer.prior-subset-size",
            dest='prior_subset_size', type=int, default=20000,
            help="Number of samples for computing priors")
    add_opt("--trainer.num-jobs-compute-prior",
            dest='num_jobs_compute_prior', type=int, default=10,
            help="The prior computation jobs are single threaded "
                 "and run on the CPU")

    # ---- optimization ----
    add_opt("--trainer.optimization.momentum",
            dest='momentum', type=float, default=0.5,
            help="""Momentum used in update computation.
                        Note: we implemented it in such a way that
                        it doesn't increase the effective learning rate.
                        Overrides the default value in CommonParser""")
    add_opt("--trainer.optimization.shrink-value",
            dest='shrink_value', type=float, default=0.99,
            help="""Scaling factor used for scaling the parameter
                        matrices when the derivative averages are below the
                        shrink-threshold at the non-linearities.  E.g. 0.99.
                        Only applicable when the neural net contains sigmoid or
                        tanh units.""")
    add_opt("--trainer.optimization.shrink-saturation-threshold",
            dest='shrink_saturation_threshold', type=float, default=0.40,
            help="""Threshold that controls when we apply the
                        'shrinkage' (i.e. scaling by shrink-value).  If the
                        saturation of the sigmoid and tanh nonlinearities in
                        the neural net (as measured by
                        subtools/kaldi/steps/nnet3/get_saturation.pl) exceeds this threshold
                        we scale the parameter matrices with the
                        shrink-value.""")

    # ---- RNN-specific trainer options ----
    add_opt("--trainer.rnn.num-chunk-per-minibatch",
            dest='num_chunk_per_minibatch', type=str, default='100',
            help="""Number of sequences to be processed in
                        parallel every minibatch.  May be a more general
                        rule as accepted by the --minibatch-size option of
                        nnet3-merge-egs; run that program without args to see
                        the format.""")
    add_opt("--trainer.deriv-truncate-margin",
            dest='deriv_truncate_margin', type=int, default=8,
            help="""Margin (in input frames) around the 'required'
                        part of each chunk that the derivatives are
                        backpropagated to. E.g., 8 is a reasonable setting.
                        Note: the 'required' part of the chunk is defined by
                        the model's {left,right}-context.""")
    add_opt("--compute-average-posteriors",
            type=str, action=common_lib.StrToBoolAction,
            choices=["true", "false"], default=False,
            help="""If true, then the average output of the
                        network is computed and dumped as post.final.vec""")

    # ---- general ----
    add_opt("--nj", type=int, default=4,
            help="Number of parallel jobs")
    add_opt("--use-dense-targets",
            type=str, action=common_lib.StrToBoolAction,
            choices=["true", "false"], default=True,
            help="Train neural network using dense targets")
    add_opt("--feat-dir", type=str, required=True,
            help="Directory with features used for training the "
                 "neural network.")
    add_opt("--targets-scp", type=str, required=True,
            help="Target for training neural network.")
    add_opt("--dir", type=str, required=True,
            help="Directory to store the models and all other files.")

    # Echo the invocation on stdout: once as a single string, once as the
    # raw argv list.
    invocation = ' '.join(sys.argv)
    print(invocation)
    print(sys.argv)

    parsed = cli.parse_args()
    args, run_opts = process_args(parsed)
    return [args, run_opts]
Ejemplo n.º 5
0
def get_args():
    """Build the option parser, parse the command line and post-process it.

    Mandatory inputs are declared as named options (not positionals) for
    readability.  Options shared across the training scripts are inherited
    from libs.nnet3.train.common.CommonParser.parser; see
    steps/libs/nnet3/train/common.py.

    Returns:
        The two-element list [args, run_opts] obtained from process_args().
    """
    # conflict_handler='resolve' allows an option declared below to replace
    # an inherited option that uses the same option string.
    shared_opts = common_train_lib.CommonParser().parser
    cli = argparse.ArgumentParser(
        parents=[shared_opts],
        conflict_handler='resolve',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""Trains RNN and DNN acoustic models using the 'chain'
        objective function.""")
    add_opt = cli.add_argument

    # ---- egs extraction ----
    add_opt("--egs.chunk-width",
            dest='chunk_width', type=str, default="20",
            help="""Number of frames per chunk in the examples
                        used to train the RNN.   Caution: if you double this you
                        should halve --trainer.samples-per-iter.  May be
                        a comma-separated list of alternatives: first width
                        is the 'principal' chunk-width, used preferentially""")

    # ---- chain ----
    add_opt("--chain.lm-opts",
            dest='lm_opts', type=str, default=None,
            action=common_lib.NullstrToNoneAction,
            help="options to be be passed to chain-est-phone-lm")
    add_opt("--chain.l2-regularize",
            dest='l2_regularize', type=float, default=0.0,
            help="""Weight of regularization function which is the
                        l2-norm of the output of the network. It should be used
                        without the log-softmax layer for the outputs.  As
                        l2-norm of the log-softmax outputs can dominate the
                        objective function.""")
    add_opt("--chain.xent-regularize",
            dest='xent_regularize', type=float, default=0.0,
            help="Weight of regularization function which is "
                 "the cross-entropy cost the outputs.")
    add_opt("--chain.right-tolerance",
            dest='right_tolerance', type=int, default=5,
            help="")
    add_opt("--chain.left-tolerance",
            dest='left_tolerance', type=int, default=5,
            help="")
    add_opt("--chain.leaky-hmm-coefficient",
            dest='leaky_hmm_coefficient', type=float, default=0.00001,
            help="")
    add_opt("--chain.apply-deriv-weights",
            dest='apply_deriv_weights', type=str, default=True,
            action=common_lib.StrToBoolAction, choices=["true", "false"],
            help="")
    add_opt("--chain.frame-subsampling-factor",
            dest='frame_subsampling_factor', type=int, default=3,
            help="ratio of frames-per-second of features we train on, "
                 "to chain model's output")
    add_opt("--chain.alignment-subsampling-factor",
            dest='alignment_subsampling_factor', type=int, default=3,
            help="ratio of frames-per-second of input alignments "
                 "to chain model's output")
    add_opt("--chain.left-deriv-truncate",
            dest='left_deriv_truncate', type=int, default=None,
            help="Deprecated. Kept for back compatibility")

    # ---- trainer ----
    add_opt("--trainer.input-model",
            dest='input_model', type=str, default=None,
            action=common_lib.NullstrToNoneAction,
            help="If specified, this model is used as initial 'raw' model "
                 "(0.raw in the script) instead of initializing the model "
                 "from the xconfig. Also configs dir is not expected to "
                 "exist and left/right context is computed from this model.")
    add_opt("--trainer.num-epochs",
            dest='num_epochs', type=float, default=10.0,
            help="Number of epochs to train the model")
    add_opt("--trainer.frames-per-iter",
            dest='frames_per_iter', type=int, default=800000,
            help="""Each iteration of training, see this many
                        [input] frames per job.  This option is passed to
                        get_egs.sh.  Aim for about a minute of training
                        time""")
    add_opt("--trainer.num-chunk-per-minibatch",
            dest='num_chunk_per_minibatch', type=str, default='128',
            help="""Number of sequences to be processed in
                        parallel every minibatch.  May be a more general
                        rule as accepted by the --minibatch-size option of
                        nnet3-merge-egs; run that program without args to see
                        the format.""")

    # ---- optimization ----
    add_opt("--trainer.optimization.initial-effective-lrate",
            dest='initial_effective_lrate', type=float, default=0.0002,
            help="Learning rate used during the initial iteration")
    add_opt("--trainer.optimization.final-effective-lrate",
            dest='final_effective_lrate', type=float, default=0.00002,
            help="Learning rate used during the final iteration")
    add_opt("--trainer.optimization.shrink-value",
            dest='shrink_value', type=float, default=1.0,
            help="""Scaling factor used for scaling the parameter
                        matrices when the derivative averages are below the
                        shrink-threshold at the non-linearities.  E.g. 0.99.
                        Only applicable when the neural net contains sigmoid or
                        tanh units.""")
    add_opt("--trainer.optimization.shrink-saturation-threshold",
            dest='shrink_saturation_threshold', type=float, default=0.40,
            help="""Threshold that controls when we apply the
                        'shrinkage' (i.e. scaling by shrink-value).  If the
                        saturation of the sigmoid and tanh nonlinearities in
                        the neural net (as measured by
                        steps/nnet3/get_saturation.pl) exceeds this threshold
                        we scale the parameter matrices with the
                        shrink-value.""")

    # ---- RNN-specific training options ----
    add_opt("--trainer.deriv-truncate-margin",
            dest='deriv_truncate_margin', type=int, default=None,
            help="""(Relevant only for recurrent models). If
                        specified, gives the margin (in input frames) around
                        the 'required' part of each chunk that the derivatives
                        are backpropagated to. If unset, the derivatives are
                        backpropagated all the way to the boundaries of the
                        input data. E.g. 8 is a reasonable setting. Note: the
                        'required' part of the chunk is defined by the model's
                        {left,right}-context.""")

    # ---- general ----
    add_opt("--feat-dir", type=str, required=True,
            help="Directory with features used for training the "
                 "neural network.")
    add_opt("--tree-dir", type=str, required=True,
            help="""Directory containing the tree to use for this
                        model (we also expect final.mdl and ali.*.gz in that
                        directory""")
    add_opt("--lat-dir", type=str, required=True,
            help="Directory with numerator lattices used for training "
                 "the neural network.")
    add_opt("--dir", type=str, required=True,
            help="Directory to store the models and all other files.")

    # Echo the invocation on stdout: once as a single string, once as the
    # raw argv list.
    invocation = ' '.join(sys.argv)
    print(invocation)
    print(sys.argv)

    parsed = cli.parse_args()
    args, run_opts = process_args(parsed)
    return [args, run_opts]
Ejemplo n.º 6
0
def get_args():
    """Build the option parser, parse the command line and post-process it.

    Mandatory inputs are declared as named options (not positionals) for
    readability.  Options shared across the training scripts are inherited
    from libs.nnet3.train.common.CommonParser.parser; see
    steps/libs/nnet3/train/common.py.

    Returns:
        The two-element list [args, run_opts] obtained from process_args().
    """
    # conflict_handler='resolve' allows an option declared below to replace
    # an inherited option that uses the same option string.
    shared_opts = common_train_lib.CommonParser().parser
    cli = argparse.ArgumentParser(
        parents=[shared_opts],
        conflict_handler='resolve',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""Trains an RNN acoustic model using the cross-entropy
        objective.  RNNs include LSTMs, BLSTMs and GRUs.
        RNN acoustic model training differs from feed-forward DNN training in
        the following ways
            1. RNN acoustic models train on output chunks rather than
               individual outputs
            2. The training includes additional stage of shrinkage, where
               the parameters of the model are scaled when the derivative
               averages at the non-linearities are below a threshold.
            3. RNNs can also be trained with state preservation training""")
    add_opt = cli.add_argument

    # ---- egs extraction ----
    add_opt("--egs.chunk-width",
            dest='chunk_width', type=int, default=20,
            help="""Number of output labels in the sequence
                        used to train an LSTM.
                        Caution: if you double this you should halve
                        --trainer.samples-per-iter.""")
    add_opt("--egs.chunk-left-context",
            dest='chunk_left_context', type=int, default=40,
            help="""Number of left steps used in the estimation of
                        LSTM state before prediction of the first label""")

    # ---- trainer ----
    add_opt("--trainer.samples-per-iter",
            dest='samples_per_iter', type=int, default=20000,
            help="""This is really the number of egs in each
                        archive.  Each eg has 'chunk_width' frames in it--
                        for chunk_width=20, this value (20k) is equivalent
                        to the 400k number that we use as a default in
                        regular DNN training.
                        Overrides the default value in CommonParser.""")
    add_opt("--trainer.prior-subset-size",
            dest='prior_subset_size', type=int, default=20000,
            help="Number of samples for computing priors")
    add_opt("--trainer.num-jobs-compute-prior",
            dest='num_jobs_compute_prior', type=int, default=10,
            help="The prior computation jobs are single threaded "
                 "and run on the CPU")

    # ---- optimization ----
    add_opt("--trainer.optimization.momentum",
            dest='momentum', type=float, default=0.5,
            help="""Momentum used in update computation.
                        Note: we implemented it in such a way that
                        it doesn't increase the effective learning rate.
                        Overrides the default value in CommonParser""")
    add_opt("--trainer.optimization.shrink-value",
            dest='shrink_value', type=float, default=0.99,
            help="""Scaling factor used for scaling the parameter
                        matrices when the derivative averages are below the
                        shrink-threshold at the non-linearities.  E.g. 0.99.
                        Only applicable when the neural net contains sigmoid or
                        tanh units.""")
    add_opt("--trainer.optimization.shrink-saturation-threshold",
            dest='shrink_saturation_threshold', type=float, default=0.40,
            help="""Threshold that controls when we apply the 'shrinkage'
                        (i.e. scaling by shrink-value).  If the saturation of the
                        sigmoid and tanh nonlinearities in the neural net (as
                        measured by steps/nnet3/get_saturation.pl) exceeds this
                        threshold we scale the parameter matrices with the
                        shrink-value.""")
    add_opt("--trainer.optimization.cv-minibatch-size",
            dest='cv_minibatch_size', type=int, default=256,
            help="""Size of the minibatch to be used in diagnostic
                        jobs (use smaller value for BLSTMs to control memory
                        usage)""")

    # ---- RNN-specific trainer options ----
    add_opt("--trainer.rnn.num-chunk-per-minibatch",
            dest='num_chunk_per_minibatch', type=int, default=100,
            help="Number of sequences to be processed in parallel "
                 "every minibatch")
    add_opt("--trainer.rnn.num-bptt-steps",
            dest='num_bptt_steps', type=int, default=None,
            help="Deprecated. Kept for back compatibility.")
    add_opt("--trainer.deriv-truncate-margin",
            dest='deriv_truncate_margin', type=int, default=8,
            help="""Margin (in input frames) around the 'required'
                        part of each chunk that the derivatives are
                        backpropagated to. E.g., 8 is a reasonable setting.
                        Note: the 'required' part of the chunk is defined by
                        the model's {left,right}-context.""")

    # ---- general ----
    add_opt("--feat-dir", type=str, required=True,
            help="Directory with features used for training the "
                 "neural network.")
    add_opt("--lang", type=str, required=True,
            help="Language directory")
    add_opt("--ali-dir", type=str, required=True,
            help="Directory with alignments used for training the "
                 "neural network.")
    add_opt("--dir", type=str, required=True,
            help="Directory to store the models and all other files.")

    # Echo the invocation on stdout: once as a single string, once as the
    # raw argv list.
    invocation = ' '.join(sys.argv)
    print(invocation)
    print(sys.argv)

    parsed = cli.parse_args()
    args, run_opts = process_args(parsed)
    return [args, run_opts]
Ejemplo n.º 7
0
def get_args():
    """Build the option parser for raw DNN training and parse the command line.

    Options shared by the nnet3 training scripts are inherited from the
    parent parser libs.nnet3.train.common.CommonParser.parser
    (see steps/libs/nnet3/train/common.py); the options specific to this
    script are registered below.  The invoking command line is echoed to
    stdout before parsing.

    Returns:
        A two-element list [args, run_opts], as produced by process_args()
        from the parsed namespace.
    """
    # Parent parser carrying the options common to all nnet3 trainers.
    shared_options = common_train_lib.CommonParser(
        include_chunk_context=False).parser

    parser = argparse.ArgumentParser(
        parents=[shared_options],
        # 'resolve' lets the options below replace same-named parent options.
        conflict_handler='resolve',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""Trains a feed forward raw DNN (without transition model)
        using frame-level objectives like cross-entropy and mean-squared-error.
        DNNs include simple DNNs, TDNNs and CNNs.""")
    add_option = parser.add_argument

    # ---- egs extraction options ----
    add_option(
        "--egs.frames-per-eg",
        dest='frames_per_eg',
        type=int,
        default=8,
        help="Number of output labels per example")
    add_option(
        "--image.augmentation-opts",
        dest='image_augmentation_opts',
        type=str,
        default=None,
        help="Image augmentation options")

    # ---- trainer options ----
    add_option(
        "--trainer.final-combination",
        dest='final_combination',
        type=str,
        action=common_lib.StrToBoolAction,
        choices=["true", "false"],
        default=True,
        help="If false, skip final combination step")
    add_option(
        "--trainer.prior-subset-size",
        dest='prior_subset_size',
        type=int,
        default=20000,
        help="Number of samples for computing priors")
    add_option(
        "--trainer.num-jobs-compute-prior",
        dest='num_jobs_compute_prior',
        type=int,
        default=10,
        help="The prior computation jobs are single threaded and run on the CPU")

    # ---- optimization parameters ----
    add_option(
        "--trainer.optimization.minibatch-size",
        dest='minibatch_size',
        type=str,
        default='512',
        help="""Size of the minibatch used in SGD training
                        (argument to nnet3-merge-egs); may be a more general
                        rule as accepted by the --minibatch-size option of
                        nnet3-merge-egs; run that program without args to see
                        the format.""")
    add_option(
        "--trainer.optimization.proportional-shrink",
        dest='proportional_shrink',
        type=float,
        default=0.0,
        help="""If nonzero, this will set a shrinkage (scaling)
                        factor for the parameters, whose value is set as:
                        shrink-value=(1.0 - proportional-shrink * learning-rate), where
                        'learning-rate' is the learning rate being applied
                        on the current iteration, which will vary from
                        initial-effective-lrate*num-jobs-initial to
                        final-effective-lrate*num-jobs-final.
                        Unlike for train_rnn.py, this is applied unconditionally,
                        it does not depend on saturation of nonlinearities.
                        Can be used to roughly approximate l2 regularization.""")

    # ---- general options ----
    add_option(
        "--nj",
        type=int,
        default=4,
        help="Number of parallel jobs")
    add_option(
        "--use-dense-targets",
        type=str,
        action=common_lib.StrToBoolAction,
        choices=["true", "false"],
        default=True,
        help="Train neural network using dense targets")
    add_option(
        "--feat-dir",
        type=str,
        required=False,
        help="Directory with features used for training the neural network.")
    add_option(
        "--targets-scp",
        type=str,
        required=False,
        help="Targets for training neural network.")
    add_option(
        "--dir",
        type=str,
        required=True,
        help="Directory to store the models and all other files.")

    # Echo the invocation, first as one joined string and then as a list.
    print(" ".join(sys.argv))
    print(sys.argv)

    parsed = parser.parse_args()
    args, run_opts = process_args(parsed)
    return [args, run_opts]
Ejemplo n.º 8
0
def get_args():
    """Build the option parser for raw DNN training and parse the command line.

    Options shared by the nnet3 training scripts are inherited from the
    parent parser libs.nnet3.train.common.CommonParser.parser
    (see steps/libs/nnet3/train/common.py); the options specific to this
    script are registered below.  The invoking command line is echoed to
    stdout before parsing.

    Returns:
        A two-element list [args, run_opts], as produced by process_args()
        from the parsed namespace.
    """
    # Parent parser carrying the options common to all nnet3 trainers.
    shared_options = common_train_lib.CommonParser(
        include_chunk_context=False).parser

    parser = argparse.ArgumentParser(
        parents=[shared_options],
        # 'resolve' lets the options below replace same-named parent options.
        conflict_handler='resolve',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""Trains a feed forward raw DNN (without transition model)
        using frame-level objectives like cross-entropy and mean-squared-error.
        DNNs include simple DNNs, TDNNs and CNNs.""")
    add_option = parser.add_argument

    # ---- egs extraction options ----
    add_option('--egs.frames-per-eg', dest='frames_per_eg',
               type=int, default=8,
               help='Number of output labels per example')
    add_option('--image.augmentation-opts', dest='image_augmentation_opts',
               type=str, default=None,
               help='Image augmentation options')

    # ---- trainer options ----
    add_option('--trainer.input-model', dest='input_model',
               type=str, default=None,
               action=common_lib.NullstrToNoneAction,
               help="""If specified, this model is used as initial
                        raw model (0.raw in the script) instead of initializing
                        the model from xconfig. Configs dir is not expected to
                        exist and left/right context is computed from this
                        model.""")
    add_option('--trainer.prior-subset-size', dest='prior_subset_size',
               type=int, default=20000,
               help='Number of samples for computing priors')
    add_option('--trainer.num-jobs-compute-prior',
               dest='num_jobs_compute_prior',
               type=int, default=10,
               help='The prior computation jobs are single threaded '
                    'and run on the CPU')

    # ---- optimization parameters ----
    add_option('--trainer.optimization.minibatch-size', dest='minibatch_size',
               type=str, default='512',
               help="""Size of the minibatch used in SGD training
                        (argument to nnet3-merge-egs); may be a more general
                        rule as accepted by the --minibatch-size option of
                        nnet3-merge-egs; run that program without args to see
                        the format.""")
    add_option('--compute-average-posteriors',
               type=str, default=False,
               action=common_lib.StrToBoolAction,
               choices=['true', 'false'],
               help="""If true, then the average output of the
                        network is computed and dumped as post.final.vec""")

    # ---- general options ----
    add_option('--nj', type=int, default=4,
               help='Number of parallel jobs')
    add_option('--use-dense-targets',
               type=str, default=True,
               action=common_lib.StrToBoolAction,
               choices=['true', 'false'],
               help='Train neural network using dense targets')
    add_option('--feat-dir', type=str, required=False,
               help='Directory with features used for training '
                    'the neural network.')
    # The help text below is spelled out line by line so that its embedded
    # newlines, indentation and trailing spaces stay exactly as they were.
    add_option('--targets-scp', type=str, required=False,
               help=(
                   "Targets for training neural network.\n"
                   "                        This is a kaldi-format SCP file of target matrices.\n"
                   "                        <utterance-id> <extended-filename-of-target-matrix>.\n"
                   "                        The target matrix's column dim must match \n"
                   "                        the neural network output dim, and the\n"
                   "                        row dim must match the number of output frames \n"
                   '                        i.e. after subsampling if "--frame-subsampling-factor" \n'
                   "                        option is passed to --egs.opts."
               ))
    add_option('--dir', type=str, required=True,
               help='Directory to store the models and all other files.')

    # Echo the invocation, first as one joined string and then as a list.
    print(' '.join(sys.argv))
    print(sys.argv)

    parsed = parser.parse_args()
    args, run_opts = process_args(parsed)
    return [args, run_opts]