Example #1
def compute_train_cv_probabilities(dir, iter, egs_dir, l2_regularize,
                                   xent_regularize, leaky_hmm_coefficient,
                                   run_opts):
    model = '{0}/{1}.mdl'.format(dir, iter)

    common_lib.background_command(
        """{command} {dir}/log/compute_prob_valid.{iter}.log \
                nnet3-chain-compute-prob --l2-regularize={l2} \
                --leaky-hmm-coefficient={leaky} --xent-regularize={xent_reg} \
                "nnet3-am-copy --raw=true {model} - |" {dir}/den.fst \
                "ark,bg:nnet3-chain-copy-egs ark:{egs_dir}/valid_diagnostic.cegs \
                    ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \
        """.format(command=run_opts.command, dir=dir, iter=iter, model=model,
                   l2=l2_regularize, leaky=leaky_hmm_coefficient,
                   xent_reg=xent_regularize,
                   egs_dir=egs_dir))

    common_lib.background_command(
        """{command} {dir}/log/compute_prob_train.{iter}.log \
                nnet3-chain-compute-prob --l2-regularize={l2} \
                --leaky-hmm-coefficient={leaky} --xent-regularize={xent_reg} \
                "nnet3-am-copy --raw=true {model} - |" {dir}/den.fst \
                "ark,bg:nnet3-chain-copy-egs ark:{egs_dir}/train_diagnostic.cegs \
                    ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \
        """.format(command=run_opts.command, dir=dir, iter=iter, model=model,
                   l2=l2_regularize, leaky=leaky_hmm_coefficient,
                   xent_reg=xent_regularize,
                   egs_dir=egs_dir))
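A minimal, hypothetical harness for the example above (the paths, the run.pl command, and the regularization values are placeholders, not taken from the source): for these diagnostic functions, run_opts only needs a .command attribute.

from types import SimpleNamespace

# Placeholder values; in Kaldi recipes run_opts is built by the top-level
# training script and `command` is typically run.pl or queue.pl plus options.
run_opts = SimpleNamespace(command="utils/run.pl")
compute_train_cv_probabilities(dir="exp/chain/tdnn1a", iter=10,
                               egs_dir="exp/chain/tdnn1a/egs",
                               l2_regularize=5e-5, xent_regularize=0.1,
                               leaky_hmm_coefficient=0.1,
                               run_opts=run_opts)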
Example #2
def compute_train_cv_probabilities(dir, iter, egs_dir, l2_regularize,
                                   xent_regularize, leaky_hmm_coefficient,
                                   run_opts):
    model = '{0}/{1}.mdl'.format(dir, iter)

    common_lib.background_command(
        """{command} {dir}/log/compute_prob_valid.{iter}.log \
                nnet3-chain-compute-prob --l2-regularize={l2} \
                --leaky-hmm-coefficient={leaky} --xent-regularize={xent_reg} \
                "nnet3-am-copy --raw=true {model} - |" {dir}/den.fst \
                "ark,bg:nnet3-chain-copy-egs ark:{egs_dir}/valid_diagnostic.cegs \
                    ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \
        """.format(command=run_opts.command, dir=dir, iter=iter, model=model,
                   l2=l2_regularize, leaky=leaky_hmm_coefficient,
                   xent_reg=xent_regularize,
                   egs_dir=egs_dir))

    common_lib.background_command(
        """{command} {dir}/log/compute_prob_train.{iter}.log \
                nnet3-chain-compute-prob --l2-regularize={l2} \
                --leaky-hmm-coefficient={leaky} --xent-regularize={xent_reg} \
                "nnet3-am-copy --raw=true {model} - |" {dir}/den.fst \
                "ark,bg:nnet3-chain-copy-egs ark:{egs_dir}/train_diagnostic.cegs \
                    ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \
        """.format(command=run_opts.command, dir=dir, iter=iter, model=model,
                   l2=l2_regularize, leaky=leaky_hmm_coefficient,
                   xent_reg=xent_regularize,
                   egs_dir=egs_dir))
Example #3
def compute_train_cv_probabilities(dir,
                                   iter,
                                   egs_dir,
                                   run_opts,
                                   get_raw_nnet_from_am=True):
    if get_raw_nnet_from_am:
        model = "nnet3-am-copy --raw=true {dir}/{iter}.mdl - |".format(
            dir=dir, iter=iter)
    else:
        model = "{dir}/{iter}.raw".format(dir=dir, iter=iter)

    common_lib.background_command(
        """ {command} {dir}/log/compute_prob_valid.{iter}.log \
                nnet3-compute-prob "{model}" \
                "ark,bg:nnet3-copy-egs \
                    ark:{egs_dir}/valid_diagnostic.egs ark:- | \
                    nnet3-merge-egs --minibatch-size=1:64 ark:- \
                    ark:- |" """.format(command=run_opts.command,
                                        dir=dir,
                                        iter=iter,
                                        model=model,
                                        egs_dir=egs_dir))

    common_lib.background_command(
        """{command} {dir}/log/compute_prob_train.{iter}.log \
                nnet3-compute-prob "{model}" \
                "ark,bg:nnet3-copy-egs \
                    ark:{egs_dir}/train_diagnostic.egs ark:- | \
                    nnet3-merge-egs --minibatch-size=1:64 ark:- \
                    ark:- |" """.format(command=run_opts.command,
                                        dir=dir,
                                        iter=iter,
                                        model=model,
                                        egs_dir=egs_dir))
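As a side note, the two model specifiers that get_raw_nnet_from_am toggles between behave quite differently: a trailing "|" makes the string a Kaldi "pipe" rxfilename, so the model is read from the stdout of the quoted command rather than from a file on disk. A standalone sketch with placeholder paths:

# Illustration only (placeholder paths): the two forms of `model` built above.
dir, iter = "exp/tdnn_a", 24
model_from_am = "nnet3-am-copy --raw=true {0}/{1}.mdl - |".format(dir, iter)
model_raw = "{0}/{1}.raw".format(dir, iter)
print(model_from_am)   # nnet3-am-copy --raw=true exp/tdnn_a/24.mdl - |
print(model_raw)       # exp/tdnn_a/24.raw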
Example #4
def compute_train_cv_probabilities(dir,
                                   iter,
                                   egs_dir,
                                   l2_regularize,
                                   xent_regularize,
                                   leaky_hmm_coefficient,
                                   run_opts,
                                   use_multitask_egs=False):
    model = '{0}/{1}.mdl'.format(dir, iter)
    scp_or_ark = "scp" if use_multitask_egs else "ark"
    egs_suffix = ".scp" if use_multitask_egs else ".cegs"

    multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
        egs_dir,
        egs_prefix="valid_diagnostic.",
        use_multitask_egs=use_multitask_egs)

    common_lib.background_command(
        """{command} {dir}/log/compute_prob_valid.{iter}.log \
                nnet3-chain-compute-prob --l2-regularize={l2} \
                --leaky-hmm-coefficient={leaky} --xent-regularize={xent_reg} \
                "nnet3-am-copy --raw=true {model} - |" {dir}/den.fst \
                "ark,bg:nnet3-chain-copy-egs {multitask_egs_opts} {scp_or_ark}:{egs_dir}/valid_diagnostic{egs_suffix} \
                    ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \
        """.format(command=run_opts.command,
                   dir=dir,
                   iter=iter,
                   model=model,
                   l2=l2_regularize,
                   leaky=leaky_hmm_coefficient,
                   xent_reg=xent_regularize,
                   egs_dir=egs_dir,
                   multitask_egs_opts=multitask_egs_opts,
                   scp_or_ark=scp_or_ark,
                   egs_suffix=egs_suffix))

    multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
        egs_dir,
        egs_prefix="train_diagnostic.",
        use_multitask_egs=use_multitask_egs)

    common_lib.background_command(
        """{command} {dir}/log/compute_prob_train.{iter}.log \
                nnet3-chain-compute-prob --l2-regularize={l2} \
                --leaky-hmm-coefficient={leaky} --xent-regularize={xent_reg} \
                "nnet3-am-copy --raw=true {model} - |" {dir}/den.fst \
                "ark,bg:nnet3-chain-copy-egs {multitask_egs_opts} {scp_or_ark}:{egs_dir}/train_diagnostic{egs_suffix} \
                    ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \
        """.format(command=run_opts.command,
                   dir=dir,
                   iter=iter,
                   model=model,
                   l2=l2_regularize,
                   leaky=leaky_hmm_coefficient,
                   xent_reg=xent_regularize,
                   egs_dir=egs_dir,
                   multitask_egs_opts=multitask_egs_opts,
                   scp_or_ark=scp_or_ark,
                   egs_suffix=egs_suffix))
Example #5
def compute_train_cv_probabilities(dir,
                                   iter,
                                   egs_dir,
                                   run_opts,
                                   get_raw_nnet_from_am=True,
                                   use_egs=False,
                                   compute_per_dim_accuracy=False):
    if get_raw_nnet_from_am:
        model = "nnet3-am-copy --raw=true {dir}/{iter}.mdl - |".format(
            dir=dir, iter=iter)
    else:
        model = "{dir}/{iter}.raw".format(dir=dir, iter=iter)

    scp_or_ark = "scp" if use_egs else "ark"
    egs_suffix = ".scp" if use_egs else ".egs"
    egs_rspecifier = ("{0}:{1}/valid_diagnostic{2}".format(
        scp_or_ark, egs_dir, egs_suffix))

    opts = []
    if compute_per_dim_accuracy:
        opts.append("--compute-per-dim-accuracy")

    multitask_egs_opts = common_train_lib.get_egs_opts(
        egs_dir, egs_prefix="valid_diagnostic.", use_egs=use_egs)

    common_lib.background_command(
        """ {command} {dir}/log/compute_prob_valid.{iter}.log \
                nnet3-compute-prob {opts} "{model}" \
                "ark,bg:nnet3-copy-egs {multitask_egs_opts} \
                    {egs_rspecifier} ark:- | \
                    nnet3-merge-egs --minibatch-size=1:64 ark:- \
                    ark:- |" """.format(command=run_opts.command,
                                        dir=dir,
                                        iter=iter,
                                        egs_rspecifier=egs_rspecifier,
                                        opts=' '.join(opts),
                                        model=model,
                                        multitask_egs_opts=multitask_egs_opts))

    egs_rspecifier = ("{0}:{1}/train_diagnostic{2}".format(
        scp_or_ark, egs_dir, egs_suffix))

    multitask_egs_opts = common_train_lib.get_egs_opts(
        egs_dir, egs_prefix="train_diagnostic.", use_egs=use_egs)

    common_lib.background_command(
        """{command} {dir}/log/compute_prob_train.{iter}.log \
                nnet3-compute-prob {opts} "{model}" \
                "ark,bg:nnet3-copy-egs {multitask_egs_opts} \
                    {egs_rspecifier} ark:- | \
                    nnet3-merge-egs --minibatch-size=1:64 ark:- \
                    ark:- |" """.format(command=run_opts.command,
                                        dir=dir,
                                        iter=iter,
                                        egs_rspecifier=egs_rspecifier,
                                        opts=' '.join(opts),
                                        model=model,
                                        multitask_egs_opts=multitask_egs_opts))
Example #6
def compute_train_cv_probabilities(dir, iter, egs_dir, run_opts,
                                   get_raw_nnet_from_am=True,
                                   use_multitask_egs=False,
                                   compute_per_dim_accuracy=False):
    if get_raw_nnet_from_am:
        model = "nnet3-am-copy --raw=true {dir}/{iter}.mdl - |".format(
                    dir=dir, iter=iter)
    else:
        model = "{dir}/{iter}.raw".format(dir=dir, iter=iter)

    scp_or_ark = "scp" if use_multitask_egs else "ark"
    egs_suffix = ".scp" if use_multitask_egs else ".egs"
    egs_rspecifier = ("{0}:{1}/valid_diagnostic{2}".format(
        scp_or_ark, egs_dir, egs_suffix))

    opts = []
    if compute_per_dim_accuracy:
        opts.append("--compute-per-dim-accuracy")

    multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
                             egs_dir,
                             egs_prefix="valid_diagnostic.",
                             use_multitask_egs=use_multitask_egs)

    common_lib.background_command(
        """ {command} {dir}/log/compute_prob_valid.{iter}.log \
                nnet3-compute-prob {opts} "{model}" \
                "ark,bg:nnet3-copy-egs {multitask_egs_opts} \
                    {egs_rspecifier} ark:- | \
                    nnet3-merge-egs --minibatch-size=1:64 ark:- \
                    ark:- |" """.format(command=run_opts.command,
                                        dir=dir,
                                        iter=iter,
                                        egs_rspecifier=egs_rspecifier,
                                        opts=' '.join(opts), model=model,
                                        multitask_egs_opts=multitask_egs_opts))

    egs_rspecifier = ("{0}:{1}/train_diagnostic{2}".format(
        scp_or_ark, egs_dir, egs_suffix))

    multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
                             egs_dir,
                             egs_prefix="train_diagnostic.",
                             use_multitask_egs=use_multitask_egs)

    common_lib.background_command(
        """{command} {dir}/log/compute_prob_train.{iter}.log \
                nnet3-compute-prob {opts} "{model}" \
                "ark,bg:nnet3-copy-egs {multitask_egs_opts} \
                    {egs_rspecifier} ark:- | \
                    nnet3-merge-egs --minibatch-size=1:64 ark:- \
                    ark:- |" """.format(command=run_opts.command,
                                        dir=dir,
                                        iter=iter,
                                        egs_rspecifier=egs_rspecifier,
                                        opts=' '.join(opts), model=model,
                                        multitask_egs_opts=multitask_egs_opts))
Example #7
def compute_progress(dir, iter, run_opts):

    prev_model = '{0}/{1}.raw'.format(dir, iter - 1)
    model = '{0}/{1}.raw'.format(dir, iter)

    common_lib.background_command("""{command} {dir}/log/progress.{iter}.log \
                nnet3-info {model} '&&' \
                nnet3-show-progress --use-gpu=no {prev_model} {model}\
        """.format(command=run_opts.command,
                   dir=dir,
                   iter=iter,
                   model=model,
                   prev_model=prev_model))
Example #8
def compute_progress(dir, iter, egs_dir, run_opts, get_raw_nnet_from_am=True):
    suffix = "mdl" if get_raw_nnet_from_am else "raw"
    prev_model = '{0}/{1}.{2}'.format(dir, iter - 1, suffix)
    model = '{0}/{1}.{2}'.format(dir, iter, suffix)

    common_lib.background_command("""{command} {dir}/log/progress.{iter}.log \
                    nnet3-info {model} '&&' \
                    nnet3-show-progress --use-gpu=no {prev_model} {model}""".format(
                        command=run_opts.command,
                        dir=dir,
                        iter=iter,
                        model=model,
                        prev_model=prev_model))
Example #9
def compute_progress(dir, iter, egs_dir,
                     run_opts,
                     get_raw_nnet_from_am=True):
    suffix = "mdl" if get_raw_nnet_from_am else "raw"
    prev_model = '{0}/{1}.{2}'.format(dir, iter - 1, suffix)
    model = '{0}/{1}.{2}'.format(dir, iter, suffix)

    common_lib.background_command(
            """{command} {dir}/log/progress.{iter}.log \
                    nnet3-info {model} '&&' \
                    nnet3-show-progress --use-gpu=no {prev_model} {model}""".format(
                command=run_opts.command, dir=dir,
                iter=iter, model=model, prev_model=prev_model))
Example #10
def compute_progress(dir, iter, run_opts):

    prev_model = '{0}/{1}.mdl'.format(dir, iter - 1)
    model = '{0}/{1}.mdl'.format(dir, iter)

    common_lib.background_command(
        """{command} {dir}/log/progress.{iter}.log \
                nnet3-am-info {model} '&&' \
                nnet3-show-progress --use-gpu=no \
                    "nnet3-am-copy --raw=true {prev_model} - |" \
                    "nnet3-am-copy --raw=true {model} - |"
        """.format(command=run_opts.command,
                   dir=dir,
                   iter=iter,
                   model=model,
                   prev_model=prev_model))
Example #11
def compute_train_cv_probabilities(dir, iter, egs_dir, l2_regularize,
                                   xent_regularize, leaky_hmm_coefficient,
                                   run_opts,
                                   use_multitask_egs=False):
    model = '{0}/{1}.mdl'.format(dir, iter)
    scp_or_ark = "scp" if use_multitask_egs else "ark"
    egs_suffix = ".scp" if use_multitask_egs else ".cegs"

    multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
                             egs_dir,
                             egs_prefix="valid_diagnostic.",
                             use_multitask_egs=use_multitask_egs)


    common_lib.background_command(
        """{command} {dir}/log/compute_prob_valid.{iter}.log \
                nnet3-chain-compute-prob --l2-regularize={l2} \
                --leaky-hmm-coefficient={leaky} --xent-regularize={xent_reg} \
                {model} {dir}/den.fst \
                "ark,bg:nnet3-chain-copy-egs {multitask_egs_opts} {scp_or_ark}:{egs_dir}/valid_diagnostic{egs_suffix} \
                    ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \
        """.format(command=run_opts.command, dir=dir, iter=iter, model=model,
                   l2=l2_regularize, leaky=leaky_hmm_coefficient,
                   xent_reg=xent_regularize,
                   egs_dir=egs_dir,
                   multitask_egs_opts=multitask_egs_opts,
                   scp_or_ark=scp_or_ark, egs_suffix=egs_suffix))

    multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
                             egs_dir,
                             egs_prefix="train_diagnostic.",
                             use_multitask_egs=use_multitask_egs)

    common_lib.background_command(
        """{command} {dir}/log/compute_prob_train.{iter}.log \
                nnet3-chain-compute-prob --l2-regularize={l2} \
                --leaky-hmm-coefficient={leaky} --xent-regularize={xent_reg} \
                {model} {dir}/den.fst \
                "ark,bg:nnet3-chain-copy-egs {multitask_egs_opts} {scp_or_ark}:{egs_dir}/train_diagnostic{egs_suffix} \
                    ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \
        """.format(command=run_opts.command, dir=dir, iter=iter, model=model,
                   l2=l2_regularize, leaky=leaky_hmm_coefficient,
                   xent_reg=xent_regularize,
                   egs_dir=egs_dir,
                   multitask_egs_opts=multitask_egs_opts,
                   scp_or_ark=scp_or_ark, egs_suffix=egs_suffix))
Example #12
def compute_progress(dir, iter, egs_dir, run_opts, get_raw_nnet_from_am=True):
    if get_raw_nnet_from_am:
        prev_model = "nnet3-am-copy --raw=true {0}/{1}.mdl - |".format(
            dir, iter - 1)
        model = "nnet3-am-copy --raw=true {0}/{1}.mdl - |".format(dir, iter)
    else:
        prev_model = '{0}/{1}.raw'.format(dir, iter - 1)
        model = '{0}/{1}.raw'.format(dir, iter)

    common_lib.background_command("""{command} {dir}/log/progress.{iter}.log \
                    nnet3-info "{model}" '&&' \
                    nnet3-show-progress --use-gpu=no "{prev_model}" "{model}" \
                    "ark,bg:nnet3-copy-egs \
                        ark:{egs_dir}/train_diagnostic.egs ark:- | \
                        nnet3-merge-egs --minibatch-size=1:64 ark:- \
                        ark:- |" """.format(command=run_opts.command,
                                            dir=dir,
                                            iter=iter,
                                            model=model,
                                            prev_model=prev_model,
                                            egs_dir=egs_dir))
Example #13
def compute_progress(dir, iter, egs_dir, run_opts, get_raw_nnet_from_am=True):
    suffix = "mdl" if get_raw_nnet_from_am else "raw"
    prev_model = '{0}/{1}.{2}'.format(dir, iter - 1, suffix)
    model = '{0}/{1}.{2}'.format(dir, iter, suffix)

    common_lib.background_command("""{command} {dir}/log/progress.{iter}.log \
                    nnet3-info {model} '&&' \
                    nnet3-show-progress --use-gpu=no {prev_model} {model}""".format(
                        command=run_opts.command,
                        dir=dir,
                        iter=iter,
                        model=model,
                        prev_model=prev_model))

    if iter % 10 == 0 and iter > 0:
        # Every 10 iters, print some more detailed information.
        # full_progress.X.log contains some diagnostics of the difference in
        # parameters, printed in the same format as from nnet3-info.
        common_lib.background_command(
            """{command} {dir}/log/full_progress.{iter}.log \
            nnet3-show-progress --use-gpu=no --verbose=2 {prev_model} {model}
        """.format(command=run_opts.command,
                   dir=dir,
                   iter=iter,
                   model=model,
                   prev_model=prev_model))
        # full_info.X.log is just the nnet3-info of the model, with the --verbose=2
        # option which includes stats on the singular values of the parameter matrices.
        common_lib.background_command(
            """{command} {dir}/log/full_info.{iter}.log \
            nnet3-info --verbose=2 {model}
        """.format(command=run_opts.command, dir=dir, iter=iter, model=model))
Example #14
def compute_progress(dir, iter, run_opts):

    prev_model = '{0}/{1}.mdl'.format(dir, iter - 1)
    model = '{0}/{1}.mdl'.format(dir, iter)

    common_lib.background_command(
        """{command} {dir}/log/progress.{iter}.log \
                nnet3-am-info {model} '&&' \
                nnet3-show-progress --use-gpu=no {prev_model} {model}
        """.format(command=run_opts.command,
                   dir=dir,
                   iter=iter,
                   model=model,
                   prev_model=prev_model))
    if iter % 10 == 0 and iter > 0:
        # Every 10 iters, print some more detailed information.
        # full_progress.X.log contains some diagnostics of the difference in
        # parameters, printed in the same format as from nnet3-info.
        common_lib.background_command(
            """{command} {dir}/log/full_progress.{iter}.log \
            nnet3-show-progress --use-gpu=no --verbose=2 {prev_model} {model}
        """.format(command=run_opts.command,
                   dir=dir,
                   iter=iter,
                   model=model,
                   prev_model=prev_model))
        # full_info.X.log is just the nnet3-info of the model, with the --verbose=2
        # option which includes stats on the singular values of the parameter matrices.
        common_lib.background_command(
            """{command} {dir}/log/full_info.{iter}.log \
            nnet3-info --verbose=2 {model}
        """.format(command=run_opts.command,
                   dir=dir,
                   iter=iter,
                   model=model))
Example #15
def compute_progress(dir,
                     iter,
                     egs_dir,
                     run_opts,
                     get_raw_nnet_from_am=True,
                     use_multitask_egs=False):
    if get_raw_nnet_from_am:
        prev_model = "nnet3-am-copy --raw=true {0}/{1}.mdl - |".format(
            dir, iter - 1)
        model = "nnet3-am-copy --raw=true {0}/{1}.mdl - |".format(dir, iter)
    else:
        prev_model = '{0}/{1}.raw'.format(dir, iter - 1)
        model = '{0}/{1}.raw'.format(dir, iter)

    scp_or_ark = "scp" if use_multitask_egs else "ark"
    egs_suffix = ".scp" if use_multitask_egs else ".egs"

    egs_rspecifier = "{0}:{1}/train_diagnostic{2}".format(
        scp_or_ark, egs_dir, egs_suffix)

    multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
        egs_dir,
        egs_prefix="train_diagnostic.",
        use_multitask_egs=use_multitask_egs)

    common_lib.background_command("""{command} {dir}/log/progress.{iter}.log \
                    nnet3-info "{model}" '&&' \
                    nnet3-show-progress --use-gpu=no "{prev_model}" "{model}" \
                    "ark,bg:nnet3-copy-egs {multitask_egs_opts} \
                        {egs_rspecifier} ark:- | \
                        nnet3-merge-egs --minibatch-size=1:64 ark:- \
                        ark:- |" """.format(
        command=run_opts.command,
        dir=dir,
        iter=iter,
        egs_rspecifier=egs_rspecifier,
        model=model,
        prev_model=prev_model,
        multitask_egs_opts=multitask_egs_opts))
Example #16
def compute_progress(dir, iter, egs_dir,
                     run_opts,
                     get_raw_nnet_from_am=True,
                     use_multitask_egs=False):
    if get_raw_nnet_from_am:
        prev_model = "nnet3-am-copy --raw=true {0}/{1}.mdl - |".format(
               dir, iter - 1)
        model = "nnet3-am-copy --raw=true {0}/{1}.mdl - |".format(dir, iter)
    else:
        prev_model = '{0}/{1}.raw'.format(dir, iter - 1)
        model = '{0}/{1}.raw'.format(dir, iter)


    scp_or_ark = "scp" if use_multitask_egs else "ark"
    egs_suffix = ".scp" if use_multitask_egs else ".egs"

    egs_rspecifier = "{0}:{1}/train_diagnostic{2}".format(
        scp_or_ark, egs_dir, egs_suffix)

    multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
                             egs_dir,
                             egs_prefix="train_diagnostic.",
                             use_multitask_egs=use_multitask_egs)

    common_lib.background_command(
            """{command} {dir}/log/progress.{iter}.log \
                    nnet3-info "{model}" '&&' \
                    nnet3-show-progress --use-gpu=no "{prev_model}" "{model}" \
                    "ark,bg:nnet3-copy-egs {multitask_egs_opts} \
                        {egs_rspecifier} ark:- | \
                        nnet3-merge-egs --minibatch-size=1:64 ark:- \
                        ark:- |" """.format(command=run_opts.command,
                                            dir=dir,
                                            iter=iter,
                                            egs_rspecifier=egs_rspecifier,
                                            model=model,
                                            prev_model=prev_model,
                                            multitask_egs_opts=multitask_egs_opts))
Example #17
def train_new_models(dir,
                     iter,
                     srand,
                     num_jobs,
                     num_archives_processed,
                     num_archives,
                     raw_model_string,
                     egs_dir,
                     apply_deriv_weights,
                     min_deriv_time,
                     max_deriv_time_relative,
                     l2_regularize,
                     xent_regularize,
                     leaky_hmm_coefficient,
                     momentum,
                     max_param_change,
                     shuffle_buffer_size,
                     num_chunk_per_minibatch_str,
                     frame_subsampling_factor,
                     run_opts,
                     backstitch_training_scale=0.0,
                     backstitch_training_interval=1):
    """
    Called from train_one_iteration(), this method trains new models
    with 'num_jobs' jobs, and
    writes files like exp/tdnn_a/24.{1,2,3,..<num_jobs>}.raw

    We cannot easily use a single parallel SGE job to do the main training,
    because the computation of which archive and which --frame option
    to use for each job is a little complex, so we spawn each one separately.
    This is no longer true for RNNs, where we do not use the --frame option,
    but we use the same script for consistency with the FF-DNN code.
    """

    deriv_time_opts = []
    if min_deriv_time is not None:
        deriv_time_opts.append(
            "--optimization.min-deriv-time={0}".format(min_deriv_time))
    if max_deriv_time_relative is not None:
        deriv_time_opts.append(
            "--optimization.max-deriv-time-relative={0}".format(
                int(max_deriv_time_relative)))

    threads = []
    # the GPU timing info is only printed if we use the --verbose=1 flag; this
    # slows down the computation slightly, so don't accumulate it on every
    # iteration.  Don't do it on iteration 0 either, because we use a smaller
    # than normal minibatch size, and people may get confused thinking it's
    # slower for iteration 0 because of the verbose option.
    verbose_opt = ("--verbose=1" if iter % 20 == 0 and iter > 0 else "")

    for job in range(1, num_jobs + 1):
        # k is a zero-based index that we will derive the other indexes from.
        k = num_archives_processed + job - 1
        # work out the 1-based archive index.
        archive_index = (k % num_archives) + 1
        # previous : frame_shift = (k/num_archives) % frame_subsampling_factor
        frame_shift = ((archive_index + k // num_archives) %
                       frame_subsampling_factor)

        cache_io_opts = (("--read-cache={dir}/cache.{iter}".format(
            dir=dir, iter=iter) if iter > 0 else "") +
                         (" --write-cache={0}/cache.{1}".format(dir, iter + 1)
                          if job == 1 else ""))

        thread = common_lib.background_command(
            """{command} {train_queue_opt} {dir}/log/train.{iter}.{job}.log \
                    nnet3-chain-train {parallel_train_opts} {verbose_opt} \
                    --apply-deriv-weights={app_deriv_wts} \
                    --l2-regularize={l2} --leaky-hmm-coefficient={leaky} \
                    {cache_io_opts}  --xent-regularize={xent_reg} \
                    {deriv_time_opts} \
                    --print-interval=10 --momentum={momentum} \
                    --max-param-change={max_param_change} \
                    --backstitch-training-scale={backstitch_training_scale} \
                    --backstitch-training-interval={backstitch_training_interval} \
                    --srand={srand} \
                    "{raw_model}" {dir}/den.fst \
                    "ark,bg:nnet3-chain-copy-egs \
                        --frame-shift={fr_shft} \
                        ark:{egs_dir}/cegs.{archive_index}.ark ark:- | \
                        nnet3-chain-shuffle-egs --buffer-size={buf_size} \
                        --srand={srand} ark:- ark:- | nnet3-chain-merge-egs \
                        --minibatch-size={num_chunk_per_mb} ark:- ark:- |" \
                    {dir}/{next_iter}.{job}.raw""".format(
                command=run_opts.command,
                train_queue_opt=run_opts.train_queue_opt,
                dir=dir,
                iter=iter,
                srand=iter + srand,
                next_iter=iter + 1,
                job=job,
                deriv_time_opts=" ".join(deriv_time_opts),
                app_deriv_wts=apply_deriv_weights,
                fr_shft=frame_shift,
                l2=l2_regularize,
                xent_reg=xent_regularize,
                leaky=leaky_hmm_coefficient,
                cache_io_opts=cache_io_opts,
                parallel_train_opts=run_opts.parallel_train_opts,
                verbose_opt=verbose_opt,
                momentum=momentum,
                max_param_change=max_param_change,
                backstitch_training_scale=backstitch_training_scale,
                backstitch_training_interval=backstitch_training_interval,
                raw_model=raw_model_string,
                egs_dir=egs_dir,
                archive_index=archive_index,
                buf_size=shuffle_buffer_size,
                num_chunk_per_mb=num_chunk_per_minibatch_str),
            require_zero_status=True)

        threads.append(thread)

    for thread in threads:
        thread.join()
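To make the index arithmetic above concrete, here is a standalone sketch (arbitrary demo values, not from the source) of how the zero-based index k maps to the 1-based archive index and the frame shift as jobs accumulate across iterations:

num_archives = 3
num_jobs = 2
frame_subsampling_factor = 3
for num_archives_processed in range(0, 6, num_jobs):
    for job in range(1, num_jobs + 1):
        k = num_archives_processed + job - 1      # zero-based global index
        archive_index = (k % num_archives) + 1    # cycles 1..num_archives
        frame_shift = ((archive_index + k // num_archives)
                       % frame_subsampling_factor)
        print(k, archive_index, frame_shift)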
Example #18
def compute_train_cv_probabilities(dir,
                                   iter,
                                   egs_dir,
                                   l2_regularize,
                                   left_context,
                                   right_context,
                                   xent_regularize,
                                   leaky_hmm_coefficient,
                                   run_opts,
                                   use_multitask_egs=False,
                                   den_fst_to_output_list=None):
    model = '{0}/{1}.raw'.format(dir, iter)
    scp_or_ark = "scp" if use_multitask_egs else "ark"
    egs_suffix = ".scp" if use_multitask_egs else ".cegs"

    multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
        egs_dir,
        egs_prefix="valid_diagnostic.",
        use_multitask_egs=use_multitask_egs)

    if den_fst_to_output_list is not None:
        den_fst_output = []
        den_fst_list = []
        for den_fst_to_output in den_fst_to_output_list.split():
            fst_and_output = den_fst_to_output.split(":")
            print(fst_and_output)
            assert (len(fst_and_output) == 2)
            assert (os.path.exists("{0}/{1}".format(dir, fst_and_output[0])))
            den_fst_list.append("{0}/{1}".format(dir, fst_and_output[0]))
            den_fst_output.append(fst_and_output[1])
        den_fst_str = " ".join(den_fst_list)
        den_fst_output_opts = "--den-fst-to-output={0}".format(
            ",".join(den_fst_output))
    else:
        assert (os.path.exists("{dir}/den.fst".format(dir=dir)))
        den_fst_str = "{dir}/den.fst".format(dir=dir)
        den_fst_output_opts = ""
    common_lib.background_command(
        """{command} {dir}/log/compute_prob_valid.{iter}.log \
                nnet3-chain-compute-prob {den_fst_output_opts} --l2-regularize={l2} \
                --leaky-hmm-coefficient={leaky} --xent-regularize={xent_reg} \
                {model} {den_fsts} \
                "ark,bg:nnet3-chain-copy-egs --left-context={lc} {multitask_egs_opts} \
                    --right-context={rc} {scp_or_ark}:{egs_dir}/valid_diagnostic{egs_suffix} \
                    ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \
        """.format(command=run_opts.command,
                   dir=dir,
                   iter=iter,
                   model=model,
                   lc=left_context,
                   rc=right_context,
                   l2=l2_regularize,
                   leaky=leaky_hmm_coefficient,
                   xent_reg=xent_regularize,
                   egs_dir=egs_dir,
                   multitask_egs_opts=multitask_egs_opts,
                   scp_or_ark=scp_or_ark,
                   egs_suffix=egs_suffix,
                   den_fst_output_opts=den_fst_output_opts,
                   den_fsts=den_fst_str))

    multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
        egs_dir,
        egs_prefix="train_diagnostic.",
        use_multitask_egs=use_multitask_egs)
    common_lib.background_command(
        """{command} {dir}/log/compute_prob_train.{iter}.log \
                nnet3-chain-compute-prob {den_fst_output_opts} --l2-regularize={l2} \
                --leaky-hmm-coefficient={leaky} --xent-regularize={xent_reg} \
                {model} {den_fst_str} \
                "ark,bg:nnet3-chain-copy-egs --left-context={lc} {multitask_egs_opts} \
                    --right-context={rc} {scp_or_ark}:{egs_dir}/train_diagnostic{egs_suffix} \
                    ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \
        """.format(command=run_opts.command,
                   dir=dir,
                   iter=iter,
                   model=model,
                   lc=left_context,
                   rc=right_context,
                   l2=l2_regularize,
                   leaky=leaky_hmm_coefficient,
                   xent_reg=xent_regularize,
                   egs_dir=egs_dir,
                   multitask_egs_opts=multitask_egs_opts,
                   scp_or_ark=scp_or_ark,
                   egs_suffix=egs_suffix,
                   den_fst_str=den_fst_str,
                   den_fst_output_opts=den_fst_output_opts))
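The den_fst_to_output_list argument above is a space-separated list of fst:output pairs. A standalone illustration of the expected format (hypothetical file and output names):

den_fst_to_output_list = "den.fst:output-0 den_xent.fst:output-1"
for den_fst_to_output in den_fst_to_output_list.split():
    fst, output = den_fst_to_output.split(":")
    print(fst, "->", output)   # e.g. den.fst -> output-0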
Example #19
def train_new_models(dir,
                     iter,
                     srand,
                     num_jobs,
                     num_archives_processed,
                     num_archives,
                     raw_model_string,
                     egs_dir,
                     momentum,
                     max_param_change,
                     shuffle_buffer_size,
                     minibatch_size_str,
                     image_augmentation_opts,
                     run_opts,
                     frames_per_eg=-1,
                     min_deriv_time=None,
                     max_deriv_time_relative=None,
                     use_multitask_egs=False,
                     backstitch_training_scale=0.0,
                     backstitch_training_interval=1):
    """ Called from train_one_iteration(), this model does one iteration of
    training with 'num_jobs' jobs, and writes files like
    exp/tdnn_a/24.{1,2,3,..<num_jobs>}.raw

    We cannot easily use a single parallel SGE job to do the main training,
    because the computation of which archive and which --frame option
    to use for each job is a little complex, so we spawn each one separately.
    This is no longer true for RNNs, where we do not use the --frame option,
    but we use the same script for consistency with the FF-DNN code.

    Selected args:
        frames_per_eg:
            The frames_per_eg, in the context of (non-chain) nnet3 training,
            is normally the number of output (supervised) frames in each training
            example.  However, the frames_per_eg argument to this function should
            only be set to that number (greater than zero) if you intend to
            train on a single frame of each example, on each minibatch.  If you
            provide this argument >0, then for each training job a different
            frame from the dumped example is selected to train on, based on
            the option --frame=n to nnet3-copy-egs.
            If you leave frames_per_eg at its default value (-1), then the
            entire sequence of frames is used for supervision.  This is suitable
            for RNN training, where it helps to amortize the cost of computing
            the activations for the frames of context needed for the recurrence.
        use_multitask_egs : True if different examples are used to train
            multiple tasks or outputs, e.g. multilingual training.
            Multilingual egs can be generated using get_egs.sh and
            steps/nnet3/multilingual/allocate_multilingual_examples.py;
            those are the top-level scripts.
    """

    chunk_level_training = frames_per_eg <= 0

    deriv_time_opts = []
    if min_deriv_time is not None:
        deriv_time_opts.append(
            "--optimization.min-deriv-time={0}".format(min_deriv_time))
    if max_deriv_time_relative is not None:
        deriv_time_opts.append(
            "--optimization.max-deriv-time-relative={0}".format(
                max_deriv_time_relative))

    threads = []

    # the GPU timing info is only printed if we use the --verbose=1 flag; this
    # slows down the computation slightly, so don't accumulate it on every
    # iteration.  Don't do it on iteration 0 either, because we use a smaller
    # than normal minibatch size, and people may get confused thinking it's
    # slower for iteration 0 because of the verbose option.
    verbose_opt = ("--verbose=1" if iter % 20 == 0 and iter > 0 else "")

    for job in range(1, num_jobs + 1):
        # k is a zero-based index that we will derive the other indexes from.
        k = num_archives_processed + job - 1

        # work out the 1-based archive index.
        archive_index = (k % num_archives) + 1

        if not chunk_level_training:
            frame = (k // num_archives + archive_index) % frames_per_eg

        cache_io_opts = (("--read-cache={dir}/cache.{iter}".format(
            dir=dir, iter=iter) if iter > 0 else "") +
                         (" --write-cache={0}/cache.{1}".format(dir, iter + 1)
                          if job == 1 else ""))

        if image_augmentation_opts:
            image_augmentation_cmd = (
                'nnet3-egs-augment-image --srand={srand} {aug_opts} ark:- ark:- |'
                .format(srand=k + srand, aug_opts=image_augmentation_opts))
        else:
            image_augmentation_cmd = ''

        multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
            egs_dir,
            egs_prefix="egs.",
            archive_index=archive_index,
            use_multitask_egs=use_multitask_egs)

        scp_or_ark = "scp" if use_multitask_egs else "ark"

        egs_rspecifier = (
            """ark,bg:nnet3-copy-egs {frame_opts} {multitask_egs_opts} \
            {scp_or_ark}:{egs_dir}/egs.{archive_index}.{scp_or_ark} ark:- | \
            nnet3-shuffle-egs --buffer-size={shuffle_buffer_size} \
            --srand={srand} ark:- ark:- | {aug_cmd} \
            nnet3-merge-egs --minibatch-size={minibatch_size} ark:- ark:- |""".
            format(frame_opts=("" if chunk_level_training else
                               "--frame={0}".format(frame)),
                   egs_dir=egs_dir,
                   archive_index=archive_index,
                   shuffle_buffer_size=shuffle_buffer_size,
                   minibatch_size=minibatch_size_str,
                   aug_cmd=image_augmentation_cmd,
                   srand=iter + srand,
                   scp_or_ark=scp_or_ark,
                   multitask_egs_opts=multitask_egs_opts))

        # note: the thread waits on that process's completion.
        thread = common_lib.background_command(
            """sleep {time}; {command} {train_queue_opt} {dir}/log/train.{iter}.{job}.log \
                    nnet3-train {parallel_train_opts} {cache_io_opts} \
                     {verbose_opt} --print-interval=10 \
                    --momentum={momentum} \
                    --max-param-change={max_param_change} \
                    --backstitch-training-scale={backstitch_training_scale} \
                    --l2-regularize-factor={l2_regularize_factor} \
                    --backstitch-training-interval={backstitch_training_interval} \
                    --srand={srand} \
                    {deriv_time_opts} "{raw_model}" "{egs_rspecifier}" \
                    {dir}/{next_iter}.{job}.raw""".format(
                time=((job - 1) * 10),
                command=run_opts.command,
                train_queue_opt=run_opts.train_queue_opt,
                dir=dir,
                iter=iter,
                next_iter=iter + 1,
                srand=iter + srand,
                job=job,
                parallel_train_opts=run_opts.parallel_train_opts,
                cache_io_opts=cache_io_opts,
                verbose_opt=verbose_opt,
                momentum=momentum,
                max_param_change=max_param_change,
                l2_regularize_factor=1.0 / num_jobs,
                backstitch_training_scale=backstitch_training_scale,
                backstitch_training_interval=backstitch_training_interval,
                deriv_time_opts=" ".join(deriv_time_opts),
                raw_model=raw_model_string,
                egs_rspecifier=egs_rspecifier),
            require_zero_status=True)

        threads.append(thread)

    for thread in threads:
        thread.join()
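The same arithmetic drives frame-level training in the example above: when frames_per_eg > 0, each job trains on a single supervised frame per example, selected with --frame=n. A standalone sketch with arbitrary demo values:

num_archives, frames_per_eg, num_jobs = 4, 8, 2
for num_archives_processed in range(0, 16, num_jobs):
    for job in range(1, num_jobs + 1):
        k = num_archives_processed + job - 1
        archive_index = (k % num_archives) + 1
        # frame cycles through 0..frames_per_eg-1 as archives are revisited
        frame = (k // num_archives + archive_index) % frames_per_eg
        print(k, archive_index, frame)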
Example #20
def train_new_models(dir, iter, srand, num_jobs,
                     num_archives_processed, num_archives,
                     raw_model_string, egs_dir,
                     momentum, max_param_change,
                     shuffle_buffer_size, minibatch_size_str,
                     image_augmentation_opts,
                     run_opts, frames_per_eg=-1,
                     min_deriv_time=None, max_deriv_time_relative=None,
                     use_multitask_egs=False,
                     backstitch_training_scale=0.0, backstitch_training_interval=1):
    """ Called from train_one_iteration(), this model does one iteration of
    training with 'num_jobs' jobs, and writes files like
    exp/tdnn_a/24.{1,2,3,..<num_jobs>}.raw

    We cannot easily use a single parallel SGE job to do the main training,
    because the computation of which archive and which --frame option
    to use for each job is a little complex, so we spawn each one separately.
    This is no longer true for RNNs, where we do not use the --frame option,
    but we use the same script for consistency with the FF-DNN code.

    Selected args:
        frames_per_eg: The default value -1 implies chunk_level_training, which
            is particularly applicable to RNN training. If it is > 0, then it
            implies frame-level training, which is applicable for DNN training.
            If it is > 0, then for each parallel SGE job created, a different
            frame numbered 0..frames_per_eg-1 is used.
        use_multitask_egs : True if different examples are used to train
                            multiple tasks or outputs, e.g. multilingual
                            training. Multilingual egs can be generated
                            using get_egs.sh and
                            steps/nnet3/multilingual/allocate_multilingual_examples.py;
                            those are the top-level scripts.
    """

    chunk_level_training = frames_per_eg <= 0

    deriv_time_opts = []
    if min_deriv_time is not None:
        deriv_time_opts.append("--optimization.min-deriv-time={0}".format(
                           min_deriv_time))
    if max_deriv_time_relative is not None:
        deriv_time_opts.append("--optimization.max-deriv-time-relative={0}".format(
                           max_deriv_time_relative))

    threads = []

    # the GPU timing info is only printed if we use the --verbose=1 flag; this
    # slows down the computation slightly, so don't accumulate it on every
    # iteration.  Don't do it on iteration 0 either, because we use a smaller
    # than normal minibatch size, and people may get confused thinking it's
    # slower for iteration 0 because of the verbose option.
    verbose_opt = ("--verbose=1" if iter % 20 == 0 and iter > 0 else "")

    for job in range(1, num_jobs+1):
        # k is a zero-based index that we will derive the other indexes from.
        k = num_archives_processed + job - 1

        # work out the 1-based archive index.
        archive_index = (k % num_archives) + 1

        if not chunk_level_training:
            frame = (k // num_archives + archive_index) % frames_per_eg

        cache_io_opts = (("--read-cache={dir}/cache.{iter}".format(dir=dir,
                                                                  iter=iter)
                          if iter > 0 else "") +
                         (" --write-cache={0}/cache.{1}".format(dir, iter + 1)
                          if job == 1 else ""))

        if image_augmentation_opts:
            image_augmentation_cmd = (
                'nnet3-egs-augment-image --srand={srand} {aug_opts} ark:- ark:- |'.format(
                    srand=k+srand,
                    aug_opts=image_augmentation_opts))
        else:
            image_augmentation_cmd = ''


        multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
            egs_dir,
            egs_prefix="egs.",
            archive_index=archive_index,
            use_multitask_egs=use_multitask_egs)

        scp_or_ark = "scp" if use_multitask_egs else "ark"

        egs_rspecifier = (
            """ark,bg:nnet3-copy-egs {frame_opts} {multitask_egs_opts} \
            {scp_or_ark}:{egs_dir}/egs.{archive_index}.{scp_or_ark} ark:- | \
            nnet3-shuffle-egs --buffer-size={shuffle_buffer_size} \
            --srand={srand} ark:- ark:- | {aug_cmd} \
            nnet3-merge-egs --minibatch-size={minibatch_size} ark:- ark:- |""".format(
                frame_opts=("" if chunk_level_training
                            else "--frame={0}".format(frame)),
                egs_dir=egs_dir, archive_index=archive_index,
                shuffle_buffer_size=shuffle_buffer_size,
                minibatch_size=minibatch_size_str,
                aug_cmd=image_augmentation_cmd,
                srand=iter+srand,
                scp_or_ark=scp_or_ark,
                multitask_egs_opts=multitask_egs_opts))

        # note: the thread waits on that process's completion.
        thread = common_lib.background_command(
            """{command} {train_queue_opt} {dir}/log/train.{iter}.{job}.log \
                    nnet3-train {parallel_train_opts} {cache_io_opts} \
                     {verbose_opt} --print-interval=10 \
                    --momentum={momentum} \
                    --max-param-change={max_param_change} \
                    --backstitch-training-scale={backstitch_training_scale} \
                    --backstitch-training-interval={backstitch_training_interval} \
                    --srand={srand} \
                    {deriv_time_opts} "{raw_model}" "{egs_rspecifier}" \
                    {dir}/{next_iter}.{job}.raw""".format(
                command=run_opts.command,
                train_queue_opt=run_opts.train_queue_opt,
                dir=dir, iter=iter,
                next_iter=iter + 1, srand=iter + srand,
                job=job,
                parallel_train_opts=run_opts.parallel_train_opts,
                cache_io_opts=cache_io_opts,
                verbose_opt=verbose_opt,
                momentum=momentum, max_param_change=max_param_change,
                backstitch_training_scale=backstitch_training_scale,
                backstitch_training_interval=backstitch_training_interval,
                deriv_time_opts=" ".join(deriv_time_opts),
                raw_model=raw_model_string,
                egs_rspecifier=egs_rspecifier),
            require_zero_status=True)

        threads.append(thread)

    for thread in threads:
        thread.join()
Example #21
def train_new_models(dir,
                     iter,
                     srand,
                     num_jobs,
                     num_archives_processed,
                     num_archives,
                     raw_model_string,
                     egs_dir,
                     momentum,
                     max_param_change,
                     shuffle_buffer_size,
                     minibatch_size_str,
                     image_augmentation_opts,
                     run_opts,
                     frames_per_eg=-1,
                     min_deriv_time=None,
                     max_deriv_time_relative=None):
    """ Called from train_one_iteration(), this model does one iteration of
    training with 'num_jobs' jobs, and writes files like
    exp/tdnn_a/24.{1,2,3,..<num_jobs>}.raw

    We cannot easily use a single parallel SGE job to do the main training,
    because the computation of which archive and which --frame option
    to use for each job is a little complex, so we spawn each one separately.
    This is no longer true for RNNs, where we do not use the --frame option,
    but we use the same script for consistency with the FF-DNN code.

    Selected args:
        frames_per_eg: The default value -1 implies chunk_level_training, which
            is particularly applicable to RNN training. If it is > 0, then it
            implies frame-level training, which is applicable for DNN training.
            If it is > 0, then for each parallel SGE job created, a different
            frame numbered 0..frames_per_eg-1 is used.
    """

    chunk_level_training = frames_per_eg <= 0

    deriv_time_opts = []
    if min_deriv_time is not None:
        deriv_time_opts.append(
            "--optimization.min-deriv-time={0}".format(min_deriv_time))
    if max_deriv_time_relative is not None:
        deriv_time_opts.append(
            "--optimization.max-deriv-time-relative={0}".format(
                max_deriv_time_relative))

    threads = []

    # the GPU timing info is only printed if we use the --verbose=1 flag; this
    # slows down the computation slightly, so don't accumulate it on every
    # iteration.  Don't do it on iteration 0 either, because we use a smaller
    # than normal minibatch size, and people may get confused thinking it's
    # slower for iteration 0 because of the verbose option.
    verbose_opt = ("--verbose=1" if iter % 20 == 0 and iter > 0 else "")

    for job in range(1, num_jobs + 1):
        # k is a zero-based index that we will derive the other indexes from.
        k = num_archives_processed + job - 1

        # work out the 1-based archive index.
        archive_index = (k % num_archives) + 1

        if not chunk_level_training:
            frame = (k // num_archives + archive_index) % frames_per_eg

        cache_io_opts = (("--read-cache={dir}/cache.{iter}".format(
            dir=dir, iter=iter) if iter > 0 else "") +
                         (" --write-cache={0}/cache.{1}".format(dir, iter + 1)
                          if job == 1 else ""))

        if image_augmentation_opts:
            image_augmentation_cmd = (
                'nnet3-egs-augment-image --srand={srand} {aug_opts} ark:- ark:- |'
                .format(srand=k + srand, aug_opts=image_augmentation_opts))
        else:
            image_augmentation_cmd = ''

        # note: the thread waits on that process's completion.
        thread = common_lib.background_command(
            """{command} {train_queue_opt} {dir}/log/train.{iter}.{job}.log \
                    nnet3-train {parallel_train_opts} {cache_io_opts} \
                     {verbose_opt} --print-interval=10 \
                    --momentum={momentum} \
                    --max-param-change={max_param_change} \
                    {deriv_time_opts} "{raw_model}" \
                    "ark,bg:nnet3-copy-egs {frame_opts} """
            """ark:{egs_dir}/egs.{archive_index}.ark ark:- |"""
            """nnet3-shuffle-egs --buffer-size={shuffle_buffer_size} """
            """--srand={srand} ark:- ark:- | {aug_cmd} """
            """nnet3-merge-egs --minibatch-size={minibatch_size_str} """
            """ ark:- ark:- |" {dir}/{next_iter}.{job}.raw""".format(
                command=run_opts.command,
                train_queue_opt=run_opts.train_queue_opt,
                dir=dir,
                iter=iter,
                srand=iter + srand,
                next_iter=iter + 1,
                job=job,
                parallel_train_opts=run_opts.parallel_train_opts,
                cache_io_opts=cache_io_opts,
                verbose_opt=verbose_opt,
                frame_opts=("" if chunk_level_training else
                            "--frame={0}".format(frame)),
                momentum=momentum,
                max_param_change=max_param_change,
                deriv_time_opts=" ".join(deriv_time_opts),
                raw_model=raw_model_string,
                egs_dir=egs_dir,
                archive_index=archive_index,
                shuffle_buffer_size=shuffle_buffer_size,
                minibatch_size_str=minibatch_size_str,
                aug_cmd=image_augmentation_cmd),
            require_zero_status=True)

        threads.append(thread)

    for thread in threads:
        thread.join()
Example #22
def train_new_models(dir, iter, srand, num_jobs,
                     num_archives_processed, num_archives,
                     raw_model_string, egs_dir,
                     apply_deriv_weights,
                     min_deriv_time, max_deriv_time_relative,
                     l2_regularize, xent_regularize, leaky_hmm_coefficient,
                     momentum, max_param_change,
                     shuffle_buffer_size, num_chunk_per_minibatch_str,
                     frame_subsampling_factor, run_opts, train_opts,
                     backstitch_training_scale=0.0, backstitch_training_interval=1,
                     use_multitask_egs=False):
    """
    Called from train_one_iteration(), this method trains new models
    with 'num_jobs' jobs, and
    writes files like exp/tdnn_a/24.{1,2,3,..<num_jobs>}.raw

    We cannot easily use a single parallel SGE job to do the main training,
    because the computation of which archive and which --frame option
    to use for each job is a little complex, so we spawn each one separately.
    This is no longer true for RNNs, where we do not use the --frame option,
    but we use the same script for consistency with the FF-DNN code.

    use_multitask_egs : True if different examples are used to train multiple
                        tasks or outputs, e.g. multilingual training.
                        Multilingual egs can be generated using get_egs.sh and
                        steps/nnet3/multilingual/allocate_multilingual_examples.py;
                        those are the top-level scripts.
    """

    deriv_time_opts = []
    if min_deriv_time is not None:
        deriv_time_opts.append("--optimization.min-deriv-time={0}".format(
                                    min_deriv_time))
    if max_deriv_time_relative is not None:
        deriv_time_opts.append("--optimization.max-deriv-time-relative={0}".format(
                                    int(max_deriv_time_relative)))

    threads = []
    # the GPU timing info is only printed if we use the --verbose=1 flag; this
    # slows down the computation slightly, so don't accumulate it on every
    # iteration.  Don't do it on iteration 0 either, because we use a smaller
    # than normal minibatch size, and people may get confused thinking it's
    # slower for iteration 0 because of the verbose option.
    verbose_opt = ("--verbose=1" if iter % 20 == 0 and iter > 0 else "")

    for job in range(1, num_jobs+1):
        # k is a zero-based index that we will derive the other indexes from.
        k = num_archives_processed + job - 1
        # work out the 1-based archive index.
        archive_index = (k % num_archives) + 1
        # previous : frame_shift = (k/num_archives) % frame_subsampling_factor
        frame_shift = ((archive_index + k // num_archives)
                       % frame_subsampling_factor)

        multitask_egs_opts = common_train_lib.get_multitask_egs_opts(
            egs_dir,
            egs_prefix="cegs.",
            archive_index=archive_index,
            use_multitask_egs=use_multitask_egs)
        scp_or_ark = "scp" if use_multitask_egs else "ark"
        cache_io_opts = (("--read-cache={dir}/cache.{iter}".format(dir=dir,
                                                                  iter=iter)
                          if iter > 0 else "") +
                         (" --write-cache={0}/cache.{1}".format(dir, iter + 1)
                          if job == 1 else ""))

        thread = common_lib.background_command(
            """{command} {train_queue_opt} {dir}/log/train.{iter}.{job}.log \
                    nnet3-chain-train {parallel_train_opts} {verbose_opt} \
                    --apply-deriv-weights={app_deriv_wts} \
                    --l2-regularize={l2} --leaky-hmm-coefficient={leaky} \
                    {cache_io_opts}  --xent-regularize={xent_reg} \
                    {deriv_time_opts} \
                    --print-interval=10 --momentum={momentum} \
                    --max-param-change={max_param_change} \
                    --backstitch-training-scale={backstitch_training_scale} \
                    --backstitch-training-interval={backstitch_training_interval} \
                    --l2-regularize-factor={l2_regularize_factor} {train_opts} \
                    --srand={srand} \
                    "{raw_model}" {dir}/den.fst \
                    "ark,bg:nnet3-chain-copy-egs {multitask_egs_opts} \
                        --frame-shift={fr_shft} \
                        {scp_or_ark}:{egs_dir}/cegs.{archive_index}.{scp_or_ark} ark:- | \
                        nnet3-chain-shuffle-egs --buffer-size={buf_size} \
                        --srand={srand} ark:- ark:- | nnet3-chain-merge-egs \
                        --minibatch-size={num_chunk_per_mb} ark:- ark:- |" \
                    {dir}/{next_iter}.{job}.raw""".format(
                        command=run_opts.command,
                        train_queue_opt=run_opts.train_queue_opt,
                        dir=dir, iter=iter, srand=iter + srand,
                        next_iter=iter + 1, job=job,
                        deriv_time_opts=" ".join(deriv_time_opts),
                        app_deriv_wts=apply_deriv_weights,
                        fr_shft=frame_shift, l2=l2_regularize,
                        train_opts=train_opts,
                        xent_reg=xent_regularize, leaky=leaky_hmm_coefficient,
                        cache_io_opts=cache_io_opts,
                        parallel_train_opts=run_opts.parallel_train_opts,
                        verbose_opt=verbose_opt,
                        momentum=momentum, max_param_change=max_param_change,
                        backstitch_training_scale=backstitch_training_scale,
                        backstitch_training_interval=backstitch_training_interval,
                        l2_regularize_factor=1.0/num_jobs,
                        raw_model=raw_model_string,
                        egs_dir=egs_dir, archive_index=archive_index,
                        buf_size=shuffle_buffer_size,
                        num_chunk_per_mb=num_chunk_per_minibatch_str,
                        multitask_egs_opts=multitask_egs_opts,
                        scp_or_ark=scp_or_ark),
            require_zero_status=True)

        threads.append(thread)

    for thread in threads:
        thread.join()
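All of these examples depend on common_lib.background_command from Kaldi's steps/libs/common.py. As rough orientation only, a simplified stand-in (not the actual Kaldi implementation) could look like:

import subprocess
import sys
import threading

def background_command(command, require_zero_status=False):
    """Simplified stand-in for Kaldi's common_lib.background_command: run
    `command` through the shell on a background thread and return the
    thread so the caller can .join() on it. The real helper terminates
    the whole script when require_zero_status is True and the command
    fails; this sketch only reports the failure."""
    def waiter():
        status = subprocess.call(command, shell=True)
        if status != 0 and require_zero_status:
            sys.stderr.write("Command failed with status {0}: {1}\n"
                             .format(status, command))
    thread = threading.Thread(target=waiter)
    thread.start()
    return thread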