Example #1
def eval_trained_dnn(main_dir, _iter, egs_dir, run_opts):
    input_model_dir = "{dir}/model_{iter}".format(dir=main_dir, iter=_iter)

    # we assume there is just one tar file for validation
    tar_file = "{0}/valid_egs.1.tar".format(egs_dir)

    _command = '{command} "{main_dir}/log/compute_prob_valid.{iter}.log" ' \
               'local/tf/eval_dnn.py ' \
               '--tar-file="{tar_file}" --use-gpu=no ' \
               '--log-file="{main_dir}/log/compute_prob_valid.{iter}.log" ' \
               '--input-dir="{input_model_dir}"'.format(command=run_opts.command,
                                                        main_dir=main_dir,
                                                        iter=_iter,
                                                        tar_file=tar_file,
                                                        input_model_dir=input_model_dir)

    utils.background_command(_command)

    # we assume there is just one tar file for train diagnostics
    tar_file = "{0}/train_subset_egs.1.tar".format(egs_dir)

    _command = '{command} "{main_dir}/log/compute_prob_train_subset.{iter}.log" ' \
               'local/tf/eval_dnn.py ' \
               '--tar-file="{tar_file}" --use-gpu=no ' \
               '--log-file="{main_dir}/log/compute_prob_train_subset.{iter}.log" ' \
               '--input-dir="{input_model_dir}"'.format(command=run_opts.command,
                                                        main_dir=main_dir,
                                                        iter=_iter,
                                                        tar_file=tar_file,
                                                        input_model_dir=input_model_dir)

    utils.background_command(_command)
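
Both evaluation commands run asynchronously via utils.background_command, a
helper local to this recipe that is not shown in the excerpt. A minimal
sketch of what such a helper might look like, assuming it runs the command
through the shell on a background thread and returns the thread so callers
can join() it (as Example #2 does), is:

import subprocess
import threading


def background_command(command, require_zero_status=True):
    """Hypothetical stand-in for utils.background_command: run 'command'
    through the shell on a daemon thread and return the thread."""
    def run():
        status = subprocess.call(command, shell=True)
        if status != 0 and require_zero_status:
            # Raising here only terminates this worker thread, but it makes
            # the failure visible in the traceback/logs.
            raise RuntimeError(
                "Command failed with status {0}: {1}".format(status, command))

    thread = threading.Thread(target=run)
    thread.daemon = True  # do not block interpreter exit
    thread.start()
    return thread
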
Example #2
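# The original excerpt omits its imports; the function below needs at least
# the standard-library os module ('utils' is the recipe's local helper
# module, whose exact import path is not shown here).
import os
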
def train_new_models(model_dir,
                     _iter,
                     random_seed,
                     num_jobs,
                     num_archives_processed,
                     num_archives,
                     learning_rate,
                     shrinkage_value,
                     dropout_proportion,
                     egs_dir,
                     momentum,
                     max_param_change,
                     minibatch_size,
                     run_opts,
                     feature_dim,
                     archives_minibatch_count,
                     try_count=0,
                     train_opts=""):
    """ Called from train_one_iteration(), this model does one iteration of
    training with 'num_jobs' jobs, and writes models in dirs like
    exp/tdnn_a/model_24.{1,2,3,..<num_jobs>}

    We cannot easily use a single parallel SGE job to do the main training,
    because the computation of which archive and which --frame option
    to use for each job is a little complex, so we spawn each one separately.
    """

    threads = []

    # the GPU timing info is only printed if we use the --verbose=1 flag; this
    # slows down the computation slightly, so don't accumulate it on every
    # iteration.  Don't do it on iteration 0 either, because we use a smaller
    # than normal minibatch size, and people may get confused thinking it's
    # slower for iteration 0 because of the verbose option.
    verbose_opt = ("--verbose=1" if _iter % 20 == 0 and _iter > 0 else "")

    for job in range(1, num_jobs + 1):
        # k is a zero-based index that we will derive the other indexes from.
        k = num_archives_processed + job - 1

        # work out the 1-based archive index; jobs cycle through the archives.
        # E.g. with num_archives=4 and num_archives_processed=6, jobs 1, 2, 3
        # get k = 6, 7, 8 and read archives 3, 4, 1.
        archive_index = (k % num_archives) + 1
        minibatch_count = archives_minibatch_count[archive_index]

        # On a retry of a failed iteration (try_count > 0), skip any job whose
        # output model from the previous attempt is already complete.
        if try_count > 0 and utils.is_correct_model_dir(
                '{0}/model_{1}.{2}'.format(model_dir, _iter + 1, job)):
            continue

        egs_rspecifier = \
            '--ranges-file="{egs_dir}/temp/ranges.{archive_index}" ' \
            '--scp-file="{egs_dir}/temp/feats.scp.{archive_index}" ' \
            '--shuffle=True --minibatch-size={minibatch_size}'.format(
                egs_dir=egs_dir, archive_index=archive_index,
                minibatch_size=minibatch_size)

        # Check whether a pre-generated tar file exists; if so, pass it to
        # the script to speed up reading the examples.
        tar_file = '{egs_dir}/egs.{archive_index}.tar'.format(
            egs_dir=egs_dir, archive_index=archive_index)
        if os.path.exists(tar_file):
            egs_rspecifier = '--tar-file="{0}" {1}'.format(
                tar_file, egs_rspecifier)

        # Note: --l2-regularize-factor is set to 1.0/num_jobs below, presumably
        # so the effective l2 penalty stays constant when the per-job models
        # are combined after this iteration.
        _command = '{command} {train_queue_opt} {dir}/log/train.{iter}.{job}.log ' \
                   'local/tf/train_dnn_one_iteration.py ' \
                   '{parallel_train_opts} ' \
                   '{verbose_opt} --print-interval=10 ' \
                   '--momentum={momentum} ' \
                   '--max-param-change={max_param_change} ' \
                   '--l2-regularize-factor={l2_regularize_factor} ' \
                   '--random-seed={random_seed} {train_opts} ' \
                   '--learning-rate={learning_rate} ' \
                   '--scale={shrinkage_value} ' \
                   '--minibatch-count={minibatch_count} ' \
                   '--feature-dim={feature_dim} ' \
                   '--dropout-proportion={dropout_proportion} ' \
                   '{egs_rspecifier} ' \
                   '--input-dir={dir}/model_{iter} ' \
                   '--output-dir={dir}/model_{next_iter}.{job}' \
            .format(command=run_opts.command,
                    train_queue_opt=run_opts.train_queue_opt,
                    dir=model_dir, iter=_iter,
                    next_iter=_iter + 1, random_seed=_iter + random_seed,
                    job=job,
                    parallel_train_opts=run_opts.parallel_train_opts,
                    verbose_opt=verbose_opt,
                    momentum=momentum, max_param_change=max_param_change,
                    l2_regularize_factor=1.0 / num_jobs,
                    train_opts=train_opts,
                    learning_rate=learning_rate,
                    shrinkage_value=shrinkage_value,
                    minibatch_count=minibatch_count,
                    feature_dim=feature_dim,
                    dropout_proportion=dropout_proportion,
                    egs_rspecifier=egs_rspecifier)

        thread = utils.background_command(_command, require_zero_status=False)
        threads.append(thread)

    for thread in threads:
        thread.join()
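
For reference, a hypothetical call of this function (all values are
illustrative, and RunOpts is a stand-in for the recipe's run-options object):

class RunOpts(object):
    # Illustrative stand-in for the recipe's run-options object.
    command = "run.pl"
    train_queue_opt = ""
    parallel_train_opts = ""


train_new_models(model_dir="exp/tdnn_a", _iter=24, random_seed=0,
                 num_jobs=3, num_archives_processed=6, num_archives=4,
                 learning_rate=0.0015, shrinkage_value=1.0,
                 dropout_proportion=0.0, egs_dir="exp/tdnn_a/egs",
                 momentum=0.5, max_param_change=2.0, minibatch_size=512,
                 run_opts=RunOpts(), feature_dim=40,
                 archives_minibatch_count={1: 100, 2: 100, 3: 100, 4: 100})

With these values the three jobs read archives 3, 4 and 1 and write models
to exp/tdnn_a/model_25.{1,2,3}.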